summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll14
-rw-r--r--llvm/test/CodeGen/X86/O0-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/X86/O3-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/X86/clobber-fi0.ll37
-rw-r--r--llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll372
-rw-r--r--llvm/test/CodeGen/X86/copy-eflags.ll108
-rw-r--r--llvm/test/CodeGen/X86/eflags-copy-expansion.mir64
-rw-r--r--llvm/test/CodeGen/X86/flags-copy-lowering.mir485
-rw-r--r--llvm/test/CodeGen/X86/mul-i1024.ll6091
-rw-r--r--llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll109
-rw-r--r--llvm/test/CodeGen/X86/win64_frame.ll88
-rw-r--r--llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll11
12 files changed, 3690 insertions, 3691 deletions
diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll
index 606a2d779e9..e8098dd98ee 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll
@@ -10,16 +10,10 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
;
; X32-LABEL: test_add_i64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: movl 16(%ebp), %eax
-; X32-NEXT: movl 20(%ebp), %edx
-; X32-NEXT: addl 8(%ebp), %eax
-; X32-NEXT: adcl 12(%ebp), %edx
-; X32-NEXT: popl %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT: retl
%ret = add i64 %arg1, %arg2
ret i64 %ret
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 6dab866a751..b79fdef2ff1 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -37,6 +37,7 @@
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Expand ISel Pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
+; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
index 9c69628091b..01deba7eed1 100644
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -90,6 +90,7 @@
; CHECK-NEXT: X86 LEA Optimize
; CHECK-NEXT: X86 Optimize Call Frame
; CHECK-NEXT: X86 Avoid Store Forwarding Block
+; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Process Implicit Definitions
diff --git a/llvm/test/CodeGen/X86/clobber-fi0.ll b/llvm/test/CodeGen/X86/clobber-fi0.ll
deleted file mode 100644
index b69b1853160..00000000000
--- a/llvm/test/CodeGen/X86/clobber-fi0.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.7.0"
-
-; In the code below we need to copy the EFLAGS because of scheduling constraints.
-; When copying the EFLAGS we need to write to the stack with push/pop. This forces
-; us to emit the prolog.
-
-; CHECK: main
-; CHECK: subq{{.*}}rsp
-; CHECK: ret
-define i32 @main(i32 %arg, i8** %arg1) nounwind {
-bb:
- %tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
- %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
- %tmp3 = alloca i32 ; [#uses=1 type=i32*]
- store volatile i32 1, i32* %tmp, align 4
- store volatile i32 1, i32* %tmp2, align 4
- br label %bb4
-
-bb4: ; preds = %bb4, %bb
- %tmp6 = load volatile i32, i32* %tmp2, align 4 ; [#uses=1 type=i32]
- %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
- store volatile i32 %tmp7, i32* %tmp2, align 4
- %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
- %tmp9 = load volatile i32, i32* %tmp ; [#uses=1 type=i32]
- %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
- store volatile i32 %tmp10, i32* %tmp3
- br i1 %tmp8, label %bb11, label %bb4
-
-bb11: ; preds = %bb4
- %tmp12 = load volatile i32, i32* %tmp, align 4 ; [#uses=1 type=i32]
- ret i32 %tmp12
-}
-
-
diff --git a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
index 50352e2abe9..2060ac65446 100644
--- a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,15 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
-; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
-; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
-; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
-; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
-
-; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME
-; in X86InstrInfo::copyPhysReg() is resolved.
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
declare i32 @foo()
declare i32 @bar(i64)
@@ -22,37 +19,29 @@ declare i32 @bar(i64)
; ...
; use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
-; clobbers eflags, effectively interfering in the liveness interval.
-; Is this a case we care about? Maybe no, considering this issue
-; happens with the fast pre-regalloc scheduler enforced. A more
-; performant scheduler would move the adjcallstackdown32 out of the
-; eflags liveness interval.
+; clobbers eflags, effectively interfering in the liveness interval. However,
+; we then promote these copies into independent conditions in GPRs that avoids
+; repeated saving and restoring logic and can be trivially managed by the
+; register allocator.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-LABEL: test_intervening_call:
; 32-GOOD-RA: # %bb.0: # %entry
-; 32-GOOD-RA-NEXT: pushl %ebp
-; 32-GOOD-RA-NEXT: movl %esp, %ebp
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
-; 32-GOOD-RA-NEXT: movl 12(%ebp), %eax
-; 32-GOOD-RA-NEXT: movl 16(%ebp), %edx
-; 32-GOOD-RA-NEXT: movl 20(%ebp), %ebx
-; 32-GOOD-RA-NEXT: movl 24(%ebp), %ecx
-; 32-GOOD-RA-NEXT: movl 8(%ebp), %esi
-; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
; 32-GOOD-RA-NEXT: pushl %eax
-; 32-GOOD-RA-NEXT: seto %al
-; 32-GOOD-RA-NEXT: lahf
-; 32-GOOD-RA-NEXT: movl %eax, %esi
-; 32-GOOD-RA-NEXT: popl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-GOOD-RA-NEXT: setne %bl
; 32-GOOD-RA-NEXT: subl $8, %esp
; 32-GOOD-RA-NEXT: pushl %edx
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: calll bar
; 32-GOOD-RA-NEXT: addl $16, %esp
-; 32-GOOD-RA-NEXT: movl %esi, %eax
-; 32-GOOD-RA-NEXT: addb $127, %al
-; 32-GOOD-RA-NEXT: sahf
+; 32-GOOD-RA-NEXT: testb $-1, %bl
; 32-GOOD-RA-NEXT: jne .LBB0_3
; 32-GOOD-RA-NEXT: # %bb.1: # %t
; 32-GOOD-RA-NEXT: movl $42, %eax
@@ -61,46 +50,29 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-NEXT: xorl %eax, %eax
; 32-GOOD-RA-NEXT: .LBB0_2: # %t
; 32-GOOD-RA-NEXT: xorl %edx, %edx
+; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
-; 32-GOOD-RA-NEXT: popl %ebp
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_intervening_call:
; 32-FAST-RA: # %bb.0: # %entry
-; 32-FAST-RA-NEXT: pushl %ebp
-; 32-FAST-RA-NEXT: movl %esp, %ebp
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
-; 32-FAST-RA-NEXT: movl 8(%ebp), %esi
-; 32-FAST-RA-NEXT: movl 20(%ebp), %ebx
-; 32-FAST-RA-NEXT: movl 24(%ebp), %ecx
-; 32-FAST-RA-NEXT: movl 12(%ebp), %eax
-; 32-FAST-RA-NEXT: movl 16(%ebp), %edx
-; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: seto %al
-; 32-FAST-RA-NEXT: lahf
-; 32-FAST-RA-NEXT: movl %eax, %ecx
-; 32-FAST-RA-NEXT: popl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-FAST-RA-NEXT: setne %bl
; 32-FAST-RA-NEXT: subl $8, %esp
-; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: movl %ecx, %eax
-; 32-FAST-RA-NEXT: addb $127, %al
-; 32-FAST-RA-NEXT: sahf
-; 32-FAST-RA-NEXT: popl %eax
-; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: seto %al
-; 32-FAST-RA-NEXT: lahf
-; 32-FAST-RA-NEXT: movl %eax, %esi
-; 32-FAST-RA-NEXT: popl %eax
; 32-FAST-RA-NEXT: pushl %edx
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: calll bar
; 32-FAST-RA-NEXT: addl $16, %esp
-; 32-FAST-RA-NEXT: movl %esi, %eax
-; 32-FAST-RA-NEXT: addb $127, %al
-; 32-FAST-RA-NEXT: sahf
+; 32-FAST-RA-NEXT: testb $-1, %bl
; 32-FAST-RA-NEXT: jne .LBB0_3
; 32-FAST-RA-NEXT: # %bb.1: # %t
; 32-FAST-RA-NEXT: movl $42, %eax
@@ -109,122 +81,29 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-FAST-RA-NEXT: xorl %eax, %eax
; 32-FAST-RA-NEXT: .LBB0_2: # %t
; 32-FAST-RA-NEXT: xorl %edx, %edx
+; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
-; 32-FAST-RA-NEXT: popl %ebp
; 32-FAST-RA-NEXT: retl
;
-; 64-GOOD-RA-LABEL: test_intervening_call:
-; 64-GOOD-RA: # %bb.0: # %entry
-; 64-GOOD-RA-NEXT: pushq %rbp
-; 64-GOOD-RA-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-NEXT: pushq %rbx
-; 64-GOOD-RA-NEXT: pushq %rax
-; 64-GOOD-RA-NEXT: movq %rsi, %rax
-; 64-GOOD-RA-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-GOOD-RA-NEXT: pushfq
-; 64-GOOD-RA-NEXT: popq %rbx
-; 64-GOOD-RA-NEXT: movq %rax, %rdi
-; 64-GOOD-RA-NEXT: callq bar
-; 64-GOOD-RA-NEXT: pushq %rbx
-; 64-GOOD-RA-NEXT: popfq
-; 64-GOOD-RA-NEXT: jne .LBB0_3
-; 64-GOOD-RA-NEXT: # %bb.1: # %t
-; 64-GOOD-RA-NEXT: movl $42, %eax
-; 64-GOOD-RA-NEXT: jmp .LBB0_2
-; 64-GOOD-RA-NEXT: .LBB0_3: # %f
-; 64-GOOD-RA-NEXT: xorl %eax, %eax
-; 64-GOOD-RA-NEXT: .LBB0_2: # %t
-; 64-GOOD-RA-NEXT: addq $8, %rsp
-; 64-GOOD-RA-NEXT: popq %rbx
-; 64-GOOD-RA-NEXT: popq %rbp
-; 64-GOOD-RA-NEXT: retq
-;
-; 64-FAST-RA-LABEL: test_intervening_call:
-; 64-FAST-RA: # %bb.0: # %entry
-; 64-FAST-RA-NEXT: pushq %rbp
-; 64-FAST-RA-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-NEXT: pushq %rbx
-; 64-FAST-RA-NEXT: pushq %rax
-; 64-FAST-RA-NEXT: movq %rsi, %rax
-; 64-FAST-RA-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-FAST-RA-NEXT: pushfq
-; 64-FAST-RA-NEXT: popq %rbx
-; 64-FAST-RA-NEXT: movq %rax, %rdi
-; 64-FAST-RA-NEXT: callq bar
-; 64-FAST-RA-NEXT: pushq %rbx
-; 64-FAST-RA-NEXT: popfq
-; 64-FAST-RA-NEXT: jne .LBB0_3
-; 64-FAST-RA-NEXT: # %bb.1: # %t
-; 64-FAST-RA-NEXT: movl $42, %eax
-; 64-FAST-RA-NEXT: jmp .LBB0_2
-; 64-FAST-RA-NEXT: .LBB0_3: # %f
-; 64-FAST-RA-NEXT: xorl %eax, %eax
-; 64-FAST-RA-NEXT: .LBB0_2: # %t
-; 64-FAST-RA-NEXT: addq $8, %rsp
-; 64-FAST-RA-NEXT: popq %rbx
-; 64-FAST-RA-NEXT: popq %rbp
-; 64-FAST-RA-NEXT: retq
-;
-; 64-GOOD-RA-SAHF-LABEL: test_intervening_call:
-; 64-GOOD-RA-SAHF: # %bb.0: # %entry
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbp
-; 64-GOOD-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbx
-; 64-GOOD-RA-SAHF-NEXT: pushq %rax
-; 64-GOOD-RA-SAHF-NEXT: movq %rsi, %rax
-; 64-GOOD-RA-SAHF-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-GOOD-RA-SAHF-NEXT: pushq %rax
-; 64-GOOD-RA-SAHF-NEXT: seto %al
-; 64-GOOD-RA-SAHF-NEXT: lahf
-; 64-GOOD-RA-SAHF-NEXT: movq %rax, %rbx
-; 64-GOOD-RA-SAHF-NEXT: popq %rax
-; 64-GOOD-RA-SAHF-NEXT: movq %rax, %rdi
-; 64-GOOD-RA-SAHF-NEXT: callq bar
-; 64-GOOD-RA-SAHF-NEXT: movq %rbx, %rax
-; 64-GOOD-RA-SAHF-NEXT: addb $127, %al
-; 64-GOOD-RA-SAHF-NEXT: sahf
-; 64-GOOD-RA-SAHF-NEXT: jne .LBB0_3
-; 64-GOOD-RA-SAHF-NEXT: # %bb.1: # %t
-; 64-GOOD-RA-SAHF-NEXT: movl $42, %eax
-; 64-GOOD-RA-SAHF-NEXT: jmp .LBB0_2
-; 64-GOOD-RA-SAHF-NEXT: .LBB0_3: # %f
-; 64-GOOD-RA-SAHF-NEXT: xorl %eax, %eax
-; 64-GOOD-RA-SAHF-NEXT: .LBB0_2: # %t
-; 64-GOOD-RA-SAHF-NEXT: addq $8, %rsp
-; 64-GOOD-RA-SAHF-NEXT: popq %rbx
-; 64-GOOD-RA-SAHF-NEXT: popq %rbp
-; 64-GOOD-RA-SAHF-NEXT: retq
-;
-; 64-FAST-RA-SAHF-LABEL: test_intervening_call:
-; 64-FAST-RA-SAHF: # %bb.0: # %entry
-; 64-FAST-RA-SAHF-NEXT: pushq %rbp
-; 64-FAST-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-SAHF-NEXT: pushq %rbx
-; 64-FAST-RA-SAHF-NEXT: pushq %rax
-; 64-FAST-RA-SAHF-NEXT: movq %rsi, %rax
-; 64-FAST-RA-SAHF-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-FAST-RA-SAHF-NEXT: pushq %rax
-; 64-FAST-RA-SAHF-NEXT: seto %al
-; 64-FAST-RA-SAHF-NEXT: lahf
-; 64-FAST-RA-SAHF-NEXT: movq %rax, %rbx
-; 64-FAST-RA-SAHF-NEXT: popq %rax
-; 64-FAST-RA-SAHF-NEXT: movq %rax, %rdi
-; 64-FAST-RA-SAHF-NEXT: callq bar
-; 64-FAST-RA-SAHF-NEXT: movq %rbx, %rax
-; 64-FAST-RA-SAHF-NEXT: addb $127, %al
-; 64-FAST-RA-SAHF-NEXT: sahf
-; 64-FAST-RA-SAHF-NEXT: jne .LBB0_3
-; 64-FAST-RA-SAHF-NEXT: # %bb.1: # %t
-; 64-FAST-RA-SAHF-NEXT: movl $42, %eax
-; 64-FAST-RA-SAHF-NEXT: jmp .LBB0_2
-; 64-FAST-RA-SAHF-NEXT: .LBB0_3: # %f
-; 64-FAST-RA-SAHF-NEXT: xorl %eax, %eax
-; 64-FAST-RA-SAHF-NEXT: .LBB0_2: # %t
-; 64-FAST-RA-SAHF-NEXT: addq $8, %rsp
-; 64-FAST-RA-SAHF-NEXT: popq %rbx
-; 64-FAST-RA-SAHF-NEXT: popq %rbp
-; 64-FAST-RA-SAHF-NEXT: retq
+; 64-ALL-LABEL: test_intervening_call:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: movq %rsi, %rax
+; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
+; 64-ALL-NEXT: setne %bl
+; 64-ALL-NEXT: movq %rax, %rdi
+; 64-ALL-NEXT: callq bar
+; 64-ALL-NEXT: testb $-1, %bl
+; 64-ALL-NEXT: jne .LBB0_2
+; 64-ALL-NEXT: # %bb.1: # %t
+; 64-ALL-NEXT: movl $42, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
+; 64-ALL-NEXT: .LBB0_2: # %f
+; 64-ALL-NEXT: xorl %eax, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
entry:
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
@@ -331,149 +210,64 @@ cond.end:
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
; 32-GOOD-RA-LABEL: test_feed_cmov:
; 32-GOOD-RA: # %bb.0: # %entry
-; 32-GOOD-RA-NEXT: pushl %ebp
-; 32-GOOD-RA-NEXT: movl %esp, %ebp
-; 32-GOOD-RA-NEXT: pushl %edi
+; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
-; 32-GOOD-RA-NEXT: movl 12(%ebp), %eax
-; 32-GOOD-RA-NEXT: movl 16(%ebp), %esi
-; 32-GOOD-RA-NEXT: movl 8(%ebp), %ecx
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
-; 32-GOOD-RA-NEXT: seto %al
-; 32-GOOD-RA-NEXT: lahf
-; 32-GOOD-RA-NEXT: movl %eax, %edi
+; 32-GOOD-RA-NEXT: sete %bl
; 32-GOOD-RA-NEXT: calll foo
-; 32-GOOD-RA-NEXT: pushl %eax
-; 32-GOOD-RA-NEXT: movl %edi, %eax
-; 32-GOOD-RA-NEXT: addb $127, %al
-; 32-GOOD-RA-NEXT: sahf
-; 32-GOOD-RA-NEXT: popl %eax
-; 32-GOOD-RA-NEXT: je .LBB2_2
+; 32-GOOD-RA-NEXT: testb $-1, %bl
+; 32-GOOD-RA-NEXT: jne .LBB2_2
; 32-GOOD-RA-NEXT: # %bb.1: # %entry
; 32-GOOD-RA-NEXT: movl %eax, %esi
; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
; 32-GOOD-RA-NEXT: movl %esi, %eax
+; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
-; 32-GOOD-RA-NEXT: popl %edi
-; 32-GOOD-RA-NEXT: popl %ebp
+; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_feed_cmov:
; 32-FAST-RA: # %bb.0: # %entry
-; 32-FAST-RA-NEXT: pushl %ebp
-; 32-FAST-RA-NEXT: movl %esp, %ebp
-; 32-FAST-RA-NEXT: pushl %edi
+; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
-; 32-FAST-RA-NEXT: movl 8(%ebp), %ecx
-; 32-FAST-RA-NEXT: movl 16(%ebp), %esi
-; 32-FAST-RA-NEXT: movl 12(%ebp), %eax
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
-; 32-FAST-RA-NEXT: seto %al
-; 32-FAST-RA-NEXT: lahf
-; 32-FAST-RA-NEXT: movl %eax, %edi
+; 32-FAST-RA-NEXT: sete %bl
; 32-FAST-RA-NEXT: calll foo
-; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: movl %edi, %eax
-; 32-FAST-RA-NEXT: addb $127, %al
-; 32-FAST-RA-NEXT: sahf
-; 32-FAST-RA-NEXT: popl %eax
-; 32-FAST-RA-NEXT: je .LBB2_2
+; 32-FAST-RA-NEXT: testb $-1, %bl
+; 32-FAST-RA-NEXT: jne .LBB2_2
; 32-FAST-RA-NEXT: # %bb.1: # %entry
; 32-FAST-RA-NEXT: movl %eax, %esi
; 32-FAST-RA-NEXT: .LBB2_2: # %entry
; 32-FAST-RA-NEXT: movl %esi, %eax
+; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
-; 32-FAST-RA-NEXT: popl %edi
-; 32-FAST-RA-NEXT: popl %ebp
+; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
-; 64-GOOD-RA-LABEL: test_feed_cmov:
-; 64-GOOD-RA: # %bb.0: # %entry
-; 64-GOOD-RA-NEXT: pushq %rbp
-; 64-GOOD-RA-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-NEXT: pushq %r14
-; 64-GOOD-RA-NEXT: pushq %rbx
-; 64-GOOD-RA-NEXT: movl %edx, %ebx
-; 64-GOOD-RA-NEXT: movl %esi, %eax
-; 64-GOOD-RA-NEXT: lock cmpxchgl %edx, (%rdi)
-; 64-GOOD-RA-NEXT: pushfq
-; 64-GOOD-RA-NEXT: popq %r14
-; 64-GOOD-RA-NEXT: callq foo
-; 64-GOOD-RA-NEXT: pushq %r14
-; 64-GOOD-RA-NEXT: popfq
-; 64-GOOD-RA-NEXT: cmovel %ebx, %eax
-; 64-GOOD-RA-NEXT: popq %rbx
-; 64-GOOD-RA-NEXT: popq %r14
-; 64-GOOD-RA-NEXT: popq %rbp
-; 64-GOOD-RA-NEXT: retq
-;
-; 64-FAST-RA-LABEL: test_feed_cmov:
-; 64-FAST-RA: # %bb.0: # %entry
-; 64-FAST-RA-NEXT: pushq %rbp
-; 64-FAST-RA-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-NEXT: pushq %r14
-; 64-FAST-RA-NEXT: pushq %rbx
-; 64-FAST-RA-NEXT: movl %edx, %ebx
-; 64-FAST-RA-NEXT: movl %esi, %eax
-; 64-FAST-RA-NEXT: lock cmpxchgl %edx, (%rdi)
-; 64-FAST-RA-NEXT: pushfq
-; 64-FAST-RA-NEXT: popq %r14
-; 64-FAST-RA-NEXT: callq foo
-; 64-FAST-RA-NEXT: pushq %r14
-; 64-FAST-RA-NEXT: popfq
-; 64-FAST-RA-NEXT: cmovel %ebx, %eax
-; 64-FAST-RA-NEXT: popq %rbx
-; 64-FAST-RA-NEXT: popq %r14
-; 64-FAST-RA-NEXT: popq %rbp
-; 64-FAST-RA-NEXT: retq
-;
-; 64-GOOD-RA-SAHF-LABEL: test_feed_cmov:
-; 64-GOOD-RA-SAHF: # %bb.0: # %entry
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbp
-; 64-GOOD-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-SAHF-NEXT: pushq %r14
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbx
-; 64-GOOD-RA-SAHF-NEXT: movl %edx, %ebx
-; 64-GOOD-RA-SAHF-NEXT: movl %esi, %eax
-; 64-GOOD-RA-SAHF-NEXT: lock cmpxchgl %edx, (%rdi)
-; 64-GOOD-RA-SAHF-NEXT: seto %al
-; 64-GOOD-RA-SAHF-NEXT: lahf
-; 64-GOOD-RA-SAHF-NEXT: movq %rax, %r14
-; 64-GOOD-RA-SAHF-NEXT: callq foo
-; 64-GOOD-RA-SAHF-NEXT: pushq %rax
-; 64-GOOD-RA-SAHF-NEXT: movq %r14, %rax
-; 64-GOOD-RA-SAHF-NEXT: addb $127, %al
-; 64-GOOD-RA-SAHF-NEXT: sahf
-; 64-GOOD-RA-SAHF-NEXT: popq %rax
-; 64-GOOD-RA-SAHF-NEXT: cmovel %ebx, %eax
-; 64-GOOD-RA-SAHF-NEXT: popq %rbx
-; 64-GOOD-RA-SAHF-NEXT: popq %r14
-; 64-GOOD-RA-SAHF-NEXT: popq %rbp
-; 64-GOOD-RA-SAHF-NEXT: retq
-;
-; 64-FAST-RA-SAHF-LABEL: test_feed_cmov:
-; 64-FAST-RA-SAHF: # %bb.0: # %entry
-; 64-FAST-RA-SAHF-NEXT: pushq %rbp
-; 64-FAST-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-SAHF-NEXT: pushq %r14
-; 64-FAST-RA-SAHF-NEXT: pushq %rbx
-; 64-FAST-RA-SAHF-NEXT: movl %edx, %ebx
-; 64-FAST-RA-SAHF-NEXT: movl %esi, %eax
-; 64-FAST-RA-SAHF-NEXT: lock cmpxchgl %edx, (%rdi)
-; 64-FAST-RA-SAHF-NEXT: seto %al
-; 64-FAST-RA-SAHF-NEXT: lahf
-; 64-FAST-RA-SAHF-NEXT: movq %rax, %r14
-; 64-FAST-RA-SAHF-NEXT: callq foo
-; 64-FAST-RA-SAHF-NEXT: pushq %rax
-; 64-FAST-RA-SAHF-NEXT: movq %r14, %rax
-; 64-FAST-RA-SAHF-NEXT: addb $127, %al
-; 64-FAST-RA-SAHF-NEXT: sahf
-; 64-FAST-RA-SAHF-NEXT: popq %rax
-; 64-FAST-RA-SAHF-NEXT: cmovel %ebx, %eax
-; 64-FAST-RA-SAHF-NEXT: popq %rbx
-; 64-FAST-RA-SAHF-NEXT: popq %r14
-; 64-FAST-RA-SAHF-NEXT: popq %rbp
-; 64-FAST-RA-SAHF-NEXT: retq
+; 64-ALL-LABEL: test_feed_cmov:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbp
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: pushq %rax
+; 64-ALL-NEXT: movl %edx, %ebx
+; 64-ALL-NEXT: movl %esi, %eax
+; 64-ALL-NEXT: lock cmpxchgl %edx, (%rdi)
+; 64-ALL-NEXT: sete %bpl
+; 64-ALL-NEXT: callq foo
+; 64-ALL-NEXT: testb $-1, %bpl
+; 64-ALL-NEXT: cmovnel %ebx, %eax
+; 64-ALL-NEXT: addq $8, %rsp
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: popq %rbp
+; 64-ALL-NEXT: retq
entry:
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1
diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 1e4fca5d10a..07d13fe2e09 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -19,35 +19,24 @@ define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movb b, %cl
-; X32-NEXT: movb %cl, %al
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X32-NEXT: incl c
-; X32-NEXT: pushl %eax
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: popl %eax
+; X32-NEXT: sete %dl
; X32-NEXT: movb a, %ah
; X32-NEXT: movb %ah, %ch
; X32-NEXT: incb %ch
; X32-NEXT: cmpb %cl, %ah
; X32-NEXT: sete d
; X32-NEXT: movb %ch, a
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
-; X32-NEXT: je .LBB0_2
+; X32-NEXT: testb $-1, %dl
+; X32-NEXT: jne .LBB0_2
; X32-NEXT: # %bb.1: # %if.then
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: pushl %eax
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
-; X32-NEXT: popl %ebp
; X32-NEXT: .LBB0_2: # %if.end
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
@@ -59,23 +48,20 @@ define i32 @test1() nounwind {
; X64-NEXT: incb %al
; X64-NEXT: movb %al, {{.*}}(%rip)
; X64-NEXT: incl {{.*}}(%rip)
-; X64-NEXT: pushfq
-; X64-NEXT: popq %rsi
+; X64-NEXT: sete %sil
; X64-NEXT: movb {{.*}}(%rip), %cl
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: incb %dl
; X64-NEXT: cmpb %dil, %cl
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: movb %dl, {{.*}}(%rip)
-; X64-NEXT: pushq %rsi
-; X64-NEXT: popfq
-; X64-NEXT: je .LBB0_2
+; X64-NEXT: testb $-1, %sil
+; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
-; X64-NEXT: pushq %rbp
-; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: pushq %rax
; X64-NEXT: movsbl %al, %edi
; X64-NEXT: callq external
-; X64-NEXT: popq %rbp
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
@@ -108,54 +94,40 @@ if.end:
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: pushl %esi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: pushl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: setne %bl
; X32-NEXT: pushl $42
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
+; X32-NEXT: testb $-1, %bl
; X32-NEXT: je .LBB1_1
-; X32-NEXT: # %bb.3: # %else
+; X32-NEXT: # %bb.2: # %else
; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: jmp .LBB1_2
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
; X32-NEXT: .LBB1_1: # %then
; X32-NEXT: movl $64, %eax
-; X32-NEXT: .LBB1_2: # %then
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebp
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbp
-; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: pushq %rbx
-; X64-NEXT: pushq %rax
; X64-NEXT: incl (%rdi)
-; X64-NEXT: pushfq
-; X64-NEXT: popq %rbx
+; X64-NEXT: setne %bl
; X64-NEXT: movl $42, %edi
; X64-NEXT: callq external
-; X64-NEXT: pushq %rbx
-; X64-NEXT: popfq
+; X64-NEXT: testb $-1, %bl
; X64-NEXT: je .LBB1_1
-; X64-NEXT: # %bb.3: # %else
+; X64-NEXT: # %bb.2: # %else
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: jmp .LBB1_2
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
; X64-NEXT: .LBB1_1: # %then
; X64-NEXT: movl $64, %eax
-; X64-NEXT: .LBB1_2: # %then
-; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
-; X64-NEXT: popq %rbp
; X64-NEXT: retq
entry:
%val = load i32, i32* %ptr
@@ -183,43 +155,25 @@ declare void @external_b()
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
+; X32-NEXT: setne %al
; X32-NEXT: incb a
; X32-NEXT: sete d
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: je .LBB2_1
-; X32-NEXT: # %bb.2: # %else
-; X32-NEXT: popl %ebp
-; X32-NEXT: jmp external_b # TAILCALL
-; X32-NEXT: .LBB2_1: # %then
-; X32-NEXT: popl %ebp
+; X32-NEXT: testb $-1, %al
+; X32-NEXT: jne external_b # TAILCALL
+; X32-NEXT: # %bb.1: # %then
; X32-NEXT: jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbp
-; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: incl (%rdi)
-; X64-NEXT: pushfq
-; X64-NEXT: popq %rax
+; X64-NEXT: setne %al
; X64-NEXT: incb {{.*}}(%rip)
; X64-NEXT: sete {{.*}}(%rip)
-; X64-NEXT: pushq %rax
-; X64-NEXT: popfq
-; X64-NEXT: je .LBB2_1
-; X64-NEXT: # %bb.2: # %else
-; X64-NEXT: popq %rbp
-; X64-NEXT: jmp external_b # TAILCALL
-; X64-NEXT: .LBB2_1: # %then
-; X64-NEXT: popq %rbp
+; X64-NEXT: testb $-1, %al
+; X64-NEXT: jne external_b # TAILCALL
+; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
entry:
%val = load i32, i32* %ptr
diff --git a/llvm/test/CodeGen/X86/eflags-copy-expansion.mir b/llvm/test/CodeGen/X86/eflags-copy-expansion.mir
deleted file mode 100644
index 385e3d9a67d..00000000000
--- a/llvm/test/CodeGen/X86/eflags-copy-expansion.mir
+++ /dev/null
@@ -1,64 +0,0 @@
-# RUN: llc -run-pass postrapseudos -mtriple=i386-apple-macosx -o - %s | FileCheck %s
-
-# Verify that we correctly save and restore eax when copying eflags,
-# even when only a smaller alias of eax is used. We used to check only
-# eax and not its aliases.
-# PR27624.
-
---- |
- target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
- define void @foo() {
- entry:
- br label %false
- false:
- ret void
- }
-
-...
-
----
-name: foo
-tracksRegLiveness: true
-liveins:
- - { reg: '$edi' }
-body: |
- bb.0.entry:
- liveins: $edi
- NOOP implicit-def $al
-
- ; The bug was triggered only when LivePhysReg is used, which
- ; happens only when the heuristic for the liveness computation
- ; failed. The liveness computation heuristic looks at 10 instructions
- ; before and after the copy. Make sure we do not reach the definition of
- ; AL in 10 instructions, otherwise the heuristic will see that it is live.
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- ; Save AL.
- ; CHECK: PUSH32r killed $eax
-
- ; Copy edi into EFLAGS
- ; CHECK-NEXT: $eax = MOV32rr $edi
- ; CHECK-NEXT: $al = ADD8ri $al, 127, implicit-def $eflags
- ; CHECK-NEXT: SAHF implicit-def $eflags, implicit $ah
- $eflags = COPY $edi
-
- ; Restore AL.
- ; CHECK-NEXT: $eax = POP32r
- bb.1.false:
- liveins: $al
- NOOP implicit $al
- RETQ
-
-...
diff --git a/llvm/test/CodeGen/X86/flags-copy-lowering.mir b/llvm/test/CodeGen/X86/flags-copy-lowering.mir
new file mode 100644
index 00000000000..fd263c0d1d4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/flags-copy-lowering.mir
@@ -0,0 +1,485 @@
+# RUN: llc -run-pass x86-flags-copy-lowering -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Lower various interesting copy patterns of EFLAGS without using LAHF/SAHF.
+
+--- |
+ target triple = "x86_64-unknown-unknown"
+
+ declare void @foo()
+
+ define i32 @test_branch(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret i32 0
+ }
+
+ define i32 @test_branch_fallthrough(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret i32 0
+ }
+
+ define void @test_setcc(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_cmov(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_adc(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_sbb(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_adcx(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_adox(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_rcl(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_rcr(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+...
+---
+name: test_branch
+# CHECK-LABEL: name: test_branch
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2, %bb.3
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ CMP64rr %0, %1, implicit-def $eflags
+ %2:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %2
+ JA_1 %bb.1, implicit $eflags
+ JB_1 %bb.2, implicit $eflags
+ JMP_1 %bb.3
+ ; CHECK-NOT: $eflags =
+ ;
+ ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: JNE_1 %bb.1, implicit killed $eflags
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: {{.*$}}
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: JNE_1 %bb.2, implicit killed $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+
+ bb.1:
+ %3:gr32 = MOV32ri64 42
+ $eax = COPY %3
+ RET 0, $eax
+
+ bb.2:
+ %4:gr32 = MOV32ri64 43
+ $eax = COPY %4
+ RET 0, $eax
+
+ bb.3:
+ %5:gr32 = MOV32r0 implicit-def dead $eflags
+ $eax = COPY %5
+ RET 0, $eax
+
+...
+---
+name: test_branch_fallthrough
+# CHECK-LABEL: name: test_branch_fallthrough
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2, %bb.3
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ CMP64rr %0, %1, implicit-def $eflags
+ %2:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %2
+ JA_1 %bb.2, implicit $eflags
+ JB_1 %bb.3, implicit $eflags
+ ; CHECK-NOT: $eflags =
+ ;
+ ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: JNE_1 %bb.2, implicit killed $eflags
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: {{.*$}}
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: JNE_1 %bb.3, implicit killed $eflags
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: bb.1:
+
+ bb.1:
+ %5:gr32 = MOV32r0 implicit-def dead $eflags
+ $eax = COPY %5
+ RET 0, $eax
+
+ bb.2:
+ %3:gr32 = MOV32ri64 42
+ $eax = COPY %3
+ RET 0, $eax
+
+ bb.3:
+ %4:gr32 = MOV32ri64 43
+ $eax = COPY %4
+ RET 0, $eax
+
+...
+---
+name: test_setcc
+# CHECK-LABEL: name: test_setcc
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ CMP64rr %0, %1, implicit-def $eflags
+ %2:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
+ ; CHECK-NEXT: %[[NE_REG:[^:]*]]:gr8 = SETNEr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %2
+ %3:gr8 = SETAr implicit $eflags
+ %4:gr8 = SETBr implicit $eflags
+ %5:gr8 = SETEr implicit $eflags
+ %6:gr8 = SETNEr implicit killed $eflags
+ MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %3
+ MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
+ MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %5
+ MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %6
+ ; CHECK-NOT: $eflags =
+ ; CHECK-NOT: = SET{{.*}}
+ ; CHECK: MOV8mr {{.*}}, killed %[[A_REG]]
+ ; CHECK-CHECK: MOV8mr {{.*}}, killed %[[B_REG]]
+ ; CHECK-CHECK: MOV8mr {{.*}}, killed %[[E_REG]]
+ ; CHECK-CHECK: MOV8mr {{.*}}, killed %[[NE_REG]]
+
+ RET 0
+
+...
+---
+name: test_cmov
+# CHECK-LABEL: name: test_cmov
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ CMP64rr %0, %1, implicit-def $eflags
+ %2:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %2
+ %3:gr64 = CMOVA64rr %0, %1, implicit $eflags
+ %4:gr64 = CMOVB64rr %0, %1, implicit $eflags
+ %5:gr64 = CMOVE64rr %0, %1, implicit $eflags
+ %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ ; CHECK-NOT: $eflags =
+ ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ ; CHECK-NEXT: TEST8ri %[[E_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ ; CHECK-NEXT: TEST8ri %[[E_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %3
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
+
+ RET 0
+
+...
+---
+name: test_adc
+# CHECK-LABEL: name: test_adc
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
+ %3:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %3
+ %4:gr64 = ADC64ri32 %2:gr64, 42, implicit-def $eflags, implicit $eflags
+ %5:gr64 = ADC64ri32 %4:gr64, 42, implicit-def $eflags, implicit $eflags
+ ; CHECK-NOT: $eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
+ ; CHECK-NEXT: %4:gr64 = ADC64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags
+ ; CHECK-NEXT: %5:gr64 = ADC64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+
+ RET 0
+
+...
+---
+name: test_sbb
+# CHECK-LABEL: name: test_sbb
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ %2:gr64 = SUB64rr %0, %1, implicit-def $eflags
+ %3:gr64 = COPY killed $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %3
+ %4:gr64 = SBB64ri32 %2:gr64, 42, implicit-def $eflags, implicit killed $eflags
+ %5:gr64 = SBB64ri32 %4:gr64, 42, implicit-def dead $eflags, implicit killed $eflags
+ ; CHECK-NOT: $eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
+ ; CHECK-NEXT: %4:gr64 = SBB64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags
+ ; CHECK-NEXT: %5:gr64 = SBB64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+
+ RET 0
+
+...
+---
+name: test_adcx
+# CHECK-LABEL: name: test_adcx
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
+ %3:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
+ ; CHECK-NEXT: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %3
+ %4:gr64 = CMOVE64rr %0, %1, implicit $eflags
+ %5:gr64 = MOV64ri32 42
+ %6:gr64 = ADCX64rr %2, %5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NOT: $eflags =
+ ; CHECK: TEST8ri %[[E_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ ; CHECK-NEXT: %5:gr64 = MOV64ri32 42
+ ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
+ ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
+
+ RET 0
+
+...
+---
+name: test_adox
+# CHECK-LABEL: name: test_adox
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
+ %3:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
+ ; CHECK-NEXT: %[[OF_REG:[^:]*]]:gr8 = SETOr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %3
+ %4:gr64 = CMOVE64rr %0, %1, implicit $eflags
+ %5:gr64 = MOV64ri32 42
+ %6:gr64 = ADOX64rr %2, %5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NOT: $eflags =
+ ; CHECK: TEST8ri %[[E_REG]], -1, implicit-def $eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ ; CHECK-NEXT: %5:gr64 = MOV64ri32 42
+ ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def $eflags
+ ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
+
+ RET 0
+
+...
+---
+name: test_rcl
+# CHECK-LABEL: name: test_rcl
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
+ %3:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %3
+ %4:gr64 = RCL64r1 %2:gr64, implicit-def $eflags, implicit $eflags
+ %5:gr64 = RCL64r1 %4:gr64, implicit-def $eflags, implicit $eflags
+ ; CHECK-NOT: $eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
+ ; CHECK-NEXT: %4:gr64 = RCL64r1 %2, implicit-def $eflags, implicit killed $eflags
+ ; CHECK-NEXT: %5:gr64 = RCL64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+
+ RET 0
+
+...
+---
+name: test_rcr
+# CHECK-LABEL: name: test_rcr
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
+ %3:gr64 = COPY $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %3
+ %4:gr64 = RCR64r1 %2:gr64, implicit-def $eflags, implicit $eflags
+ %5:gr64 = RCR64r1 %4:gr64, implicit-def $eflags, implicit $eflags
+ ; CHECK-NOT: $eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
+ ; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def $eflags, implicit killed $eflags
+ ; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/mul-i1024.ll b/llvm/test/CodeGen/X86/mul-i1024.ll
index 1f15818f8d4..8dd8a8476de 100644
--- a/llvm/test/CodeGen/X86/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/mul-i1024.ll
@@ -6,202 +6,195 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-LABEL: test_1024:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $996, %esp # imm = 0x3E4
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 32(%eax), %eax
+; X32-NEXT: subl $1000, %esp # imm = 0x3E8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 48(%eax), %ecx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 32(%edx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: mull %ecx
+; X32-NEXT: xorl %edi, %edi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: movl 48(%esi), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ecx
-; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 32(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 36(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
; X32-NEXT: adcl $0, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 36(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: leal (%ebx,%edi), %eax
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: leal (%ecx,%edx), %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: leal (%ebx,%eax), %eax
+; X32-NEXT: leal (%ecx,%ebp), %edx
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %esi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %ecx
-; X32-NEXT: movl 16(%ecx), %eax
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 16(%ebp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl %ebx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%ecx), %eax
+; X32-NEXT: movl (%ebp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebp
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 4(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: movl %ebp, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %bh
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bh, %eax
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: movl %ebx, %ebp
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb %cl
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 8(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: mull %ebx
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 52(%eax), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %ecx, %edi
-; X32-NEXT: setb %cl
; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movzbl %cl, %eax
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 56(%eax), %eax
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 52(%ebp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movzbl %bl, %ebx
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl 56(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
@@ -210,10 +203,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 40(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -228,46 +221,42 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 16(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl 20(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl %edi, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 24(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
@@ -276,55 +265,54 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %ebx
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %ecx, %eax
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: adcl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 20(%eax), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 20(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
@@ -333,31 +321,30 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 24(%eax), %eax
+; X32-NEXT: movl 24(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl %ebp, %edi
; X32-NEXT: addl %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: addl %ecx, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -367,8 +354,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 4(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
@@ -377,8 +364,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %ecx, %esi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ecx, %edi
; X32-NEXT: setb %cl
@@ -386,8 +373,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 8(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -395,57 +383,54 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %ecx
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %esi, %edx
+; X32-NEXT: movl %esi, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %edx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: pushl %eax
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: popl %eax
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %eax, %edx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ecx, %edx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %edx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %edx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -458,82 +443,78 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %edx, %eax
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 40(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %edi
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %edx
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: addl %edi, %edx
+; X32-NEXT: adcl %ebx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl 48(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl 52(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -542,38 +523,37 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 56(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebp, %ebx
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %edx, %edi
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -584,67 +564,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 64(%eax), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 64(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addl %edx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 80(%eax), %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 80(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
+; X32-NEXT: movl %ebp, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %eax, %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ecx
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 80(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: mull %ebx
+; X32-NEXT: xorl %edi, %edi
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 64(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %esi, %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -654,15 +631,16 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -677,122 +655,116 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X32-NEXT: addb $255, %al
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 68(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 72(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: movl %ebx, %edx
+; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 84(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -806,48 +778,46 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movzbl %bl, %esi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 88(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: adcl %ebx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 84(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -857,100 +827,96 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 88(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %ebp, %ebx
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: movl %esi, %edx
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 68(%eax), %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 68(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 72(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %edx
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: movl %edx, %eax
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edx, %eax
@@ -961,9 +927,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 12(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -984,136 +950,131 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %edx
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: setb %dl
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movzbl %dl, %edx
; X32-NEXT: adcl %ebx, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 44(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %edx
-; X32-NEXT: adcl %ecx, %ebx
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %ecx, %eax
+; X32-NEXT: addl %edi, %edx
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %edi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl %edi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1123,76 +1084,74 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 12(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edx
-; X32-NEXT: setb %cl
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %edx
-; X32-NEXT: movzbl %cl, %eax
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movzbl %bl, %ebp
+; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: addl %esi, %edx
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ebp, %esi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: setb %bl
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movzbl %bl, %esi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1203,59 +1162,59 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 44(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %ebx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %bl
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %cl
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %edx
+; X32-NEXT: movl %ebp, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: addl %edx, %eax
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %dl
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movzbl %dl, %edx
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %edi, %ebx
+; X32-NEXT: movzbl %dl, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %esi, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1263,67 +1222,60 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 60(%eax), %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 60(%eax), %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -1332,64 +1284,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -1397,38 +1349,38 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -1451,193 +1403,192 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %edi, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
+; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -1647,35 +1598,37 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -1685,13 +1638,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 28(%eax), %ebx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
@@ -1699,118 +1652,117 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -1820,81 +1772,81 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1905,42 +1857,40 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -1950,8 +1900,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %eax, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl $0, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -1965,41 +1914,42 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 28(%eax), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 28(%eax), %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2010,62 +1960,63 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %esi, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -2073,36 +2024,36 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
@@ -2127,31 +2078,31 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2160,160 +2111,158 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %cl, %esi
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -2323,62 +2272,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -2387,8 +2338,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
@@ -2396,24 +2346,25 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
@@ -2422,43 +2373,41 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
@@ -2466,21 +2415,22 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -2499,167 +2449,162 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -2668,29 +2613,30 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -2701,81 +2647,82 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edi, %ebp
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -2786,67 +2733,66 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2854,65 +2800,65 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -2920,38 +2866,38 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -2959,7 +2905,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
@@ -2971,7 +2917,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -2982,30 +2928,31 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3015,106 +2962,107 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %edi
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 60(%eax), %esi
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 60(%eax), %ebp
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -3122,17 +3070,17 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -3141,13 +3089,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
@@ -3155,28 +3103,29 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3192,7 +3141,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -3206,107 +3156,104 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ebx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: adcl %ebp, %edx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -3318,97 +3265,91 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -3419,64 +3360,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
@@ -3484,8 +3425,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
@@ -3493,7 +3433,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3504,133 +3445,133 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edi, %ebp
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3638,16 +3579,16 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3658,18 +3599,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -3700,29 +3641,30 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %ebp, %edi
; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -3735,61 +3677,61 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -3798,8 +3740,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
@@ -3807,24 +3748,25 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
@@ -3833,43 +3775,41 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
@@ -3877,21 +3817,22 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %esi
; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -3910,56 +3851,57 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -3968,7 +3910,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %esi
@@ -3978,23 +3920,24 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -4010,15 +3953,15 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebp
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -4032,23 +3975,21 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -4062,47 +4003,49 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -4113,28 +4056,28 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
@@ -4147,70 +4090,72 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 76(%eax), %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 76(%eax), %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4218,245 +4163,245 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -4467,67 +4412,66 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 92(%eax), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 92(%eax), %ebp
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4535,17 +4479,17 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -4554,42 +4498,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -4599,63 +4544,63 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -4664,8 +4609,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %esi
@@ -4681,7 +4625,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -4694,7 +4639,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -4707,56 +4652,59 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %eax, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -4765,41 +4713,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4808,7 +4758,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -4822,7 +4772,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -4844,31 +4794,32 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -4886,15 +4837,14 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: imull %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: imull %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: movl %ecx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4908,7 +4858,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %edx, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl %edi, %esi
@@ -4918,20 +4868,20 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
@@ -4939,9 +4889,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %eax, %esi
+; X32-NEXT: imull %ebp, %esi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4950,62 +4901,62 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: imull %edi, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: movl 104(%esi), %ebx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 104(%esi), %ebp
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -5014,274 +4965,274 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %ecx
-; X32-NEXT: movl 96(%ecx), %edi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 96(%edi), %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 100(%ecx), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl 100(%edi), %edi
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %ecx
-; X32-NEXT: movl 112(%ecx), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: imull %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 112(%esi), %edi
+; X32-NEXT: imull %edi, %ebp
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: movl 116(%ecx), %eax
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl 116(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: imull %eax, %edi
-; X32-NEXT: addl %edx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 120(%ecx), %eax
+; X32-NEXT: imull %eax, %ecx
+; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl 120(%esi), %eax
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %esi, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: imull %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 124(%ecx), %ecx
+; X32-NEXT: imull %ebp, %ecx
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl 124(%ebx), %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: imull %ecx, %ebx
-; X32-NEXT: addl %edx, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edx
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: imull %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: imull %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %esi, %edx
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %eax, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edi
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, %ecx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -5292,7 +5243,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -5305,10 +5256,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -5320,37 +5270,38 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 92(%eax), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 92(%eax), %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -5361,62 +5312,62 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -5424,36 +5375,36 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
@@ -5478,32 +5429,32 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 76(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -5513,157 +5464,158 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -5673,104 +5625,105 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -5778,14 +5731,14 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -5797,42 +5750,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %esi
; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -5842,61 +5796,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -5905,7 +5862,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %esi
@@ -5915,28 +5872,29 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
@@ -5973,74 +5931,77 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ebx
-; X32-NEXT: movl 96(%ebx), %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 100(%ebx), %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 96(%ecx), %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 100(%eax), %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edi
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -6054,37 +6015,35 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 104(%eax), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 104(%ebp), %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 108(%eax), %edx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl 108(%ebp), %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
@@ -6092,20 +6051,20 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: addl %esi, %edi
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -6113,24 +6072,24 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -6143,8 +6102,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6159,51 +6118,50 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %ecx, %edx
; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: imull %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
+; X32-NEXT: imull %ecx, %esi
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl 124(%edx), %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: imull %eax, %ecx
@@ -6214,84 +6172,86 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: addl %ecx, %edx
; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl 112(%edi), %ebx
-; X32-NEXT: movl 116(%edi), %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 112(%edi), %ebp
+; X32-NEXT: movl 116(%edi), %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: imull %ecx, %edi
-; X32-NEXT: mull %ebx
+; X32-NEXT: imull %ebx, %edi
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: imull %ebp, %ecx
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ecx, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -6300,41 +6260,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -6343,7 +6305,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -6357,7 +6319,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -6383,25 +6345,25 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
@@ -6413,61 +6375,62 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: movl %ebp, %ecx
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %esi
; X32-NEXT: addl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: imull %ebx, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
-; X32-NEXT: addl %edx, %esi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: imull %esi, %edi
+; X32-NEXT: addl %edx, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6476,146 +6439,139 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: imull %eax, %edi
-; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl %edi, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: imull %ebx, %ecx
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: imull %esi, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: imull %edi, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl %esi, %edi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: addl %edi, %edx
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, %ebp
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -6625,18 +6581,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6649,13 +6605,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 16(%ebp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, (%ecx)
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -6688,36 +6641,34 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-NEXT: movl %esi, 56(%ecx)
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, 60(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 64(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 68(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 72(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 76(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 80(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 84(%ecx)
-; X32-NEXT: movl %ebx, 88(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 92(%ecx)
-; X32-NEXT: movl %edi, 96(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 100(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 104(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 108(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 112(%ecx)
-; X32-NEXT: movl %edx, 116(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 120(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 64(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 68(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 72(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 76(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 80(%ecx)
+; X32-NEXT: movl %ebp, 84(%ecx)
+; X32-NEXT: movl %edi, 88(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 92(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 96(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 100(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 104(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 108(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 112(%ecx)
+; X32-NEXT: movl %ebx, 116(%ecx)
+; X32-NEXT: movl %edx, 120(%ecx)
; X32-NEXT: movl %eax, 124(%ecx)
-; X32-NEXT: addl $996, %esp # imm = 0x3E4
+; X32-NEXT: addl $1000, %esp # imm = 0x3E8
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
index 66047e3677f..d011916f093 100644
--- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
+++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -1,13 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
-
-; TODO: Reenable verify-machineinstrs once the if (!AXDead) // FIXME in
-; X86InstrInfo::copyPhysReg() is resolved.
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
; The peephole optimizer can elide some physical register copies such as
; EFLAGS. Make sure the flags are used directly, instead of needlessly using
-; lahf, when possible.
+; saving and restoring specific conditions.
@L = external global i32
@M = external global i8
@@ -209,29 +206,22 @@ exit2:
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; CHECK32-LABEL: test_intervening_call:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
; CHECK32-NEXT: pushl %ebx
; CHECK32-NEXT: pushl %esi
-; CHECK32-NEXT: movl 12(%ebp), %eax
-; CHECK32-NEXT: movl 16(%ebp), %edx
-; CHECK32-NEXT: movl 20(%ebp), %ebx
-; CHECK32-NEXT: movl 24(%ebp), %ecx
-; CHECK32-NEXT: movl 8(%ebp), %esi
-; CHECK32-NEXT: lock cmpxchg8b (%esi)
; CHECK32-NEXT: pushl %eax
-; CHECK32-NEXT: seto %al
-; CHECK32-NEXT: lahf
-; CHECK32-NEXT: movl %eax, %esi
-; CHECK32-NEXT: popl %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: lock cmpxchg8b (%esi)
+; CHECK32-NEXT: setne %bl
; CHECK32-NEXT: subl $8, %esp
; CHECK32-NEXT: pushl %edx
; CHECK32-NEXT: pushl %eax
; CHECK32-NEXT: calll bar
; CHECK32-NEXT: addl $16, %esp
-; CHECK32-NEXT: movl %esi, %eax
-; CHECK32-NEXT: addb $127, %al
-; CHECK32-NEXT: sahf
+; CHECK32-NEXT: testb $-1, %bl
; CHECK32-NEXT: jne .LBB4_3
; CHECK32-NEXT: # %bb.1: # %t
; CHECK32-NEXT: movl $42, %eax
@@ -240,39 +230,28 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: .LBB4_2: # %t
; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: addl $4, %esp
; CHECK32-NEXT: popl %esi
; CHECK32-NEXT: popl %ebx
-; CHECK32-NEXT: popl %ebp
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: test_intervening_call:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: pushq %rbp
-; CHECK64-NEXT: movq %rsp, %rbp
; CHECK64-NEXT: pushq %rbx
-; CHECK64-NEXT: pushq %rax
; CHECK64-NEXT: movq %rsi, %rax
; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi)
-; CHECK64-NEXT: pushq %rax
-; CHECK64-NEXT: seto %al
-; CHECK64-NEXT: lahf
-; CHECK64-NEXT: movq %rax, %rbx
-; CHECK64-NEXT: popq %rax
+; CHECK64-NEXT: setne %bl
; CHECK64-NEXT: movq %rax, %rdi
; CHECK64-NEXT: callq bar
-; CHECK64-NEXT: movq %rbx, %rax
-; CHECK64-NEXT: addb $127, %al
-; CHECK64-NEXT: sahf
-; CHECK64-NEXT: jne .LBB4_3
+; CHECK64-NEXT: testb $-1, %bl
+; CHECK64-NEXT: jne .LBB4_2
; CHECK64-NEXT: # %bb.1: # %t
; CHECK64-NEXT: movl $42, %eax
-; CHECK64-NEXT: jmp .LBB4_2
-; CHECK64-NEXT: .LBB4_3: # %f
+; CHECK64-NEXT: popq %rbx
+; CHECK64-NEXT: retq
+; CHECK64-NEXT: .LBB4_2: # %f
; CHECK64-NEXT: xorl %eax, %eax
-; CHECK64-NEXT: .LBB4_2: # %t
-; CHECK64-NEXT: addq $8, %rsp
; CHECK64-NEXT: popq %rbx
-; CHECK64-NEXT: popq %rbp
; CHECK64-NEXT: retq
entry:
; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
@@ -293,32 +272,27 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6
; CHECK32-LABEL: test_two_live_flags:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
; CHECK32-NEXT: pushl %ebx
; CHECK32-NEXT: pushl %edi
; CHECK32-NEXT: pushl %esi
-; CHECK32-NEXT: movl 44(%ebp), %edi
-; CHECK32-NEXT: movl 12(%ebp), %eax
-; CHECK32-NEXT: movl 16(%ebp), %edx
-; CHECK32-NEXT: movl 20(%ebp), %ebx
-; CHECK32-NEXT: movl 24(%ebp), %ecx
-; CHECK32-NEXT: movl 8(%ebp), %esi
+; CHECK32-NEXT: pushl %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: lock cmpxchg8b (%esi)
+; CHECK32-NEXT: setne {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl %edi, %edx
+; CHECK32-NEXT: movl %ebp, %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT: lock cmpxchg8b (%esi)
-; CHECK32-NEXT: seto %al
-; CHECK32-NEXT: lahf
-; CHECK32-NEXT: movl %eax, %esi
-; CHECK32-NEXT: movl 32(%ebp), %eax
-; CHECK32-NEXT: movl 36(%ebp), %edx
-; CHECK32-NEXT: movl %edi, %ecx
-; CHECK32-NEXT: movl 40(%ebp), %ebx
-; CHECK32-NEXT: movl 28(%ebp), %edi
-; CHECK32-NEXT: lock cmpxchg8b (%edi)
; CHECK32-NEXT: sete %al
-; CHECK32-NEXT: pushl %eax
-; CHECK32-NEXT: movl %esi, %eax
-; CHECK32-NEXT: addb $127, %al
-; CHECK32-NEXT: sahf
-; CHECK32-NEXT: popl %eax
+; CHECK32-NEXT: testb $-1, {{[0-9]+}}(%esp) # 1-byte Folded Reload
; CHECK32-NEXT: jne .LBB5_4
; CHECK32-NEXT: # %bb.1: # %entry
; CHECK32-NEXT: testb %al, %al
@@ -330,6 +304,7 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: .LBB5_3: # %t
; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: addl $4, %esp
; CHECK32-NEXT: popl %esi
; CHECK32-NEXT: popl %edi
; CHECK32-NEXT: popl %ebx
@@ -338,32 +313,22 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6
;
; CHECK64-LABEL: test_two_live_flags:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: pushq %rbp
-; CHECK64-NEXT: movq %rsp, %rbp
; CHECK64-NEXT: movq %rsi, %rax
; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi)
-; CHECK64-NEXT: seto %al
-; CHECK64-NEXT: lahf
-; CHECK64-NEXT: movq %rax, %rdx
+; CHECK64-NEXT: setne %dl
; CHECK64-NEXT: movq %r8, %rax
; CHECK64-NEXT: lock cmpxchgq %r9, (%rcx)
; CHECK64-NEXT: sete %al
-; CHECK64-NEXT: pushq %rax
-; CHECK64-NEXT: movq %rdx, %rax
-; CHECK64-NEXT: addb $127, %al
-; CHECK64-NEXT: sahf
-; CHECK64-NEXT: popq %rax
+; CHECK64-NEXT: testb $-1, %dl
; CHECK64-NEXT: jne .LBB5_3
; CHECK64-NEXT: # %bb.1: # %entry
; CHECK64-NEXT: testb %al, %al
; CHECK64-NEXT: je .LBB5_3
; CHECK64-NEXT: # %bb.2: # %t
; CHECK64-NEXT: movl $42, %eax
-; CHECK64-NEXT: popq %rbp
; CHECK64-NEXT: retq
; CHECK64-NEXT: .LBB5_3: # %f
; CHECK64-NEXT: xorl %eax, %eax
-; CHECK64-NEXT: popq %rbp
; CHECK64-NEXT: retq
entry:
%cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
diff --git a/llvm/test/CodeGen/X86/win64_frame.ll b/llvm/test/CodeGen/X86/win64_frame.ll
index e8733472df0..49350a8edf8 100644
--- a/llvm/test/CodeGen/X86/win64_frame.ll
+++ b/llvm/test/CodeGen/X86/win64_frame.ll
@@ -224,71 +224,29 @@ entry:
declare i64 @dummy()
define i64 @f10(i64* %foo, i64 %bar, i64 %baz) {
-; PUSHF-LABEL: f10:
-; PUSHF: # %bb.0:
-; PUSHF-NEXT: pushq %rbp
-; PUSHF-NEXT: .seh_pushreg 5
-; PUSHF-NEXT: pushq %rsi
-; PUSHF-NEXT: .seh_pushreg 6
-; PUSHF-NEXT: pushq %rdi
-; PUSHF-NEXT: .seh_pushreg 7
-; PUSHF-NEXT: subq $32, %rsp
-; PUSHF-NEXT: .seh_stackalloc 32
-; PUSHF-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
-; PUSHF-NEXT: .seh_setframe 5, 32
-; PUSHF-NEXT: .seh_endprologue
-; PUSHF-NEXT: movq %rdx, %rsi
-; PUSHF-NEXT: movq %rdx, %rax
-; PUSHF-NEXT: lock cmpxchgq %r8, (%rcx)
-; PUSHF-NEXT: pushfq
-; PUSHF-NEXT: popq %rdi
-; PUSHF-NEXT: callq dummy
-; PUSHF-NEXT: pushq %rdi
-; PUSHF-NEXT: popfq
-; PUSHF-NEXT: cmovneq %rsi, %rax
-; PUSHF-NEXT: addq $32, %rsp
-; PUSHF-NEXT: popq %rdi
-; PUSHF-NEXT: popq %rsi
-; PUSHF-NEXT: popq %rbp
-; PUSHF-NEXT: retq
-; PUSHF-NEXT: .seh_handlerdata
-; PUSHF-NEXT: .text
-; PUSHF-NEXT: .seh_endproc
-;
-; SAHF-LABEL: f10:
-; SAHF: # %bb.0:
-; SAHF-NEXT: pushq %rbp
-; SAHF-NEXT: .seh_pushreg 5
-; SAHF-NEXT: pushq %rsi
-; SAHF-NEXT: .seh_pushreg 6
-; SAHF-NEXT: pushq %rdi
-; SAHF-NEXT: .seh_pushreg 7
-; SAHF-NEXT: subq $32, %rsp
-; SAHF-NEXT: .seh_stackalloc 32
-; SAHF-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
-; SAHF-NEXT: .seh_setframe 5, 32
-; SAHF-NEXT: .seh_endprologue
-; SAHF-NEXT: movq %rdx, %rsi
-; SAHF-NEXT: movq %rdx, %rax
-; SAHF-NEXT: lock cmpxchgq %r8, (%rcx)
-; SAHF-NEXT: seto %al
-; SAHF-NEXT: lahf
-; SAHF-NEXT: movq %rax, %rdi
-; SAHF-NEXT: callq dummy
-; SAHF-NEXT: pushq %rax
-; SAHF-NEXT: movq %rdi, %rax
-; SAHF-NEXT: addb $127, %al
-; SAHF-NEXT: sahf
-; SAHF-NEXT: popq %rax
-; SAHF-NEXT: cmovneq %rsi, %rax
-; SAHF-NEXT: addq $32, %rsp
-; SAHF-NEXT: popq %rdi
-; SAHF-NEXT: popq %rsi
-; SAHF-NEXT: popq %rbp
-; SAHF-NEXT: retq
-; SAHF-NEXT: .seh_handlerdata
-; SAHF-NEXT: .text
-; SAHF-NEXT: .seh_endproc
+; ALL-LABEL: f10:
+; ALL: # %bb.0:
+; ALL-NEXT: pushq %rsi
+; ALL-NEXT: .seh_pushreg 6
+; ALL-NEXT: pushq %rbx
+; ALL-NEXT: .seh_pushreg 3
+; ALL-NEXT: subq $40, %rsp
+; ALL-NEXT: .seh_stackalloc 40
+; ALL-NEXT: .seh_endprologue
+; ALL-NEXT: movq %rdx, %rsi
+; ALL-NEXT: movq %rdx, %rax
+; ALL-NEXT: lock cmpxchgq %r8, (%rcx)
+; ALL-NEXT: sete %bl
+; ALL-NEXT: callq dummy
+; ALL-NEXT: testb $-1, %bl
+; ALL-NEXT: cmoveq %rsi, %rax
+; ALL-NEXT: addq $40, %rsp
+; ALL-NEXT: popq %rbx
+; ALL-NEXT: popq %rsi
+; ALL-NEXT: retq
+; ALL-NEXT: .seh_handlerdata
+; ALL-NEXT: .text
+; ALL-NEXT: .seh_endproc
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
%p = extractvalue { i64, i1 } %cx, 1
diff --git a/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll b/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll
index 49afb39b6d4..2a55dd41f69 100644
--- a/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"
@@ -39,15 +39,12 @@ declare void @g(%struct.T*)
; CHECK: leal 8(%esp), %esi
; CHECK: decl (%esp)
-; CHECK: seto %al
-; CHECK: lahf
-; CHECK: movl %eax, %edi
+; CHECK: setne %[[NE_REG:.*]]
; CHECK: pushl %esi
; CHECK: calll _g
; CHECK: addl $4, %esp
-; CHECK: movl %edi, %eax
-; CHECK: addb $127, %al
-; CHECK: sahf
+; CHECK: testb $-1, %[[NE_REG]]
+; CHECK: jne
attributes #0 = { nounwind optsize }
attributes #1 = { argmemonly nounwind }
OpenPOWER on IntegriCloud