4 files changed, 298 insertions, 143 deletions
diff --git a/llvm/test/CodeGen/X86/muloti.ll b/llvm/test/CodeGen/X86/muloti.ll
index 6c6198e400f..305f71e8eb3 100644
--- a/llvm/test/CodeGen/X86/muloti.ll
+++ b/llvm/test/CodeGen/X86/muloti.ll
@@ -32,50 +32,6 @@ nooverflow:                                       ; preds = %entry
   ret %0 %tmp24
 }
 
-define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
-entry:
-; CHECK: foo
-  %retval = alloca i128, align 16
-  %coerce = alloca i128, align 16
-  %a.addr = alloca i128, align 16
-  %coerce1 = alloca i128, align 16
-  %b.addr = alloca i128, align 16
-  %0 = bitcast i128* %coerce to %0*
-  %1 = getelementptr %0, %0* %0, i32 0, i32 0
-  store i64 %a.coerce0, i64* %1
-  %2 = getelementptr %0, %0* %0, i32 0, i32 1
-  store i64 %a.coerce1, i64* %2
-  %a = load i128, i128* %coerce, align 16
-  store i128 %a, i128* %a.addr, align 16
-  %3 = bitcast i128* %coerce1 to %0*
-  %4 = getelementptr %0, %0* %3, i32 0, i32 0
-  store i64 %b.coerce0, i64* %4
-  %5 = getelementptr %0, %0* %3, i32 0, i32 1
-  store i64 %b.coerce1, i64* %5
-  %b = load i128, i128* %coerce1, align 16
-  store i128 %b, i128* %b.addr, align 16
-  %tmp = load i128, i128* %a.addr, align 16
-  %tmp2 = load i128, i128* %b.addr, align 16
-  %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
-; CHECK: cmov
-; CHECK: divti3
-  %7 = extractvalue %1 %6, 0
-  %8 = extractvalue %1 %6, 1
-  br i1 %8, label %overflow, label %nooverflow
-
-overflow:                                         ; preds = %entry
-  call void @llvm.trap()
-  unreachable
-
-nooverflow:                                       ; preds = %entry
-  store i128 %7, i128* %retval
-  %9 = bitcast i128* %retval to %0*
-  %10 = load %0, %0* %9, align 1
-  ret %0 %10
-}
-
-declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
-
 declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
 
 declare void @llvm.trap() nounwind
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 7d518ab8751..c703c46f0a2 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -53,6 +53,7 @@ define i32 @test2() nounwind {
 ; GENERIC-NEXT:    popq %rcx
 ; GENERIC-NEXT:    retq
 ; GENERIC-NEXT:  LBB1_1: ## %bb90
+; GENERIC-NEXT:    ud2
 ;
 ; ATOM-LABEL: test2:
 ; ATOM:       ## %bb.0: ## %entry
@@ -70,6 +71,7 @@ define i32 @test2() nounwind {
 ; ATOM-NEXT:    popq %rcx
 ; ATOM-NEXT:    retq
 ; ATOM-NEXT:  LBB1_1: ## %bb90
+; ATOM-NEXT:    ud2
 ;
 ; MCU-LABEL: test2:
 ; MCU:       # %bb.0: # %entry
@@ -636,71 +638,6 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
   ret i64 %cond
 }
 
-
-declare noalias i8* @_Znam(i64) noredzone
-
-define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
-; GENERIC-LABEL: test12:
-; GENERIC:       ## %bb.0: ## %entry
-; GENERIC-NEXT:    movl $4, %ecx
-; GENERIC-NEXT:    movq %rdi, %rax
-; GENERIC-NEXT:    mulq %rcx
-; GENERIC-NEXT:    movq $-1, %rdi
-; GENERIC-NEXT:    cmovnoq %rax, %rdi
-; GENERIC-NEXT:    jmp __Znam ## TAILCALL
-;
-; ATOM-LABEL: test12:
-; ATOM:       ## %bb.0: ## %entry
-; ATOM-NEXT:    movq %rdi, %rax
-; ATOM-NEXT:    movl $4, %ecx
-; ATOM-NEXT:    movq $-1, %rdi
-; ATOM-NEXT:    mulq %rcx
-; ATOM-NEXT:    cmovnoq %rax, %rdi
-; ATOM-NEXT:    jmp __Znam ## TAILCALL
-;
-; MCU-LABEL: test12:
-; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    pushl %ebp
-; MCU-NEXT:    pushl %ebx
-; MCU-NEXT:    pushl %edi
-; MCU-NEXT:    pushl %esi
-; MCU-NEXT:    movl %edx, %ebx
-; MCU-NEXT:    movl %eax, %ebp
-; MCU-NEXT:    movl $4, %ecx
-; MCU-NEXT:    mull %ecx
-; MCU-NEXT:    movl %eax, %esi
-; MCU-NEXT:    leal (%edx,%ebx,4), %edi
-; MCU-NEXT:    movl %edi, %edx
-; MCU-NEXT:    pushl $0
-; MCU-NEXT:    pushl $4
-; MCU-NEXT:    calll __udivdi3
-; MCU-NEXT:    addl $8, %esp
-; MCU-NEXT:    xorl %ebx, %edx
-; MCU-NEXT:    xorl %ebp, %eax
-; MCU-NEXT:    orl %edx, %eax
-; MCU-NEXT:    movl $-1, %eax
-; MCU-NEXT:    movl $-1, %edx
-; MCU-NEXT:    jne .LBB14_2
-; MCU-NEXT:  # %bb.1: # %entry
-; MCU-NEXT:    movl %esi, %eax
-; MCU-NEXT:    movl %edi, %edx
-; MCU-NEXT:  .LBB14_2: # %entry
-; MCU-NEXT:    popl %esi
-; MCU-NEXT:    popl %edi
-; MCU-NEXT:    popl %ebx
-; MCU-NEXT:    popl %ebp
-; MCU-NEXT:    jmp _Znam # TAILCALL
-entry:
-  %A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4)
-  %B = extractvalue { i64, i1 } %A, 1
-  %C = extractvalue { i64, i1 } %A, 0
-  %D = select i1 %B, i64 -1, i64 %C
-  %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
-  ret i8* %call
-}
-
-declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
-
 define i32 @test13(i32 %a, i32 %b) nounwind {
 ; GENERIC-LABEL: test13:
 ; GENERIC:       ## %bb.0:
@@ -862,10 +799,10 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
 ; MCU-LABEL: test18:
 ; MCU:       # %bb.0:
 ; MCU-NEXT:    cmpl $15, %eax
-; MCU-NEXT:    jl .LBB20_2
+; MCU-NEXT:    jl .LBB19_2
 ; MCU-NEXT:  # %bb.1:
 ; MCU-NEXT:    movl %ecx, %edx
-; MCU-NEXT:  .LBB20_2:
+; MCU-NEXT:  .LBB19_2:
 ; MCU-NEXT:    movl %edx, %eax
 ; MCU-NEXT:    retl
   %cmp = icmp slt i32 %x, 15
@@ -902,10 +839,10 @@ define void @clamp_i8(i32 %src, i8* %dst) {
 ; GENERIC-NEXT:    cmovlel %edi, %eax
 ; GENERIC-NEXT:    cmpl $-128, %eax
 ; GENERIC-NEXT:    movb $-128, %cl
-; GENERIC-NEXT:    jl LBB22_2
+; GENERIC-NEXT:    jl LBB21_2
 ; GENERIC-NEXT:  ## %bb.1:
 ; GENERIC-NEXT:    movl %eax, %ecx
-; GENERIC-NEXT:  LBB22_2:
+; GENERIC-NEXT:  LBB21_2:
 ; GENERIC-NEXT:    movb %cl, (%rsi)
 ; GENERIC-NEXT:    retq
 ;
@@ -916,10 +853,10 @@ define void @clamp_i8(i32 %src, i8* %dst) {
 ; ATOM-NEXT:    movb $-128, %cl
 ; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    cmpl $-128, %eax
-; ATOM-NEXT:    jl LBB22_2
+; ATOM-NEXT:    jl LBB21_2
 ; ATOM-NEXT:  ## %bb.1:
 ; ATOM-NEXT:    movl %eax, %ecx
-; ATOM-NEXT:  LBB22_2:
+; ATOM-NEXT:  LBB21_2:
 ; ATOM-NEXT:    movb %cl, (%rsi)
 ; ATOM-NEXT:    retq
 ;
@@ -927,16 +864,16 @@ define void @clamp_i8(i32 %src, i8* %dst) {
 ; MCU:       # %bb.0:
 ; MCU-NEXT:    cmpl $127, %eax
 ; MCU-NEXT:    movl $127, %ecx
-; MCU-NEXT:    jg .LBB22_2
+; MCU-NEXT:    jg .LBB21_2
 ; MCU-NEXT:  # %bb.1:
 ; MCU-NEXT:    movl %eax, %ecx
-; MCU-NEXT:  .LBB22_2:
+; MCU-NEXT:  .LBB21_2:
 ; MCU-NEXT:    cmpl $-128, %ecx
 ; MCU-NEXT:    movb $-128, %al
-; MCU-NEXT:    jl .LBB22_4
+; MCU-NEXT:    jl .LBB21_4
 ; MCU-NEXT:  # %bb.3:
 ; MCU-NEXT:    movl %ecx, %eax
-; MCU-NEXT:  .LBB22_4:
+; MCU-NEXT:  .LBB21_4:
 ; MCU-NEXT:    movb %al, (%edx)
 ; MCU-NEXT:    retl
   %cmp = icmp sgt i32 %src, 127
@@ -976,16 +913,16 @@ define void @clamp(i32 %src, i16* %dst) {
 ; MCU:       # %bb.0:
 ; MCU-NEXT:    cmpl $32767, %eax # imm = 0x7FFF
 ; MCU-NEXT:    movl $32767, %ecx # imm = 0x7FFF
-; MCU-NEXT:    jg .LBB23_2
+; MCU-NEXT:    jg .LBB22_2
 ; MCU-NEXT:  # %bb.1:
 ; MCU-NEXT:    movl %eax, %ecx
-; MCU-NEXT:  .LBB23_2:
+; MCU-NEXT:  .LBB22_2:
 ; MCU-NEXT:    cmpl $-32768, %ecx # imm = 0x8000
 ; MCU-NEXT:    movl $32768, %eax # imm = 0x8000
-; MCU-NEXT:    jl .LBB23_4
+; MCU-NEXT:    jl .LBB22_4
 ; MCU-NEXT:  # %bb.3:
 ; MCU-NEXT:    movl %ecx, %eax
-; MCU-NEXT:  .LBB23_4:
+; MCU-NEXT:  .LBB22_4:
 ; MCU-NEXT:    movw %ax, (%edx)
 ; MCU-NEXT:    retl
   %cmp = icmp sgt i32 %src, 32767
@@ -1009,19 +946,19 @@ define void @test19() {
 ; CHECK-NEXT:    movl $-1, %eax
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB24_1: ## %CF
+; CHECK-NEXT:  LBB23_1: ## %CF
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    testb %cl, %cl
-; CHECK-NEXT:    jne LBB24_1
+; CHECK-NEXT:    jne LBB23_1
 ; CHECK-NEXT:  ## %bb.2: ## %CF250
-; CHECK-NEXT:    ## in Loop: Header=BB24_1 Depth=1
-; CHECK-NEXT:    jne LBB24_1
+; CHECK-NEXT:    ## in Loop: Header=BB23_1 Depth=1
+; CHECK-NEXT:    jne LBB23_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB24_3: ## %CF242
+; CHECK-NEXT:  LBB23_3: ## %CF242
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    cmpl %eax, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
-; CHECK-NEXT:    jp LBB24_3
+; CHECK-NEXT:    jp LBB23_3
 ; CHECK-NEXT:  ## %bb.4: ## %CF244
 ; CHECK-NEXT:    retq
 ;
@@ -1030,24 +967,24 @@ define void @test19() {
 ; MCU-NEXT:    movl $-1, %ecx
 ; MCU-NEXT:    movb $1, %al
 ; MCU-NEXT:    .p2align 4, 0x90
-; MCU-NEXT:  .LBB24_1: # %CF
+; MCU-NEXT:  .LBB23_1: # %CF
 ; MCU-NEXT:    # =>This Inner Loop Header: Depth=1
 ; MCU-NEXT:    testb %al, %al
-; MCU-NEXT:    jne .LBB24_1
+; MCU-NEXT:    jne .LBB23_1
 ; MCU-NEXT:  # %bb.2: # %CF250
-; MCU-NEXT:    # in Loop: Header=BB24_1 Depth=1
-; MCU-NEXT:    jne .LBB24_1
+; MCU-NEXT:    # in Loop: Header=BB23_1 Depth=1
+; MCU-NEXT:    jne .LBB23_1
 ; MCU-NEXT:  # %bb.3: # %CF242.preheader
 ; MCU-NEXT:    fldz
 ; MCU-NEXT:    .p2align 4, 0x90
-; MCU-NEXT:  .LBB24_4: # %CF242
+; MCU-NEXT:  .LBB23_4: # %CF242
 ; MCU-NEXT:    # =>This Inner Loop Header: Depth=1
 ; MCU-NEXT:    cmpl %eax, %ecx
 ; MCU-NEXT:    fucom %st(0)
 ; MCU-NEXT:    fnstsw %ax
 ; MCU-NEXT:    # kill: def $ah killed $ah killed $ax
 ; MCU-NEXT:    sahf
-; MCU-NEXT:    jp .LBB24_4
+; MCU-NEXT:    jp .LBB23_4
 ; MCU-NEXT:  # %bb.5: # %CF244
 ; MCU-NEXT:    fstp %st(0)
 ; MCU-NEXT:    retl
@@ -1116,10 +1053,10 @@ define i16 @select_xor_1b(i16 %A, i8 %cond) {
 ; MCU-LABEL: select_xor_1b:
 ; MCU:       # %bb.0: # %entry
 ; MCU-NEXT:    testb $1, %dl
-; MCU-NEXT:    je .LBB26_2
+; MCU-NEXT:    je .LBB25_2
 ; MCU-NEXT:  # %bb.1:
 ; MCU-NEXT:    xorl $43, %eax
-; MCU-NEXT:  .LBB26_2: # %entry
+; MCU-NEXT:  .LBB25_2: # %entry
 ; MCU-NEXT:    # kill: def $ax killed $ax killed $eax
 ; MCU-NEXT:    retl
 entry:
@@ -1168,10 +1105,10 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
 ; MCU-LABEL: select_xor_2b:
 ; MCU:       # %bb.0: # %entry
 ; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB28_2
+; MCU-NEXT:    je .LBB27_2
 ; MCU-NEXT:  # %bb.1:
 ; MCU-NEXT:    xorl %edx, %eax
-; MCU-NEXT:  .LBB28_2: # %entry
+; MCU-NEXT:  .LBB27_2: # %entry
 ; MCU-NEXT:    retl
 entry:
  %and = and i8 %cond, 1
@@ -1219,10 +1156,10 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
 ; MCU-LABEL: select_or_b:
 ; MCU:       # %bb.0: # %entry
 ; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB30_2
+; MCU-NEXT:    je .LBB29_2
 ; MCU-NEXT:  # %bb.1:
 ; MCU-NEXT:    orl %edx, %eax
-; MCU-NEXT:  .LBB30_2: # %entry
+; MCU-NEXT:  .LBB29_2: # %entry
 ; MCU-NEXT:    retl
 entry:
  %and = and i8 %cond, 1
@@ -1270,10 +1207,10 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
 ; MCU-LABEL: select_or_1b:
 ; MCU:       # %bb.0: # %entry
 ; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB32_2
+; MCU-NEXT:    je .LBB31_2
 ; MCU-NEXT:  # %bb.1:
 ; MCU-NEXT:    orl %edx, %eax
-; MCU-NEXT:  .LBB32_2: # %entry
+; MCU-NEXT:  .LBB31_2: # %entry
 ; MCU-NEXT:    retl
 entry:
  %and = and i32 %cond, 1
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
new file mode 100644
index 00000000000..a1193afc626
--- /dev/null
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -0,0 +1,196 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=X86
+
+define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
+; X64-LABEL: muloti_test:
+; X64:       # %bb.0: # %start
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    testq %rcx, %rcx
+; X64-NEXT:    setne %al
+; X64-NEXT:    testq %rsi, %rsi
+; X64-NEXT:    setne %r9b
+; X64-NEXT:    andb %al, %r9b
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %rdx
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    seto %r10b
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    seto %r11b
+; X64-NEXT:    orb %r10b, %r11b
+; X64-NEXT:    addq %rsi, %rcx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    setb %cl
+; X64-NEXT:    orb %r11b, %cl
+; X64-NEXT:    orb %r9b, %cl
+; X64-NEXT:    retq
+;
+; X86-LABEL: muloti_test:
+; X86:       # %bb.0: # %start
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 20
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 48
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    addl %edi, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ecx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebx, %esi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    mull %edx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    adcl %ebp, %edx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl %edi, %edx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    setne %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    setne %ch
+; X86-NEXT:    andb %cl, %ch
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    orb %ch, %cl
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    setne %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    setne %bh
+; X86-NEXT:    andb %cl, %bh
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    setne %bl
+; X86-NEXT:    orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, (%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, 4(%ecx)
+; X86-NEXT:    movl %eax, 8(%ecx)
+; X86-NEXT:    movl %edx, 12(%ecx)
+; X86-NEXT:    setne %al
+; X86-NEXT:    andb %bl, %al
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X86-NEXT:    orb %bh, %al
+; X86-NEXT:    andb $1, %al
+; X86-NEXT:    movb %al, 16(%ecx)
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 20
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    popl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl $4
+start:
+  %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
+  %1 = extractvalue { i128, i1 } %0, 0
+  %2 = extractvalue { i128, i1 } %0, 1
+  %3 = zext i1 %2 to i8
+  %4 = insertvalue { i128, i8 } undef, i128 %1, 0
+  %5 = insertvalue { i128, i8 } %4, i8 %3, 1
+  ret { i128, i8 } %5
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
+
+attributes #0 = { nounwind readnone uwtable }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll
new file mode 100644
index 00000000000..044e5f2da28
--- /dev/null
+++ b/llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=X86
+
+define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
+; X86-LABEL: mulodi_test:
+; X86:       # %bb.0: # %start
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 20
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    setne %dl
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %bl
+; X86-NEXT:    andb %dl, %bl
+; X86-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    seto %cl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    seto %ch
+; X86-NEXT:    orb %cl, %ch
+; X86-NEXT:    addl %edi, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    setb %cl
+; X86-NEXT:    orb %ch, %cl
+; X86-NEXT:    orb %bl, %cl
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    popl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+start:
+  %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
+  %1 = extractvalue { i64, i1 } %0, 0
+  %2 = extractvalue { i64, i1 } %0, 1
+  %3 = zext i1 %2 to i8
+  %4 = insertvalue { i64, i8 } undef, i64 %1, 0
+  %5 = insertvalue { i64, i8 } %4, i8 %3, 1
+  ret { i64, i8 } %5
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1
+
+attributes #0 = { nounwind readnone uwtable }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nounwind }