summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/optimize-max-0.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/optimize-max-0.ll')
-rw-r--r-- llvm/test/CodeGen/X86/optimize-max-0.ll 418
1 file changed, 417 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll
index b5e8627a88b..5fb2371cb8e 100644
--- a/llvm/test/CodeGen/X86/optimize-max-0.ll
+++ b/llvm/test/CodeGen/X86/optimize-max-0.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s | not grep cmov
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
; LSR should be able to eliminate the max computations by
; making the loops use slt/ult comparisons instead of ne comparisons.
@@ -7,6 +8,219 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
target triple = "i386-apple-darwin9"
define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: subl $28, %esp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: imull %edi, %eax
+; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: je LBB0_19
+; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
+; CHECK-NEXT: movl %eax, %ebp
+; CHECK-NEXT: sarl $31, %ebp
+; CHECK-NEXT: shrl $30, %ebp
+; CHECK-NEXT: addl %eax, %ebp
+; CHECK-NEXT: sarl $2, %ebp
+; CHECK-NEXT: testl %ebx, %ebx
+; CHECK-NEXT: jle LBB0_12
+; CHECK-NEXT: ## %bb.2: ## %bb.nph9
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: jle LBB0_12
+; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: incl %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB0_4: ## %bb6
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
+; CHECK-NEXT: movb %bl, (%edx,%esi)
+; CHECK-NEXT: incl %esi
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: jl LBB0_4
+; CHECK-NEXT: ## %bb.5: ## %bb9
+; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1
+; CHECK-NEXT: incl %ecx
+; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: addl %edi, %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: cmpl %ebx, %ecx
+; CHECK-NEXT: je LBB0_12
+; CHECK-NEXT: ## %bb.6: ## %bb7.preheader
+; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: jmp LBB0_4
+; CHECK-NEXT: LBB0_12: ## %bb18.loopexit
+; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; CHECK-NEXT: addl %ebp, %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: cmpl $1, %ebx
+; CHECK-NEXT: jle LBB0_13
+; CHECK-NEXT: ## %bb.7: ## %bb.nph5
+; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp)
+; CHECK-NEXT: jl LBB0_13
+; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: shrl $31, %edx
+; CHECK-NEXT: addl %eax, %edx
+; CHECK-NEXT: sarl %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $31, %ecx
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: sarl %ecx
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: leal 2(%esi), %esi
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; CHECK-NEXT: addl %esi, %ecx
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: xorl %ebp, %ebp
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB0_9: ## %bb13
+; CHECK-NEXT: ## =>This Loop Header: Depth=1
+; CHECK-NEXT: ## Child Loop BB0_10 Depth 2
+; CHECK-NEXT: movl %ebp, %esi
+; CHECK-NEXT: shrl $31, %esi
+; CHECK-NEXT: addl %ebp, %esi
+; CHECK-NEXT: andl $-2, %esi
+; CHECK-NEXT: movl %ebp, %edi
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: addl %ebx, %edi
+; CHECK-NEXT: imull {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB0_10: ## %bb14
+; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1
+; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
+; CHECK-NEXT: movzbl -2(%edi,%esi,4), %ebx
+; CHECK-NEXT: movb %bl, (%ecx,%esi)
+; CHECK-NEXT: movzbl (%edi,%esi,4), %ebx
+; CHECK-NEXT: movb %bl, (%eax,%esi)
+; CHECK-NEXT: incl %esi
+; CHECK-NEXT: cmpl %edx, %esi
+; CHECK-NEXT: jl LBB0_10
+; CHECK-NEXT: ## %bb.11: ## %bb17
+; CHECK-NEXT: ## in Loop: Header=BB0_9 Depth=1
+; CHECK-NEXT: incl %ebp
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; CHECK-NEXT: addl $2, %ebx
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; CHECK-NEXT: jl LBB0_9
+; CHECK-NEXT: LBB0_13: ## %bb20
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: cmpl $1, %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT: je LBB0_19
+; CHECK-NEXT: ## %bb.14: ## %bb20
+; CHECK-NEXT: cmpl $3, %edx
+; CHECK-NEXT: jne LBB0_24
+; CHECK-NEXT: ## %bb.15: ## %bb22
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; CHECK-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT: testl %ebx, %ebx
+; CHECK-NEXT: jle LBB0_18
+; CHECK-NEXT: ## %bb.16: ## %bb.nph
+; CHECK-NEXT: leal 15(%ebx), %eax
+; CHECK-NEXT: andl $-16, %eax
+; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: addl %eax, %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: addl $15, %eax
+; CHECK-NEXT: andl $-16, %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: addl %esi, %esi
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB0_17: ## %bb23
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: subl $4, %esp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: calll _memcpy
+; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: addl %edi, %esi
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; CHECK-NEXT: decl %ebx
+; CHECK-NEXT: jne LBB0_17
+; CHECK-NEXT: LBB0_18: ## %bb26
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; CHECK-NEXT: addl %ecx, %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: addl %esi, %edx
+; CHECK-NEXT: jmp LBB0_23
+; CHECK-NEXT: LBB0_19: ## %bb29
+; CHECK-NEXT: testl %ebx, %ebx
+; CHECK-NEXT: jle LBB0_22
+; CHECK-NEXT: ## %bb.20: ## %bb.nph11
+; CHECK-NEXT: movl %edi, %esi
+; CHECK-NEXT: leal 15(%edi), %eax
+; CHECK-NEXT: andl $-16, %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB0_21: ## %bb30
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: subl $4, %esp
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: calll _memcpy
+; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: addl %esi, %edi
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; CHECK-NEXT: decl %ebx
+; CHECK-NEXT: jne LBB0_21
+; CHECK-NEXT: LBB0_22: ## %bb33
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: addl %ecx, %edx
+; CHECK-NEXT: LBB0_23: ## %bb33
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: shrl $31, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: sarl %eax
+; CHECK-NEXT: subl $4, %esp
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: pushl $128
+; CHECK-NEXT: pushl %edx
+; CHECK-NEXT: calll _memset
+; CHECK-NEXT: addl $44, %esp
+; CHECK-NEXT: LBB0_25: ## %return
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+; CHECK-NEXT: LBB0_24: ## %return
+; CHECK-NEXT: addl $28, %esp
+; CHECK-NEXT: jmp LBB0_25
entry:
%0 = mul i32 %x, %w
%1 = mul i32 %x, %w
@@ -232,6 +446,208 @@ return: ; preds = %bb20
}
define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
+; CHECK-LABEL: bar:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: subl $28, %esp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: movl %ebp, %eax
+; CHECK-NEXT: imull %ecx, %eax
+; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: je LBB1_19
+; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
+; CHECK-NEXT: shrl $2, %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: testl %ebp, %ebp
+; CHECK-NEXT: je LBB1_12
+; CHECK-NEXT: ## %bb.2: ## %bb.nph9
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je LBB1_12
+; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: incl %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB1_6: ## %bb7.preheader
+; CHECK-NEXT: ## =>This Loop Header: Depth=1
+; CHECK-NEXT: ## Child Loop BB1_4 Depth 2
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB1_4: ## %bb6
+; CHECK-NEXT: ## Parent Loop BB1_6 Depth=1
+; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
+; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
+; CHECK-NEXT: movb %bl, (%edx,%esi)
+; CHECK-NEXT: incl %esi
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: jb LBB1_4
+; CHECK-NEXT: ## %bb.5: ## %bb9
+; CHECK-NEXT: ## in Loop: Header=BB1_6 Depth=1
+; CHECK-NEXT: incl %ecx
+; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: addl %edi, %edx
+; CHECK-NEXT: cmpl %ebp, %ecx
+; CHECK-NEXT: jne LBB1_6
+; CHECK-NEXT: LBB1_12: ## %bb18.loopexit
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: cmpl $1, %ebp
+; CHECK-NEXT: jbe LBB1_13
+; CHECK-NEXT: ## %bb.7: ## %bb.nph5
+; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp)
+; CHECK-NEXT: jb LBB1_13
+; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT: shrl %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: leal 2(%edx), %edx
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB1_9: ## %bb13
+; CHECK-NEXT: ## =>This Loop Header: Depth=1
+; CHECK-NEXT: ## Child Loop BB1_10 Depth 2
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: andl $1, %ebx
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: addl %edx, %ebx
+; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB1_10: ## %bb14
+; CHECK-NEXT: ## Parent Loop BB1_9 Depth=1
+; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
+; CHECK-NEXT: movzbl -2(%ebx,%esi,4), %edx
+; CHECK-NEXT: movb %dl, (%eax,%esi)
+; CHECK-NEXT: movzbl (%ebx,%esi,4), %edx
+; CHECK-NEXT: movb %dl, (%ecx,%esi)
+; CHECK-NEXT: incl %esi
+; CHECK-NEXT: cmpl %ebp, %esi
+; CHECK-NEXT: jb LBB1_10
+; CHECK-NEXT: ## %bb.11: ## %bb17
+; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1
+; CHECK-NEXT: incl %edi
+; CHECK-NEXT: addl %ebp, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; CHECK-NEXT: addl $2, %edx
+; CHECK-NEXT: addl %ebp, %eax
+; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; CHECK-NEXT: jb LBB1_9
+; CHECK-NEXT: LBB1_13: ## %bb20
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: cmpl $1, %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: je LBB1_19
+; CHECK-NEXT: ## %bb.14: ## %bb20
+; CHECK-NEXT: cmpl $3, %edx
+; CHECK-NEXT: jne LBB1_24
+; CHECK-NEXT: ## %bb.15: ## %bb22
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; CHECK-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT: testl %ebp, %ebp
+; CHECK-NEXT: je LBB1_18
+; CHECK-NEXT: ## %bb.16: ## %bb.nph
+; CHECK-NEXT: movl %ebp, %esi
+; CHECK-NEXT: leal 15(%ebp), %eax
+; CHECK-NEXT: andl $-16, %eax
+; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: leal 15(%ecx), %ebx
+; CHECK-NEXT: andl $-16, %ebx
+; CHECK-NEXT: addl %eax, %edi
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: leal (%edx,%eax), %ebp
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB1_17: ## %bb23
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: subl $4, %esp
+; CHECK-NEXT: pushl %ecx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: calll _memcpy
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: addl %ecx, %ebp
+; CHECK-NEXT: addl %ebx, %edi
+; CHECK-NEXT: decl %esi
+; CHECK-NEXT: jne LBB1_17
+; CHECK-NEXT: LBB1_18: ## %bb26
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: addl %eax, %edx
+; CHECK-NEXT: shrl %ecx
+; CHECK-NEXT: subl $4, %esp
+; CHECK-NEXT: pushl %ecx
+; CHECK-NEXT: pushl $128
+; CHECK-NEXT: pushl %edx
+; CHECK-NEXT: jmp LBB1_23
+; CHECK-NEXT: LBB1_19: ## %bb29
+; CHECK-NEXT: testl %ebp, %ebp
+; CHECK-NEXT: je LBB1_22
+; CHECK-NEXT: ## %bb.20: ## %bb.nph11
+; CHECK-NEXT: movl %ebp, %esi
+; CHECK-NEXT: leal 15(%ecx), %ebx
+; CHECK-NEXT: andl $-16, %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB1_21: ## %bb30
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: subl $4, %esp
+; CHECK-NEXT: pushl %ecx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: calll _memcpy
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: addl %ecx, %ebp
+; CHECK-NEXT: addl %ebx, %edi
+; CHECK-NEXT: decl %esi
+; CHECK-NEXT: jne LBB1_21
+; CHECK-NEXT: LBB1_22: ## %bb33
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: subl $4, %esp
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: pushl $128
+; CHECK-NEXT: pushl %ecx
+; CHECK-NEXT: LBB1_23: ## %bb33
+; CHECK-NEXT: calll _memset
+; CHECK-NEXT: addl $44, %esp
+; CHECK-NEXT: LBB1_25: ## %return
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+; CHECK-NEXT: LBB1_24: ## %return
+; CHECK-NEXT: addl $28, %esp
+; CHECK-NEXT: jmp LBB1_25
entry:
%0 = mul i32 %x, %w
%1 = mul i32 %x, %w
OpenPOWER on IntegriCloud