diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/optimize-max-0.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/optimize-max-0.ll | 418 |
1 files changed, 417 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll index b5e8627a88b..5fb2371cb8e 100644 --- a/llvm/test/CodeGen/X86/optimize-max-0.ll +++ b/llvm/test/CodeGen/X86/optimize-max-0.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | not grep cmov +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; LSR should be able to eliminate the max computations by ; making the loops use slt/ult comparisons instead of ne comparisons. @@ -7,6 +8,219 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "i386-apple-darwin9" define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $28, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.1: ## %bb10.preheader +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: sarl $31, %ebp +; CHECK-NEXT: shrl $30, %ebp +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: sarl $2, %ebp +; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: jle LBB0_12 +; CHECK-NEXT: ## %bb.2: ## %bb.nph9 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle LBB0_12 +; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: incl %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_4: ## %bb6 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx +; CHECK-NEXT: movb %bl, (%edx,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: jl LBB0_4 +; CHECK-NEXT: ## %bb.5: ## %bb9 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl %edi, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: cmpl %ebx, %ecx +; CHECK-NEXT: je LBB0_12 +; CHECK-NEXT: ## %bb.6: ## %bb7.preheader +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: jmp LBB0_4 +; CHECK-NEXT: LBB0_12: ## %bb18.loopexit +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: addl %ebp, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: cmpl $1, %ebx +; CHECK-NEXT: jle LBB0_13 +; CHECK-NEXT: ## %bb.7: ## %bb.nph5 +; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: jl LBB0_13 +; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: shrl $31, %edx +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: sarl %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $31, %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: sarl %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: leal 2(%esi), %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_9: ## %bb13 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB0_10 Depth 2 +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: shrl $31, %esi +; CHECK-NEXT: addl %ebp, %esi +; CHECK-NEXT: andl $-2, %esi +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: addl %ebx, %edi +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_10: ## %bb14 +; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movzbl -2(%edi,%esi,4), %ebx +; CHECK-NEXT: movb %bl, (%ecx,%esi) +; CHECK-NEXT: movzbl (%edi,%esi,4), %ebx +; CHECK-NEXT: movb %bl, (%eax,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: jl LBB0_10 +; CHECK-NEXT: ## %bb.11: ## %bb17 +; CHECK-NEXT: ## in Loop: Header=BB0_9 Depth=1 +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload +; CHECK-NEXT: addl $2, %ebx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; CHECK-NEXT: jl LBB0_9 +; CHECK-NEXT: LBB0_13: ## %bb20 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: cmpl $1, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.14: ## %bb20 +; CHECK-NEXT: cmpl $3, %edx +; CHECK-NEXT: jne LBB0_24 +; CHECK-NEXT: ## %bb.15: ## %bb22 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: jle LBB0_18 +; CHECK-NEXT: ## %bb.16: ## %bb.nph +; CHECK-NEXT: leal 15(%ebx), %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl $15, %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: addl %esi, %esi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_17: ## %bb23 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: calll _memcpy +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: jne LBB0_17 +; CHECK-NEXT: LBB0_18: ## %bb26 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: addl %esi, %edx +; CHECK-NEXT: jmp LBB0_23 +; CHECK-NEXT: LBB0_19: ## %bb29 +; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: jle LBB0_22 +; CHECK-NEXT: ## %bb.20: ## %bb.nph11 +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: leal 15(%edi), %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_21: ## %bb30 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: calll _memcpy +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %esi, %edi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: jne LBB0_21 +; CHECK-NEXT: LBB0_22: ## %bb33 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: LBB0_23: ## %bb33 +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: sarl %eax +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $128 +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: calll _memset +; CHECK-NEXT: addl $44, %esp +; CHECK-NEXT: LBB0_25: ## %return +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_24: ## %return +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: jmp LBB0_25 entry: %0 = mul i32 %x, %w %1 = mul i32 %x, %w @@ -232,6 +446,208 @@ return: ; preds = %bb20 } define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind { +; CHECK-LABEL: bar: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $28, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: imull %ecx, %eax +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: je LBB1_19 +; CHECK-NEXT: ## %bb.1: ## %bb10.preheader +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: je LBB1_12 +; CHECK-NEXT: ## %bb.2: ## %bb.nph9 +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je LBB1_12 +; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: incl %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_6: ## %bb7.preheader +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB1_4 Depth 2 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_4: ## %bb6 +; CHECK-NEXT: ## Parent Loop BB1_6 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx +; CHECK-NEXT: movb %bl, (%edx,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: jb LBB1_4 +; CHECK-NEXT: ## %bb.5: ## %bb9 +; CHECK-NEXT: ## in Loop: Header=BB1_6 Depth=1 +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl %edi, %edx +; CHECK-NEXT: cmpl %ebp, %ecx +; CHECK-NEXT: jne LBB1_6 +; CHECK-NEXT: LBB1_12: ## %bb18.loopexit +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: cmpl $1, %ebp +; CHECK-NEXT: jbe LBB1_13 +; CHECK-NEXT: ## %bb.7: ## %bb.nph5 +; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: jb LBB1_13 +; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: shrl %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: leal 2(%edx), %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_9: ## %bb13 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB1_10 Depth 2 +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: andl $1, %ebx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: addl %edx, %ebx +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_10: ## %bb14 +; CHECK-NEXT: ## Parent Loop BB1_9 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movzbl -2(%ebx,%esi,4), %edx +; CHECK-NEXT: movb %dl, (%eax,%esi) +; CHECK-NEXT: movzbl (%ebx,%esi,4), %edx +; CHECK-NEXT: movb %dl, (%ecx,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %ebp, %esi +; CHECK-NEXT: jb LBB1_10 +; CHECK-NEXT: ## %bb.11: ## %bb17 +; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: incl %edi +; CHECK-NEXT: addl %ebp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; CHECK-NEXT: addl $2, %edx +; CHECK-NEXT: addl %ebp, %eax +; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; CHECK-NEXT: jb LBB1_9 +; CHECK-NEXT: LBB1_13: ## %bb20 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: cmpl $1, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: je LBB1_19 +; CHECK-NEXT: ## %bb.14: ## %bb20 +; CHECK-NEXT: cmpl $3, %edx +; CHECK-NEXT: jne LBB1_24 +; CHECK-NEXT: ## %bb.15: ## %bb22 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: je LBB1_18 +; CHECK-NEXT: ## %bb.16: ## %bb.nph +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: leal 15(%ebp), %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: leal 15(%ecx), %ebx +; CHECK-NEXT: andl $-16, %ebx +; CHECK-NEXT: addl %eax, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: leal (%edx,%eax), %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_17: ## %bb23 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: calll _memcpy +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %ecx, %ebp +; CHECK-NEXT: addl %ebx, %edi +; CHECK-NEXT: decl %esi +; CHECK-NEXT: jne LBB1_17 +; CHECK-NEXT: LBB1_18: ## %bb26 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: shrl %ecx +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl $128 +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: jmp LBB1_23 +; CHECK-NEXT: LBB1_19: ## %bb29 +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: je LBB1_22 +; CHECK-NEXT: ## %bb.20: ## %bb.nph11 +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: leal 15(%ecx), %ebx +; CHECK-NEXT: andl $-16, %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_21: ## %bb30 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: calll _memcpy +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %ecx, %ebp +; CHECK-NEXT: addl %ebx, %edi +; CHECK-NEXT: decl %esi +; CHECK-NEXT: jne LBB1_21 +; CHECK-NEXT: LBB1_22: ## %bb33 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $128 +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: LBB1_23: ## %bb33 +; CHECK-NEXT: calll _memset +; CHECK-NEXT: addl $44, %esp +; CHECK-NEXT: LBB1_25: ## %return +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; CHECK-NEXT: LBB1_24: ## %return +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: jmp LBB1_25 entry: %0 = mul i32 %x, %w %1 = mul i32 %x, %w |