diff options
| author | Jatin Bhateja <jatin.bhateja@gmail.com> | 2017-10-04 09:02:10 +0000 |
|---|---|---|
| committer | Jatin Bhateja <jatin.bhateja@gmail.com> | 2017-10-04 09:02:10 +0000 |
| commit | 3c29bacd43ffd9bb15eebcf1afc12106806030bd (patch) | |
| tree | 10750a4a1a3e12086479b7b0022ca840f332e4ce /llvm/test/CodeGen/X86/lea-opt-cse3.ll | |
| parent | ea9dceed777adc46556192a7adc4ae48be1c2113 (diff) | |
| download | bcm5719-llvm-3c29bacd43ffd9bb15eebcf1afc12106806030bd.tar.gz bcm5719-llvm-3c29bacd43ffd9bb15eebcf1afc12106806030bd.zip | |
[X86] Improvement in CodeGen instruction selection for LEAs (re-applying post required revision changes.)
Summary:
1/ Operand folding during complex pattern matching for LEAs has been
extended, such that it promotes Scale to accommodate similar operand
appearing in the DAG.
e.g.
T1 = A + B
T2 = T1 + 10
T3 = T2 + A
For above DAG rooted at T3, X86AddressMode will no look like
Base = B , Index = A , Scale = 2 , Disp = 10
2/ During OptimizeLEAPass down the pipeline factorization is now performed over LEAs
so that if there is an opportunity then complex LEAs (having 3 operands)
could be factored out.
e.g.
leal 1(%rax,%rcx,1), %rdx
leal 1(%rax,%rcx,2), %rcx
will be factored as following
leal 1(%rax,%rcx,1), %rdx
leal (%rdx,%rcx) , %edx
3/ Aggressive operand folding for AM based selection for LEAs is sensitive to loops,
thus avoiding creation of any complex LEAs within a loop.
Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy
Reviewed By: lsaba
Subscribers: jmolloy, spatel, igorb, llvm-commits
Differential Revision: https://reviews.llvm.org/D35014
llvm-svn: 314886
Diffstat (limited to 'llvm/test/CodeGen/X86/lea-opt-cse3.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/lea-opt-cse3.ll | 40 |
1 files changed, 16 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/X86/lea-opt-cse3.ll b/llvm/test/CodeGen/X86/lea-opt-cse3.ll index 4e030fb03a7..7fabd58dd69 100644 --- a/llvm/test/CodeGen/X86/lea-opt-cse3.ll +++ b/llvm/test/CodeGen/X86/lea-opt-cse3.ll @@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 { ; X64-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def> ; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-NEXT: leal 4(%rdi,%rsi,2), %ecx -; X64-NEXT: leal 4(%rdi,%rsi,4), %eax +; X64-NEXT: leal (%ecx,%rsi,2), %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: retq ; @@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 { ; X86: # BB#0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal 4(%ecx,%eax,2), %edx -; X86-NEXT: leal 4(%ecx,%eax,4), %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: leal 4(%ecx,%eax,2), %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax +; X86-NEXT: imull %ecx, %eax ; X86-NEXT: retl entry: %mul = shl i32 %b, 1 @@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 { ; X64-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def> ; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-NEXT: leal 4(%rdi,%rsi,4), %ecx -; X64-NEXT: leal 4(%rdi,%rsi,8), %eax +; X64-NEXT: leal (%ecx,%rsi,4), %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: retq ; @@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 { ; X86: # BB#0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal 4(%ecx,%eax,4), %edx -; X86-NEXT: leal 4(%ecx,%eax,8), %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: leal 4(%ecx,%eax,4), %ecx +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: imull %ecx, %eax ; X86-NEXT: retl entry: %mul = shl i32 %b, 2 @@ -68,31 +68,23 @@ define i32 @foo1_mult_basic_blocks(i32 %a, i32 %b) local_unnamed_addr #0 { ; X64-NEXT: cmpl $10, %ecx ; X64-NEXT: je .LBB2_2 ; X64-NEXT: # BB#1: # %mid -; X64-NEXT: leal 4(%rdi,%rsi,8), %eax -; X64-NEXT: imull %eax, %ecx -; X64-NEXT: movl %ecx, %eax +; X64-NEXT: leal (%ecx,%rsi,4), %eax +; X64-NEXT: imull %ecx, %eax ; X64-NEXT: .LBB2_2: # %exit ; X64-NEXT: retq ; ; X86-LABEL: foo1_mult_basic_blocks: ; X86: # BB#0: # %entry -; X86-NEXT: pushl %esi -; X86-NEXT: .Lcfi0: -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .Lcfi1: -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: leal 4(%esi,%edx,4), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 4(%eax,%edx,4), %ecx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl $10, %ecx ; X86-NEXT: je .LBB2_2 ; X86-NEXT: # BB#1: # %mid -; X86-NEXT: leal 4(%esi,%edx,8), %eax -; X86-NEXT: imull %eax, %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: leal (%ecx,%edx,4), %eax +; X86-NEXT: imull %ecx, %eax ; X86-NEXT: .LBB2_2: # %exit -; X86-NEXT: popl %esi ; X86-NEXT: retl entry: %mul = shl i32 %b, 2 @@ -131,9 +123,9 @@ define i32 @foo1_mult_basic_blocks_illegal_scale(i32 %a, i32 %b) local_unnamed_a ; X86-LABEL: foo1_mult_basic_blocks_illegal_scale: ; X86: # BB#0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: .Lcfi2: +; X86-NEXT: .Lcfi0: ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .Lcfi3: +; X86-NEXT: .Lcfi1: ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |

