summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/lea-opt-cse3.ll
diff options
context:
space:
mode:
authorJatin Bhateja <jatin.bhateja@gmail.com>2017-10-04 09:02:10 +0000
committerJatin Bhateja <jatin.bhateja@gmail.com>2017-10-04 09:02:10 +0000
commit3c29bacd43ffd9bb15eebcf1afc12106806030bd (patch)
tree10750a4a1a3e12086479b7b0022ca840f332e4ce /llvm/test/CodeGen/X86/lea-opt-cse3.ll
parentea9dceed777adc46556192a7adc4ae48be1c2113 (diff)
downloadbcm5719-llvm-3c29bacd43ffd9bb15eebcf1afc12106806030bd.tar.gz
bcm5719-llvm-3c29bacd43ffd9bb15eebcf1afc12106806030bd.zip
[X86] Improvement in CodeGen instruction selection for LEAs (re-applying post required revision changes.)
Summary: 1/ Operand folding during complex pattern matching for LEAs has been extended, such that it promotes Scale to accommodate similar operand appearing in the DAG. e.g. T1 = A + B T2 = T1 + 10 T3 = T2 + A For above DAG rooted at T3, X86AddressMode will now look like Base = B , Index = A , Scale = 2 , Disp = 10 2/ During OptimizeLEAPass down the pipeline factorization is now performed over LEAs so that if there is an opportunity then complex LEAs (having 3 operands) could be factored out. e.g. leal 1(%rax,%rcx,1), %rdx leal 1(%rax,%rcx,2), %rcx will be factored as following leal 1(%rax,%rcx,1), %rdx leal (%rdx,%rcx) , %edx 3/ Aggressive operand folding for AM based selection for LEAs is sensitive to loops, thus avoiding creation of any complex LEAs within a loop. Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy Reviewed By: lsaba Subscribers: jmolloy, spatel, igorb, llvm-commits Differential Revision: https://reviews.llvm.org/D35014 llvm-svn: 314886
Diffstat (limited to 'llvm/test/CodeGen/X86/lea-opt-cse3.ll')
-rw-r--r--llvm/test/CodeGen/X86/lea-opt-cse3.ll40
1 files changed, 16 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/X86/lea-opt-cse3.ll b/llvm/test/CodeGen/X86/lea-opt-cse3.ll
index 4e030fb03a7..7fabd58dd69 100644
--- a/llvm/test/CodeGen/X86/lea-opt-cse3.ll
+++ b/llvm/test/CodeGen/X86/lea-opt-cse3.ll
@@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
; X64-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 4(%rdi,%rsi,2), %ecx
-; X64-NEXT: leal 4(%rdi,%rsi,4), %eax
+; X64-NEXT: leal (%ecx,%rsi,2), %eax
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: retq
;
@@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal 4(%ecx,%eax,2), %edx
-; X86-NEXT: leal 4(%ecx,%eax,4), %eax
-; X86-NEXT: imull %edx, %eax
+; X86-NEXT: leal 4(%ecx,%eax,2), %ecx
+; X86-NEXT: leal (%ecx,%eax,2), %eax
+; X86-NEXT: imull %ecx, %eax
; X86-NEXT: retl
entry:
%mul = shl i32 %b, 1
@@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
; X64-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 4(%rdi,%rsi,4), %ecx
-; X64-NEXT: leal 4(%rdi,%rsi,8), %eax
+; X64-NEXT: leal (%ecx,%rsi,4), %eax
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: retq
;
@@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal 4(%ecx,%eax,4), %edx
-; X86-NEXT: leal 4(%ecx,%eax,8), %eax
-; X86-NEXT: imull %edx, %eax
+; X86-NEXT: leal 4(%ecx,%eax,4), %ecx
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: imull %ecx, %eax
; X86-NEXT: retl
entry:
%mul = shl i32 %b, 2
@@ -68,31 +68,23 @@ define i32 @foo1_mult_basic_blocks(i32 %a, i32 %b) local_unnamed_addr #0 {
; X64-NEXT: cmpl $10, %ecx
; X64-NEXT: je .LBB2_2
; X64-NEXT: # BB#1: # %mid
-; X64-NEXT: leal 4(%rdi,%rsi,8), %eax
-; X64-NEXT: imull %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: leal (%ecx,%rsi,4), %eax
+; X64-NEXT: imull %ecx, %eax
; X64-NEXT: .LBB2_2: # %exit
; X64-NEXT: retq
;
; X86-LABEL: foo1_mult_basic_blocks:
; X86: # BB#0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: .Lcfi0:
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .Lcfi1:
-; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: leal 4(%esi,%edx,4), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 4(%eax,%edx,4), %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $10, %ecx
; X86-NEXT: je .LBB2_2
; X86-NEXT: # BB#1: # %mid
-; X86-NEXT: leal 4(%esi,%edx,8), %eax
-; X86-NEXT: imull %eax, %ecx
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: leal (%ecx,%edx,4), %eax
+; X86-NEXT: imull %ecx, %eax
; X86-NEXT: .LBB2_2: # %exit
-; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
%mul = shl i32 %b, 2
@@ -131,9 +123,9 @@ define i32 @foo1_mult_basic_blocks_illegal_scale(i32 %a, i32 %b) local_unnamed_a
; X86-LABEL: foo1_mult_basic_blocks_illegal_scale:
; X86: # BB#0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: .Lcfi2:
+; X86-NEXT: .Lcfi0:
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .Lcfi3:
+; X86-NEXT: .Lcfi1:
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
OpenPOWER on IntegriCloud