diff options
| author | Eli Friedman <eli.friedman@gmail.com> | 2010-06-03 01:01:48 +0000 | 
|---|---|---|
| committer | Eli Friedman <eli.friedman@gmail.com> | 2010-06-03 01:01:48 +0000 | 
| commit | 1f41303260f56318f346834d2ae4726e28427007 (patch) | |
| tree | 4b0ce1e6b4ffa93fe68ae8f813f8530425153432 | |
| parent | a2f74f3c9849f7ad5513e0c89e6d2a45a8621985 (diff) | |
| download | bcm5719-llvm-1f41303260f56318f346834d2ae4726e28427007.tar.gz bcm5719-llvm-1f41303260f56318f346834d2ae4726e28427007.zip  | |
Remove a fixed item, update a couple partially-fixed items.
llvm-svn: 105375
| -rw-r--r-- | llvm/lib/Target/X86/README.txt | 103 | 
1 file changed, 22 insertions, 81 deletions
diff --git a/llvm/lib/Target/X86/README.txt b/llvm/lib/Target/X86/README.txt index d4545a6fcfd..efc0cd82f23 100644 --- a/llvm/lib/Target/X86/README.txt +++ b/llvm/lib/Target/X86/README.txt @@ -1103,57 +1103,6 @@ be folded into: shl [mem], 1  //===---------------------------------------------------------------------===// -This testcase misses a read/modify/write opportunity (from PR1425): - -void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){ -    int i; -    for(i=0; i<width; i++) -        b1[i] += (1*(b0[i] + b2[i])+0)>>0; -} - -We compile it down to: - -LBB1_2:	# bb -	movl	(%esi,%edi,4), %ebx -	addl	(%ecx,%edi,4), %ebx -	addl	(%edx,%edi,4), %ebx -	movl	%ebx, (%ecx,%edi,4) -	incl	%edi -	cmpl	%eax, %edi -	jne	LBB1_2	# bb - -the inner loop should add to the memory location (%ecx,%edi,4), saving -a mov.  Something like: - -        movl    (%esi,%edi,4), %ebx -        addl    (%edx,%edi,4), %ebx -        addl    %ebx, (%ecx,%edi,4) - -Here is another interesting example: - -void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){ -    int i; -    for(i=0; i<width; i++) -        b1[i] -= (1*(b0[i] + b2[i])+0)>>0; -} - -We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]: - -LBB9_2:	# bb -	movl	(%ecx,%edi,4), %ebx -	subl	(%esi,%edi,4), %ebx -	subl	(%edx,%edi,4), %ebx -	movl	%ebx, (%ecx,%edi,4) -	incl	%edi -	cmpl	%eax, %edi -	jne	LBB9_2	# bb - -Additionally, LSR should rewrite the exit condition of these loops to use -a stride-4 IV, would would allow all the scales in the loop to go away. -This would result in smaller code and more efficient microops. - -//===---------------------------------------------------------------------===// -  In SSE mode, we turn abs and neg into a load from the constant pool plus a xor  or and instruction, for example: @@ -1301,15 +1250,8 @@ FirstOnet:          xorl    %eax, %eax          ret -There are a few possible improvements here: -1. 
We should be able to eliminate the dead load into %ecx -2. We could change the "movl 8(%esp), %eax" into -   "movzwl 10(%esp), %eax"; this lets us change the cmpl -   into a testl, which is shorter, and eliminate the shift. - -We could also in theory eliminate the branch by using a conditional -for the address of the load, but that seems unlikely to be worthwhile -in general. +We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this +lets us change the cmpl into a testl, which is shorter, and eliminate the shift.  //===---------------------------------------------------------------------===// @@ -1331,22 +1273,23 @@ bb7:		; preds = %entry  to: -_foo: +foo:                                    # @foo +# BB#0:                                 # %entry +	movl	4(%esp), %ecx  	cmpb	$0, 16(%esp) -	movl	12(%esp), %ecx +	je	.LBB0_2 +# BB#1:                                 # %bb  	movl	8(%esp), %eax -	movl	4(%esp), %edx -	je	LBB1_2	# bb7 -LBB1_1:	# bb -	addl	%edx, %eax +	addl	%ecx, %eax  	ret -LBB1_2:	# bb7 -	movl	%edx, %eax -	subl	%ecx, %eax +.LBB0_2:                                # %bb7 +	movl	12(%esp), %edx +	movl	%ecx, %eax +	subl	%edx, %eax  	ret -The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2 -if it commuted the addl in LBB1_1. +There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a +couple more movls by putting 4(%esp) into %eax instead of %ecx.  //===---------------------------------------------------------------------===// @@ -1396,8 +1339,7 @@ Also check why xmm7 is not used at all in the function.  //===---------------------------------------------------------------------===// -Legalize loses track of the fact that bools are always zero extended when in -memory.  
This causes us to compile abort_gzip (from 164.gzip) from: +Take the following:  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"  target triple = "i386-apple-darwin8" @@ -1416,16 +1358,15 @@ bb4.i:		; preds = %entry  }  declare void @exit(i32) noreturn nounwind  -into: - -_abort_gzip: +This compiles into: +_abort_gzip:                            ## @abort_gzip +## BB#0:                                ## %entry  	subl	$12, %esp  	movb	_in_exit.4870.b, %al -	notb	%al -	testb	$1, %al -	jne	LBB1_2	## bb4.i -LBB1_1:	## bb.i -  ... +	cmpb	$1, %al +	jne	LBB0_2 + +We somehow miss folding the movb into the cmpb.  //===---------------------------------------------------------------------===//  | 

