Switch spill weights from a basic loop depth estimation to BlockFrequencyInfo.

The main advantage here is much better heuristics: they take into account not just loop depth but also __builtin_expect and other static heuristics, and will eventually learn how to use profile info. Most of the work in this patch is pushing the MachineBlockFrequencyInfo analysis into the right places. This is good for a 5% speedup on zlib's deflate (x86_64); there were some very unfortunate spilling decisions in its hottest loop in longest_match(). Other benchmarks I tried were mostly neutral. This changes register allocation in subtle ways, so the tests are updated for it. 2012-02-20-MachineCPBug.ll was deleted as it's very fragile and the instruction it looked for was gone already (but the FileCheck pattern picked up unrelated stuff). llvm-svn: 184105
author: Benjamin Kramer <benny.kra@googlemail.com> 2013-06-17 19:00:36 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2013-06-17 19:00:36 +0000
commit: e2a1d89e140d0e67196e28d2977ef7b770453d6a (patch)
tree: ff47514e9634f91023bd4fc4d7dab9213ddfeae3 /llvm/test/CodeGen/X86
parent: 36e5a7db2afad1f014b27acdc78d16a445084fb9 (diff)
download: bcm5719-llvm-e2a1d89e140d0e67196e28d2977ef7b770453d6a.tar.gz
bcm5719-llvm-e2a1d89e140d0e67196e28d2977ef7b770453d6a.zip
2 files changed, 9 insertions, 84 deletions
diff --git a/llvm/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/llvm/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
deleted file mode 100644
index 477b4deba82..00000000000
--- a/llvm/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=core2 -mattr=+sse | FileCheck %s
-; PR11940: Do not optimize away movb %al, %ch
-
-%struct.APInt = type { i64* }
-
-declare noalias i8* @calloc(i32, i32) nounwind
-
-define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 {
-entry:
-; CHECK: bug:
-  %call = tail call i8* @calloc(i32 1, i32 32)
-  %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
-  %0 = bitcast i8* %call.i to i64*
-  %rem.i = and i32 %rotateAmt, 63
-  %div.i = lshr i32 %rotateAmt, 6
-  %cmp.i = icmp eq i32 %rem.i, 0
-  br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
-
-for.cond.preheader.i:                             ; preds = %entry
-  %sub.i = sub i32 4, %div.i
-  %cmp23.i = icmp eq i32 %div.i, 4
-  br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
-
-for.body.lr.ph.i:                                 ; preds = %for.cond.preheader.i
-  %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
-  %.pre5.i = load i64** %pVal.i, align 4
-  br label %for.body.i
-
-for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
-  %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
-  %add.i = add i32 %i.04.i, %div.i
-  %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
-  %1 = load i64* %arrayidx.i, align 4
-  %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
-  store i64 %1, i64* %arrayidx3.i, align 4
-  %inc.i = add i32 %i.04.i, 1
-  %cmp2.i = icmp ult i32 %inc.i, %sub.i
-  br i1 %cmp2.i, label %for.body.i, label %if.end.i
-
-if.end.i:                                         ; preds = %for.body.i, %entry
-  %cmp81.i = icmp eq i32 %div.i, 3
-  br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
-
-for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond.preheader.i
-  %sub58.i = sub i32 3, %div.i
-  %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
-  %sh_prom.i = zext i32 %rem.i to i64
-  %sub17.i = sub i32 64, %rem.i
-  %sh_prom18.i = zext i32 %sub17.i to i64
-  %.pre.i = load i64** %pVal11.i, align 4
-  br label %for.body9.i
-
-for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
-; CHECK: %for.body9.i
-; CHECK: movb
-; CHECK: shrdl
-  %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
-  %add10.i = add i32 %i6.02.i, %div.i
-  %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
-  %2 = load i64* %arrayidx12.i, align 4
-  %shr.i = lshr i64 %2, %sh_prom.i
-  %add14.i = add i32 %add10.i, 1
-  %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
-  %3 = load i64* %arrayidx16.i, align 4
-  %shl.i = shl i64 %3, %sh_prom18.i
-  %or.i = or i64 %shl.i, %shr.i
-  %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
-  store i64 %or.i, i64* %arrayidx19.i, align 4
-  %inc21.i = add i32 %i6.02.i, 1
-  %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
-  br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
-
-_ZNK5APInt4lshrEj.exit:                           ; preds = %for.body9.i, %if.end.i
-  %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
-  %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
-  store i64* %0, i64** %4, align 4
-  ret void
-}
diff --git a/llvm/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/llvm/test/CodeGen/X86/atom-bypass-slow-division-64.ll
index a3bbea3c996..26b9a1e60f1 100644
--- a/llvm/test/CodeGen/X86/atom-bypass-slow-division-64.ll
+++ b/llvm/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -4,8 +4,9 @@
 
 define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
 ; CHECK: Test_get_quotient:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: ret
@@ -17,8 +18,9 @@ define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
 
 define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
 ; CHECK: Test_get_remainder:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: ret
@@ -30,8 +32,9 @@ define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
 
 define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
 ; CHECK: Test_get_quotient_and_remainder:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: divw
author	Benjamin Kramer <benny.kra@googlemail.com>	2013-06-17 19:00:36 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	2013-06-17 19:00:36 +0000
commit	e2a1d89e140d0e67196e28d2977ef7b770453d6a (patch)
tree	ff47514e9634f91023bd4fc4d7dab9213ddfeae3 /llvm/test/CodeGen/X86
parent	36e5a7db2afad1f014b27acdc78d16a445084fb9 (diff)
download	bcm5719-llvm-e2a1d89e140d0e67196e28d2977ef7b770453d6a.tar.gz bcm5719-llvm-e2a1d89e140d0e67196e28d2977ef7b770453d6a.zip