diff options
| author | Andrew Trick <atrick@apple.com> | 2011-09-29 01:33:38 +0000 |
|---|---|---|
| committer | Andrew Trick <atrick@apple.com> | 2011-09-29 01:33:38 +0000 |
| commit | bc6de90a5f6091f3a380a33f380b1923b11740a7 (patch) | |
| tree | 530f6d51353a2ef93072e6467c4451d4c4111af1 /llvm/test/CodeGen | |
| parent | 37470d5bde4ddf5c5b94ebed1f5fb109a47295d1 (diff) | |
| download | bcm5719-llvm-bc6de90a5f6091f3a380a33f380b1923b11740a7.tar.gz bcm5719-llvm-bc6de90a5f6091f3a380a33f380b1923b11740a7.zip | |
LSR: rewrite inner loops only.
Rewriting the entire loop nest now requires -enable-lsr-nested.
See PR11035 for some performance data.
A few unit tests specifically test nested LSR, and are now under a flag.
llvm-svn: 140762
Diffstat (limited to 'llvm/test/CodeGen')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/ARM/lsr-on-unrolled-loops.ll | 5 |
| -rw-r--r-- | llvm/test/CodeGen/X86/change-compare-stride-0.ll | 5 |
| -rw-r--r-- | llvm/test/CodeGen/X86/change-compare-stride-1.ll | 5 |
| -rw-r--r-- | llvm/test/CodeGen/X86/iv-users-in-other-loops.ll | 6 |
| -rw-r--r-- | llvm/test/CodeGen/X86/loop-strength-reduce3.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/pr3495.ll | 8 |
6 files changed, 24 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/llvm/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 140b02cb39e..47379016cf1 100644 --- a/llvm/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/llvm/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -enable-lsr-nested < %s | FileCheck %s ; LSR should recognize that this is an unrolled loop which can use ; constant offset addressing, so that each of the following stores @@ -8,6 +8,9 @@ ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64] ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96] +; We can also save a register in the outer loop, but that requires +; performing LSR on the outer loop. + target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" %0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* } diff --git a/llvm/test/CodeGen/X86/change-compare-stride-0.ll b/llvm/test/CodeGen/X86/change-compare-stride-0.ll index 3a383ee9c1d..439f7b0d4f6 100644 --- a/llvm/test/CodeGen/X86/change-compare-stride-0.ll +++ b/llvm/test/CodeGen/X86/change-compare-stride-0.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -enable-lsr-nested | FileCheck %s +; +; Nested LSR is required to optimize this case. +; We do not expect to see this form of IR without -enable-iv-rewrite. 
define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind { ; CHECK: borf: diff --git a/llvm/test/CodeGen/X86/change-compare-stride-1.ll b/llvm/test/CodeGen/X86/change-compare-stride-1.ll index eee3b79acfa..8b53ae2817c 100644 --- a/llvm/test/CodeGen/X86/change-compare-stride-1.ll +++ b/llvm/test/CodeGen/X86/change-compare-stride-1.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -enable-lsr-nested | FileCheck %s +; +; Nested LSR is required to optimize this case. +; We do not expect to see this form of IR without -enable-iv-rewrite. define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind { ; CHECK: borf: diff --git a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll index 8385a29fa22..8f79fb8cde2 100644 --- a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -o %t +; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t ; RUN: not grep inc %t ; RUN: grep dec %t | count 2 ; RUN: grep addq %t | count 12 @@ -11,6 +11,10 @@ ; to insert new induction variables. Previously it would create a ; flood of new induction variables. ; Also, the loop reversal should kick in once. +; +; In this example, performing LSR on the entire loop nest, +; as opposed to only the inner loop can further reduce induction variables, +; and their related instructions and registers. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce3.ll b/llvm/test/CodeGen/X86/loop-strength-reduce3.ll index c45a3741119..d6c265f329a 100644 --- a/llvm/test/CodeGen/X86/loop-strength-reduce3.ll +++ b/llvm/test/CodeGen/X86/loop-strength-reduce3.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 | grep cmp | grep 240 -; RUN: llc < %s -march=x86 | grep inc | count 1 +; RUN: llc < %s -march=x86 -enable-lsr-nested | grep cmp | grep 240 +; RUN: llc < %s -march=x86 -enable-lsr-nested | grep inc | count 1 define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind { entry: diff --git a/llvm/test/CodeGen/X86/pr3495.ll b/llvm/test/CodeGen/X86/pr3495.ll index 3dcd6da0bb0..7efd35b8b6d 100644 --- a/llvm/test/CodeGen/X86/pr3495.ll +++ b/llvm/test/CodeGen/X86/pr3495.ll @@ -1,7 +1,9 @@ -; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of loads added} | grep 2 -; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of spill slots allocated} | grep 1 -; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of machine instrs printed} | grep 34 +; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of loads added} | grep 2 +; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of spill slots allocated} | grep 1 +; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of machine instrs printed} | grep 34 ; PR3495 +; +; Note: this should not spill at all with either good LSR or good regalloc. target triple = "i386-pc-linux-gnu" @x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1] |

