diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2014-02-02 03:12:34 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2014-02-02 03:12:34 +0000 |
commit | 17455633c77d7bbdf7eaec852a4b6daef54b5f7f (patch) | |
tree | a6fb3be51e1303c18df9773f2555891f90870551 /llvm/test/Transforms/LoopVectorize | |
parent | 6e63dd27a27870f85d00df8ba40ce8ed6b07917f (diff) | |
download | bcm5719-llvm-17455633c77d7bbdf7eaec852a4b6daef54b5f7f.tar.gz bcm5719-llvm-17455633c77d7bbdf7eaec852a4b6daef54b5f7f.zip |
LoopVectorizer: Enable unrolling of conditional stores and the load/store
unrolling heuristic by default
Benchmarking on x86_64 (thanks Chandler!) and ARM has shown those options speed
up some benchmarks while not causing any interesting regressions.
llvm-svn: 200621
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll | 3 |
1 file changed, 3 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll index 0b87e0e7405..8843fc2d2b1 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll @@ -47,6 +47,7 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) { %sum.03 = phi i32 [ %7, %.lr.ph ], [ 0, %0 ] %sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ] %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] %2 = getelementptr inbounds i32* %A, i32 %i.02 %3 = load i32* %2, align 4 %4 = add nsw i32 %3, %sum.01 @@ -55,6 +56,7 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) { %7 = add nsw i32 %3, %sum.03 %8 = add nsw i32 %3, %sum.04 %9 = add nsw i32 %3, %sum.05 + %10 = add nsw i32 %3, %sum.05 %exitcond = icmp eq i32 %5, %n br i1 %exitcond, label %._crit_edge, label %.lr.ph @@ -64,5 +66,6 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) { %sum.2.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ] %sum.4.lcssa = phi i32 [ 0, %0 ], [ %8, %.lr.ph ] %sum.5.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + %sum.6.lcssa = phi i32 [ 0, %0 ], [ %10, %.lr.ph ] ret i32 %sum.0.lcssa } |