summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
authorDehao Chen <dehao@google.com>2017-02-18 03:46:51 +0000
committerDehao Chen <dehao@google.com>2017-02-18 03:46:51 +0000
commit7d230325efbd6bdbddd55750357597789fdfd154 (patch)
tree267ea5d84ae55c91da04c58aee19659d7505d4ce /llvm/test/Transforms/LoopVectorize
parent982bf827b57a727ceeec9bd34c25ccb2b0133a9d (diff)
downloadbcm5719-llvm-7d230325efbd6bdbddd55750357597789fdfd154.tar.gz
bcm5719-llvm-7d230325efbd6bdbddd55750357597789fdfd154.zip
Increases full-unroll threshold.
Summary: The default threshold for full unrolling is too conservative. This patch doubles the full-unroll threshold. This change will affect the following speccpu2006 benchmarks (performance numbers were collected on Intel Sandybridge): Performance: 403 0.11%, 433 0.51%, 445 0.48%, 447 3.50%, 453 1.49%, 464 0.75%. Code size: 403 0.56%, 433 0.96%, 445 2.16%, 447 2.96%, 453 0.94%, 464 8.02%. The compile-time overhead is similar to the code-size increase. Reviewers: davidxl, mkuper, mzolotukhin, hfinkel, chandlerc Reviewed By: hfinkel, chandlerc Subscribers: mehdi_amini, zzheng, efriedma, haicheng, hfinkel, llvm-commits Differential Revision: https://reviews.llvm.org/D28368 llvm-svn: 295538
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll14
1 files changed, 12 insertions, 2 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 74c0c16086f..e1793bcc321 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1,13 +1,14 @@
; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
-; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC
; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC
; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
-; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
+; RUN: opt < %s -mcpu=corei7 -O3 -unroll-threshold=150 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
; This file tests the llvm.loop.vectorize.enable metadata forcing
; vectorization even when optimization levels are too low, or when
@@ -25,6 +26,9 @@ target triple = "x86_64-unknown-linux-gnu"
; O3-LABEL: @enabled(
; O3: store <4 x i32>
; O3: ret i32
+; O3DEFAULT-LABEL: @enabled(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
; Pragma always wins!
; O3DIS-LABEL: @enabled(
; O3DIS: store <4 x i32>
@@ -77,6 +81,9 @@ for.end: ; preds = %for.body
; O3-LABEL: @nopragma(
; O3: store <4 x i32>
; O3: ret i32
+; O3DEFAULT-LABEL: @nopragma(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
; O3DIS-LABEL: @nopragma(
; O3DIS-NOT: store <4 x i32>
; O3DIS: ret i32
@@ -128,6 +135,9 @@ for.end: ; preds = %for.body
; O3-LABEL: @disabled(
; O3-NOT: store <4 x i32>
; O3: ret i32
+; O3DEFAULT-LABEL: @disabled(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
; O3DIS-LABEL: @disabled(
; O3DIS-NOT: store <4 x i32>
; O3DIS: ret i32
OpenPOWER on IntegriCloud