summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Hiroshi Yamauchi <yamauchi@google.com> 2019-04-15 16:49:00 +0000
committer: Hiroshi Yamauchi <yamauchi@google.com> 2019-04-15 16:49:00 +0000
commit: 09e539fcaebb6362795d352cdcf4a818cf4d0d6a (patch)
tree: a91b7d07d85c67b737db8e49e3f62f95dd512898 /llvm/test
parent: 64041d7b90714aa6f0542c64ec5bb327beecfc58 (diff)
download: bcm5719-llvm-09e539fcaebb6362795d352cdcf4a818cf4d0d6a.tar.gz
download: bcm5719-llvm-09e539fcaebb6362795d352cdcf4a818cf4d0d6a.zip
[PGO] Profile guided code size optimization.
Summary:
Enable some of the existing size optimizations for cold code under PGO.

A ~5% code size saving in big internal app under PGO.

The way it gets BFI/PSI is discussed in the RFC thread
http://lists.llvm.org/pipermail/llvm-dev/2019-March/130894.html

Note it doesn't currently touch loop passes.

Reviewers: davidxl, eraman

Reviewed By: eraman

Subscribers: mgorny, javed.absar, smeenai, mehdi_amini, eraman, zzheng, steven_wu, dexonsmith, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59514

llvm-svn: 358422
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/Other/new-pm-defaults.ll 2
-rw-r--r-- llvm/test/Other/new-pm-lto-defaults.ll 1
-rw-r--r-- llvm/test/Other/new-pm-thinlto-defaults.ll 2
-rw-r--r-- llvm/test/Other/opt-O2-pipeline.ll 2
-rw-r--r-- llvm/test/Other/opt-O3-pipeline.ll 2
-rw-r--r-- llvm/test/Other/opt-Os-pipeline.ll 2
-rw-r--r-- llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll 59
-rw-r--r-- llvm/test/Transforms/InstCombine/fputs-opt-size.ll 33
-rw-r--r-- llvm/test/Transforms/LoopLoadElim/opt-size.ll 53
-rw-r--r-- llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll 46
-rw-r--r-- llvm/test/Transforms/LoopVectorize/optsize.ll 43
11 files changed, 243 insertions, 2 deletions
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 7ca22bf8882..317bffcefdc 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -106,6 +106,7 @@
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running analysis: AAManager
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
@@ -245,7 +246,6 @@
; CHECK-O-NEXT: Running pass: SLPVectorizerPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopUnrollPass
-; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index a3eaa1e856a..1128ae3a850 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -69,6 +69,7 @@
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O2-NEXT: Running pass: InstCombinePass
+; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass>
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index 9ad383a4e3b..079cea9255e 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -88,6 +88,7 @@
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running analysis: AAManager
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
@@ -219,7 +220,6 @@
; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
-; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll
index 1e48c86d16d..ca12d9ad663 100644
--- a/llvm/test/Other/opt-O2-pipeline.ll
+++ b/llvm/test/Other/opt-O2-pipeline.ll
@@ -214,6 +214,8 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Loop Access Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Loop Load Elimination
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll
index 1eb79159d0d..864b748df1b 100644
--- a/llvm/test/Other/opt-O3-pipeline.ll
+++ b/llvm/test/Other/opt-O3-pipeline.ll
@@ -219,6 +219,8 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Loop Access Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Loop Load Elimination
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll
index c44f6506c4e..a0240ac2977 100644
--- a/llvm/test/Other/opt-Os-pipeline.ll
+++ b/llvm/test/Other/opt-Os-pipeline.ll
@@ -201,6 +201,8 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Loop Access Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Loop Load Elimination
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
diff --git a/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll b/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
index 6af2bb1d8ac..80428ad1cde 100644
--- a/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
+++ b/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
@@ -1,4 +1,6 @@
; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso -S < %s | FileCheck %s -check-prefix=PGSO
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
; There are different candidates here for the base constant: 1073876992 and
; 1073876996. But we don't want to see the latter because it results in
@@ -8,6 +10,7 @@ define void @foo() #0 {
entry:
; CHECK-LABEL: @foo
; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+; CHECK-LABEL: @foo_pgso
%0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
%or = or i32 %0, 1
store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
@@ -40,3 +43,59 @@ entry:
}
attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
+
+define void @foo_pgso() #1 !prof !14 {
+entry:
+; PGSO-LABEL: @foo_pgso
+; PGSO-NOT: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+; NPGSO-LABEL: @foo_pgso
+; NPGSO: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+ %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %or = or i32 %0, 1
+ store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and = and i32 %1, -117506048
+ store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and1 = and i32 %2, -17367041
+ store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and2 = and i32 %3, -262145
+ store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and3 = and i32 %4, -8323073
+ store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+ store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+ %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %or4 = or i32 %5, 65536
+ store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or6.i.i = or i32 %6, 16
+ store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %and7.i.i = and i32 %7, -4
+ store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or8.i.i = or i32 %8, 2
+ store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ ret void
+}
+
+attributes #1 = { norecurse nounwind readnone uwtable } ; no optsize or minsize
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/Transforms/InstCombine/fputs-opt-size.ll b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll
index ea8ef4203e9..54ac96f9f86 100644
--- a/llvm/test/Transforms/InstCombine/fputs-opt-size.ll
+++ b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll
@@ -2,6 +2,8 @@
; because it requires more arguments and thus extra MOVs are required.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO
+; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@@ -26,3 +28,34 @@ declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_un
attributes #0 = { nounwind optsize }
attributes #1 = { nounwind optsize }
+
+define i32 @main_pgso() local_unnamed_addr !prof !14 {
+entry:
+; PGSO-LABEL: @main_pgso(
+; PGSO-NOT: call i64 @fwrite
+; PGSO: call i32 @fputs
+; NPGSO-LABEL: @main_pgso(
+; NPGSO: call i64 @fwrite
+; NPGSO-NOT: call i32 @fputs
+
+ %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
+ %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/Transforms/LoopLoadElim/opt-size.ll b/llvm/test/Transforms/LoopLoadElim/opt-size.ll
index a6322d874f3..f9d82fec2a2 100644
--- a/llvm/test/Transforms/LoopLoadElim/opt-size.ll
+++ b/llvm/test/Transforms/LoopLoadElim/opt-size.ll
@@ -1,4 +1,6 @@
; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -basicaa -loop-load-elim -pgso -S < %s | FileCheck %s -check-prefix=PGSO
+; RUN: opt -basicaa -loop-load-elim -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
; When optimizing for size don't eliminate in this loop because the loop would
; have to be versioned first because A and C may alias.
@@ -74,3 +76,54 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
+
+
+; PGSO-LABEL: @f_pgso(
+; NPGSO-LABEL: @f_pgso(
+define void @f_pgso(i32* %A, i32* %B, i32* %C, i64 %N) !prof !14 {
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; PGSO: %c = mul i32 %a, 2
+; NPGSO-NOT: %c = mul i32 %a, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll b/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
index f4a76c71368..2f0eb756fb6 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso | FileCheck -check-prefix=PGSO %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso=false | FileCheck -check-prefix=NPGSO %s
;///////////////////// TEST 1 //////////////////////////////
@@ -128,3 +130,47 @@ for.end: ; preds = %for.body
; CHECK_NOCOUNT-LABEL: @Test4
; CHECK_NOCOUNT: phi
; CHECK_NOCOUNT: icmp
+
+;///////////////////// TEST 5 //////////////////////////////
+
+; This test shows that with PGO, this loop is cold and not unrolled.
+
+define i32 @Test5() !prof !14 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+ store i32 %i.05, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 24
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 42
+}
+
+; PGSO-LABEL: @Test5
+; PGSO: phi
+; PGSO: icmp
+; NPGSO-LABEL: @Test5
+; NPGSO-NOT: phi
+; NPGSO-NOT: icmp
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll
index 513657cd372..403c006eeb5 100644
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -2,6 +2,8 @@
; loop with the optimize for size or the minimize size attributes.
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -pgso -S | FileCheck %s -check-prefix=PGSO
+; RUN: opt < %s -loop-vectorize -pgso=false -S | FileCheck %s -check-prefix=NPGSO
target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
@@ -36,6 +38,7 @@ define i32 @foo_minsize() #1 {
; CHECK-LABEL: @foo_minsize(
; CHECK-NOT: <2 x i8>
; CHECK-NOT: <4 x i8>
+; CHECK-LABEL: @foo_pgso(
entry:
br label %for.body
@@ -57,3 +60,43 @@ for.end: ; preds = %for.body
attributes #1 = { minsize }
+define i32 @foo_pgso() !prof !14 {
+; PGSO-LABEL: @foo_pgso(
+; PGSO-NOT: <{{[0-9]+}} x i8>
+; NPGSO-LABEL: @foo_pgso(
+; NPGSO: <{{[0-9]+}} x i8>
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+ %0 = load i8, i8* %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ %. = select i1 %cmp1, i8 2, i8 1
+ store i8 %., i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, 202
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
OpenPOWER on IntegriCloud