diff options
author | Hiroshi Yamauchi <yamauchi@google.com> | 2019-04-15 16:49:00 +0000 |
---|---|---|
committer | Hiroshi Yamauchi <yamauchi@google.com> | 2019-04-15 16:49:00 +0000 |
commit | 09e539fcaebb6362795d352cdcf4a818cf4d0d6a (patch) | |
tree | a91b7d07d85c67b737db8e49e3f62f95dd512898 /llvm/test | |
parent | 64041d7b90714aa6f0542c64ec5bb327beecfc58 (diff) | |
download | bcm5719-llvm-09e539fcaebb6362795d352cdcf4a818cf4d0d6a.tar.gz bcm5719-llvm-09e539fcaebb6362795d352cdcf4a818cf4d0d6a.zip |
[PGO] Profile guided code size optimization.
Summary:
Enable some of the existing size optimizations for cold code under PGO.
This yields a ~5% code size saving in a big internal app under PGO.
How it obtains block frequency info (BFI) and profile summary info (PSI) is discussed in the RFC thread:
http://lists.llvm.org/pipermail/llvm-dev/2019-March/130894.html
Note it doesn't currently touch loop passes.
Reviewers: davidxl, eraman
Reviewed By: eraman
Subscribers: mgorny, javed.absar, smeenai, mehdi_amini, eraman, zzheng, steven_wu, dexonsmith, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59514
llvm-svn: 358422
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/Other/new-pm-defaults.ll | 2 | ||||
-rw-r--r-- | llvm/test/Other/new-pm-lto-defaults.ll | 1 | ||||
-rw-r--r-- | llvm/test/Other/new-pm-thinlto-defaults.ll | 2 | ||||
-rw-r--r-- | llvm/test/Other/opt-O2-pipeline.ll | 2 | ||||
-rw-r--r-- | llvm/test/Other/opt-O3-pipeline.ll | 2 | ||||
-rw-r--r-- | llvm/test/Other/opt-Os-pipeline.ll | 2 | ||||
-rw-r--r-- | llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll | 59 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/fputs-opt-size.ll | 33 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopLoadElim/opt-size.ll | 53 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll | 46 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/optsize.ll | 43 |
11 files changed, 243 insertions, 2 deletions
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 7ca22bf8882..317bffcefdc 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -106,6 +106,7 @@ ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-O-NEXT: Running analysis: AAManager +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. @@ -245,7 +246,6 @@ ; CHECK-O-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopUnrollPass -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index a3eaa1e856a..1128ae3a850 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -69,6 +69,7 @@ ; CHECK-O2-NEXT: Starting llvm::Function pass manager run. ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O2-NEXT: Running pass: InstCombinePass +; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass ; CHECK-O2-NEXT: Finished llvm::Function pass manager run. 
; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass> diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 9ad383a4e3b..079cea9255e 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -88,6 +88,7 @@ ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-O-NEXT: Running analysis: AAManager +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA @@ -219,7 +220,6 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass -; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 1e48c86d16d..ca12d9ad663 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -214,6 +214,8 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Loop Access Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Loop Load Elimination ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 1eb79159d0d..864b748df1b 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ 
-219,6 +219,8 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Loop Access Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Loop Load Elimination ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index c44f6506c4e..a0240ac2977 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -201,6 +201,8 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Loop Access Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Loop Load Elimination ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results diff --git a/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll b/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll index 6af2bb1d8ac..80428ad1cde 100644 --- a/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll +++ b/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll @@ -1,4 +1,6 @@ ; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s +; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso -S < %s | FileCheck %s -check-prefix=PGSO +; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO ; There are different candidates here for the base constant: 1073876992 and ; 1073876996. 
But we don't want to see the latter because it results in @@ -8,6 +10,7 @@ define void @foo() #0 { entry: ; CHECK-LABEL: @foo ; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4 +; CHECK-LABEL: @foo_pgso %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 %or = or i32 %0, 1 store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096 @@ -40,3 +43,59 @@ entry: } attributes #0 = { minsize norecurse nounwind optsize readnone uwtable } + +define void @foo_pgso() #1 !prof !14 { +entry: +; PGSO-LABEL: @foo_pgso +; PGSO-NOT: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4 +; NPGSO-LABEL: @foo_pgso +; NPGSO: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4 + %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %or = or i32 %0, 1 + store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4 + %and = and i32 %1, -117506048 + store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4 + %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %and1 = and i32 %2, -17367041 + store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096 + %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %and2 = and i32 %3, -262145 + store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4 + %and3 = and i32 %4, -8323073 + store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4 + store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8 + %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096 + %or4 = or i32 %5, 65536 + store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096 + %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %or6.i.i = or i32 %6, 16 + store volatile 
i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %and7.i.i = and i32 %7, -4 + store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %or8.i.i = or i32 %8, 2 + store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192 + ret void +} + +attributes #1 = { norecurse nounwind readnone uwtable } ; no optsize or minsize + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/Transforms/InstCombine/fputs-opt-size.ll b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll index ea8ef4203e9..54ac96f9f86 100644 --- a/llvm/test/Transforms/InstCombine/fputs-opt-size.ll +++ b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll @@ -2,6 +2,8 @@ ; because it requires more arguments and thus extra MOVs are required. 
; ; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO +; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } @@ -26,3 +28,34 @@ declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_un attributes #0 = { nounwind optsize } attributes #1 = { nounwind optsize } + +define i32 @main_pgso() local_unnamed_addr !prof !14 { +entry: +; PGSO-LABEL: @main_pgso( +; PGSO-NOT: call i64 @fwrite +; PGSO: call i32 @fputs +; NPGSO-LABEL: @main_pgso( +; NPGSO: call i64 @fwrite +; NPGSO-NOT: call i32 @fputs + + %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2 + %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2 + ret i32 0 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/Transforms/LoopLoadElim/opt-size.ll b/llvm/test/Transforms/LoopLoadElim/opt-size.ll index a6322d874f3..f9d82fec2a2 100644 --- a/llvm/test/Transforms/LoopLoadElim/opt-size.ll +++ 
b/llvm/test/Transforms/LoopLoadElim/opt-size.ll @@ -1,4 +1,6 @@ ; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -basicaa -loop-load-elim -pgso -S < %s | FileCheck %s -check-prefix=PGSO +; RUN: opt -basicaa -loop-load-elim -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO ; When optimizing for size don't eliminate in this loop because the loop would ; have to be versioned first because A and C may alias. @@ -74,3 +76,54 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } + + +; PGSO-LABEL: @f_pgso( +; NPGSO-LABEL: @f_pgso( +define void @f_pgso(i32* %A, i32* %B, i32* %C, i64 %N) !prof !14 { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + + %a = load i32, i32* %Aidx, align 4 +; PGSO: %c = mul i32 %a, 2 +; NPGSO-NOT: %c = mul i32 %a, 2 + %c = mul i32 %a, 2 + store i32 %c, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} 
+!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll b/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll index f4a76c71368..2f0eb756fb6 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-opt-attribute.ll @@ -1,5 +1,7 @@ ; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso | FileCheck -check-prefix=PGSO %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso=false | FileCheck -check-prefix=NPGSO %s
;///////////////////// TEST 1 //////////////////////////////
@@ -128,3 +130,47 @@ for.end: ; preds = %for.body ; CHECK_NOCOUNT-LABEL: @Test4
; CHECK_NOCOUNT: phi
; CHECK_NOCOUNT: icmp
+
+;///////////////////// TEST 5 //////////////////////////////
+
+; This test shows that with PGO and -pgso enabled, this loop is cold (function entry count 0) and is not unrolled; with -pgso=false it is fully unrolled.
+
+define i32 @Test5() !prof !14 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+ store i32 %i.05, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 24
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 42
+}
+
+; PGSO-LABEL: @Test5
+; PGSO: phi
+; PGSO: icmp
+; NPGSO-LABEL: @Test5
+; NPGSO-NOT: phi
+; NPGSO-NOT: icmp
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 513657cd372..403c006eeb5 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -2,6 +2,8 @@ ; loop with the optimize for size or the minimize size attributes. ; REQUIRES: asserts ; RUN: opt < %s -loop-vectorize -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -pgso -S | FileCheck %s -check-prefix=PGSO +; RUN: opt < %s -loop-vectorize -pgso=false -S | FileCheck %s -check-prefix=NPGSO target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128" @@ -36,6 +38,7 @@ define i32 @foo_minsize() #1 { ; CHECK-LABEL: @foo_minsize( ; CHECK-NOT: <2 x i8> ; CHECK-NOT: <4 x i8> +; CHECK-LABEL: @foo_pgso( entry: br label %for.body @@ -57,3 +60,43 @@ for.end: ; preds = %for.body attributes #1 = { minsize } +define i32 @foo_pgso() !prof !14 { +; PGSO-LABEL: @foo_pgso( +; PGSO-NOT: <{{[0-9]+}} x i8> +; NPGSO-LABEL: @foo_pgso( +; NPGSO: <{{[0-9]+}} x i8> + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08 + %0 = load i8, i8* %arrayidx, align 1 + %cmp1 = icmp eq i8 %0, 0 + %. 
= select i1 %cmp1, i8 2, i8 1 + store i8 %., i8* %arrayidx, align 1 + %inc = add nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %i.08, 202 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} |