summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Zaara Syeda <syzaara@ca.ibm.com> 2018-01-17 18:22:55 +0000
committer: Zaara Syeda <syzaara@ca.ibm.com> 2018-01-17 18:22:55 +0000
commit: 8e951fd2f6267275becabef2f7ad973a2af0e6a8 (patch)
tree: 98e6d8baf3a845f2bf255c9545c2a66fece7dc88 /llvm/test
parent: 2686e3cac64a7f9b3b7f7ace0abf226b149a7ad9 (diff)
download: bcm5719-llvm-8e951fd2f6267275becabef2f7ad973a2af0e6a8.tar.gz
download: bcm5719-llvm-8e951fd2f6267275becabef2f7ad973a2af0e6a8.zip
[PowerPC] Add handling for ColdCC calling convention and a pass to mark candidates with coldcc attribute.

This patch adds support for the coldcc calling convention for Power. This changes the set of non-volatile registers. It includes a pass to stress test the implementation by marking all static directly called functions with the coldcc attribute through the option -enable-coldcc-stress-test. It also includes an option, -ppc-enable-coldcc, to add the coldcc attribute to functions which are cold at all call sites based on BlockFrequencyInfo when the containing function does not call any non-cold functions.

Differential Revision: https://reviews.llvm.org/D38413
llvm-svn: 322721
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/PowerPC/coldcc.ll 46
-rw-r--r-- llvm/test/CodeGen/PowerPC/coldcc2.ll 42
-rw-r--r-- llvm/test/Other/pass-pipelines.ll 2
-rw-r--r-- llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll 81
-rw-r--r-- llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg 3
-rw-r--r-- llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll 48
6 files changed, 221 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/PowerPC/coldcc.ll b/llvm/test/CodeGen/PowerPC/coldcc.ll
new file mode 100644
index 00000000000..056e944321f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/coldcc.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=COLDCC
+
+define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %cold) { ; calls @callee with coldcc only when %cold is non-zero
+entry:
+ %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"(i32 %a, i32 %b) ; inline asm lists r14-r31 as clobbered
+ %mul = mul nsw i32 %0, %cold
+ %tobool = icmp eq i32 %cold, 0
+ br i1 %tobool, label %if.end, label %if.then ; %cold == 0 skips the call and returns %0
+
+if.then: ; preds = %entry
+ %mul1 = mul nsw i32 %mul, %cold
+ %mul2 = mul nsw i32 %b, %a
+ %call = tail call coldcc signext i32 @callee(i32 signext %a, i32 signext %b) ; the only call; %a, %mul, %mul1 and %mul2 are used after it
+ %add = add i32 %mul2, %a
+ %add3 = add i32 %add, %mul
+ %add4 = add i32 %add3, %mul1
+ %add5 = add i32 %add4, %call
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %f.0 = phi i32 [ %add5, %if.then ], [ %0, %entry ]
+ ret i32 %f.0
+}
+
+define internal coldcc signext i32 @callee(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { ; coldcc function; the patterns below expect std then ld of registers 6-10 at negative offsets from register 1
+entry:
+; COLDCC: @callee
+; COLDCC: std 6, -8(1)
+; COLDCC: std 7, -16(1)
+; COLDCC: std 8, -24(1)
+; COLDCC: std 9, -32(1)
+; COLDCC: std 10, -40(1)
+; COLDCC: ld 9, -32(1)
+; COLDCC: ld 8, -24(1)
+; COLDCC: ld 7, -16(1)
+; COLDCC: ld 10, -40(1)
+; COLDCC: ld 6, -8(1)
+ %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9},~{r10}"(i32 %a, i32 %b) ; inline asm lists r6-r10 as clobbered, the same registers the patterns above name
+ %mul = mul nsw i32 %a, 3
+ %1 = mul i32 %b, -5
+ %add = add i32 %1, %mul
+ %sub = add i32 %add, %0 ; result is 3*%a - 5*%b + asm result
+ ret i32 %sub
+}
+
+attributes #0 = { noinline }
diff --git a/llvm/test/CodeGen/PowerPC/coldcc2.ll b/llvm/test/CodeGen/PowerPC/coldcc2.ll
new file mode 100644
index 00000000000..315198fca85
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/coldcc2.ll
@@ -0,0 +1,42 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=COLDCC
+
+%struct.MyStruct = type { i32, i32, i32, i32 }
+
+@caller.s = internal unnamed_addr global %struct.MyStruct zeroinitializer, align 8
+
+define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %cold) { ; calls the coldcc @callee that returns { i64, i64 } and stores both elements into @caller.s
+entry:
+; COLDCC: bl callee
+; COLDCC: ld 4, 40(1)
+; COLDCC: ld 5, 32(1)
+ %call = tail call coldcc { i64, i64 } @callee(i32 signext %a, i32 signext %b) ; the patterns above expect two loads relative to register 1 after the branch-and-link
+ %0 = extractvalue { i64, i64 } %call, 0
+ %1 = extractvalue { i64, i64 } %call, 1
+ store i64 %0, i64* bitcast (%struct.MyStruct* @caller.s to i64*), align 8 ; first element covers struct fields 0 and 1
+ store i64 %1, i64* bitcast (i32* getelementptr inbounds (%struct.MyStruct, %struct.MyStruct* @caller.s, i64 0, i32 2) to i64*), align 8 ; second element is written starting at field 2
+ %2 = lshr i64 %1, 32 ; keep the upper 32 bits of the second element
+ %3 = trunc i64 %2 to i32
+ %sub = sub nsw i32 0, %3 ; return the negation of that upper half
+ ret i32 %sub
+}
+
+define internal coldcc { i64, i64 } @callee(i32 signext %a, i32 signext %b) { ; coldcc function returning a two-element aggregate; the patterns below expect stores at offsets 0 and 8 from register 3
+entry:
+; COLDCC: std {{[0-9]+}}, 0(3)
+; COLDCC: std {{[0-9]+}}, 8(3)
+ %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9},~{r10}"(i32 %a, i32 %b) ; inline asm lists r6-r10 as clobbered
+ %mul = mul nsw i32 %a, 3
+ %1 = mul i32 %b, -5
+ %add = add i32 %1, %mul
+ %sub = add i32 %add, %0
+ %mul5 = mul nsw i32 %b, %a
+ %add6 = add nsw i32 %sub, %mul5
+ %retval.sroa.0.0.insert.ext = zext i32 %0 to i64 ; element 0 is the zero-extended asm result
+ %retval.sroa.3.8.insert.ext = zext i32 %sub to i64
+ %retval.sroa.3.12.insert.ext = zext i32 %add6 to i64
+ %retval.sroa.3.12.insert.shift = shl nuw i64 %retval.sroa.3.12.insert.ext, 32 ; move %add6 into the upper 32 bits
+ %retval.sroa.3.12.insert.insert = or i64 %retval.sroa.3.12.insert.shift, %retval.sroa.3.8.insert.ext ; element 1 packs %add6 (high half) with %sub (low half)
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.insert.ext, 0
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.3.12.insert.insert, 1
+ ret { i64, i64 } %.fca.1.insert
+}
diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll
index 9e5176eddaa..dddf1338a4d 100644
--- a/llvm/test/Other/pass-pipelines.ll
+++ b/llvm/test/Other/pass-pipelines.ll
@@ -93,7 +93,7 @@
; FIXME: There really shouldn't be another pass manager, especially one that
; just builds the domtree. It doesn't even run the verifier.
; CHECK-O2: Pass Arguments:
-; CHECK-O2-NEXT: FunctionPass Manager
+; CHECK-O2: FunctionPass Manager
; CHECK-O2-NEXT: Dominator Tree Construction
define void @foo() {
diff --git a/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll b/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll
new file mode 100644
index 00000000000..8fedf834f40
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll
@@ -0,0 +1,81 @@
+; RUN: opt -globalopt -mtriple=powerpc64le-unknown-linux-gnu -ppc-enable-coldcc -S < %s | FileCheck %s -check-prefix=COLDCC
+; RUN: opt -globalopt -S < %s | FileCheck %s -check-prefix=CHECK
+
+define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %lim, i32 signext %i) local_unnamed_addr #0 !prof !30 { ; entry count is given by !30; calls @callee only when %i == %lim - 1
+entry:
+; COLDCC: call coldcc signext i32 @callee
+; CHECK: call fastcc signext i32 @callee
+ %add = add nsw i32 %b, %a
+ %sub = add nsw i32 %lim, -1
+ %cmp = icmp eq i32 %sub, %i
+ br i1 %cmp, label %if.then, label %if.end, !prof !31 ; branch weights in !31 apply to if.then and if.end in that order
+
+if.then: ; preds = %entry
+ %call = tail call signext i32 @callee(i32 signext %a, i32 signext %b) ; written with no explicit convention; the two RUN lines expect it rewritten to coldcc or fastcc respectively
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %f.0 = phi i32 [ %call, %if.then ], [ %add, %entry ]
+ ret i32 %f.0
+}
+
+define internal signext i32 @callee(i32 signext %a, i32 signext %b) unnamed_addr #0 { ; internal function whose only visible call site is in @caller's if.then block
+entry:
+ %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9}"(i32 %a, i32 %b) #1, !srcloc !32 ; inline asm lists r6-r9 as clobbered
+ %mul = mul nsw i32 %a, 3
+ %mul1 = shl i32 %0, 1 ; asm result times two
+ %add = add nsw i32 %mul1, %mul ; returns 2*asm result + 3*%a
+ ret i32 %add
+}
+
+define signext i32 @main() local_unnamed_addr #0 !prof !33 { ; entry count is given by !33; loops calling @caller and sums the results
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ ret i32 %add.lcssa
+
+for.body: ; preds = %for.body, %entry
+ %i.011 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %ret.010 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %call = tail call signext i32 @caller(i32 signext 4, i32 signext 5, i32 signext 10000000, i32 signext %i.011) ; %lim is the constant 10000000 and %i is the loop counter
+ %add = add nsw i32 %call, %ret.010 ; running sum of @caller's results
+ %inc = add nuw nsw i32 %i.011, 1
+ %exitcond = icmp eq i32 %inc, 10000000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !34 ; loop exits once %inc reaches 10000000; weights come from !34
+}
+attributes #0 = { noinline }
+
+!0 = !{i32 1, !"ProfileSummary", !1} ; NOTE(review): no !llvm.module.flags line in this hunk refers to !0 - confirm whether the summary is meant to be attached
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 20000003}
+!4 = !{!"MaxCount", i64 10000000}
+!5 = !{!"MaxInternalCount", i64 10000000}
+!6 = !{!"MaxFunctionCount", i64 10000000}
+!7 = !{!"NumCounts", i64 5}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13, !14, !15, !16, !16, !17, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26} ; NOTE(review): !16 and !17 each appear twice in this list - confirm that is intended
+!11 = !{i32 10000, i64 10000000, i32 2}
+!12 = !{i32 100000, i64 10000000, i32 2}
+!13 = !{i32 200000, i64 10000000, i32 2}
+!14 = !{i32 300000, i64 10000000, i32 2}
+!15 = !{i32 400000, i64 10000000, i32 2}
+!16 = !{i32 500000, i64 10000000, i32 2}
+!17 = !{i32 600000, i64 10000000, i32 2}
+!18 = !{i32 700000, i64 10000000, i32 2}
+!19 = !{i32 800000, i64 10000000, i32 2}
+!20 = !{i32 900000, i64 10000000, i32 2}
+!21 = !{i32 950000, i64 10000000, i32 2}
+!22 = !{i32 990000, i64 10000000, i32 2}
+!23 = !{i32 999000, i64 10000000, i32 2}
+!24 = !{i32 999900, i64 10000000, i32 2}
+!25 = !{i32 999990, i64 10000000, i32 2}
+!26 = !{i32 999999, i64 10000000, i32 2}
+!30 = !{!"function_entry_count", i64 10000000} ; attached to @caller through !prof
+!31 = !{!"branch_weights", i32 2, i32 10000000} ; attached to the conditional branch in @caller; the small weight is on the edge to the @callee call
+!32 = !{i32 59} ; referenced as !srcloc by the inline asm in @callee
+!33 = !{!"function_entry_count", i64 1} ; attached to @main through !prof
+!34 = !{!"branch_weights", i32 2, i32 10000001} ; attached to the loop branch in @main
diff --git a/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg b/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg
new file mode 100644
index 00000000000..5d33887ff0a
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:  # true when PowerPC is absent from the root config's target list
+ config.unsupported = True  # flag this directory's configuration as unsupported
+
diff --git a/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll b/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll
new file mode 100644
index 00000000000..80c9366af6f
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -globalopt -S -enable-coldcc-stress-test -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=COLDCC
+; RUN: opt < %s -globalopt -S | FileCheck %s -check-prefix=CHECK
+
+define internal i32 @callee_default(i32* %m) { ; internal, no explicit convention; expected to become coldcc in the stress-test run and fastcc in the plain run
+; COLDCC-LABEL: define internal coldcc i32 @callee_default
+; CHECK-LABEL: define internal fastcc i32 @callee_default
+ %v = load i32, i32* %m
+ ret i32 %v
+}
+
+define internal fastcc i32 @callee_fastcc(i32* %m) { ; already fastcc; both runs expect the convention to stay fastcc
+; COLDCC-LABEL: define internal fastcc i32 @callee_fastcc
+; CHECK-LABEL: define internal fastcc i32 @callee_fastcc
+ %v = load i32, i32* %m
+ ret i32 %v
+}
+
+define internal coldcc i32 @callee_coldcc(i32* %m) { ; already coldcc; both runs expect the convention to stay coldcc
+; COLDCC-LABEL: define internal coldcc i32 @callee_coldcc
+; CHECK-LABEL: define internal coldcc i32 @callee_coldcc
+ %v = load i32, i32* %m
+ ret i32 %v
+}
+
+define i32 @callee(i32* %m) { ; not declared internal; both runs expect the call to it in @caller to keep no explicit convention
+ %v = load i32, i32* %m
+ ret i32 %v
+}
+
+define void @caller() { ; calls each of the four callees once; the expectations for these call sites follow the function
+ %m = alloca i32
+ call i32 @callee_default(i32* %m) ; expected coldcc in the stress-test run, fastcc in the plain run
+ call fastcc i32 @callee_fastcc(i32* %m) ; expected unchanged in both runs
+ call coldcc i32 @callee_coldcc(i32* %m) ; expected unchanged in both runs
+ call i32 @callee(i32* %m) ; expected unchanged in both runs
+ ret void
+}
+
+; COLDCC-LABEL: define void @caller()
+; COLDCC: call coldcc i32 @callee_default
+; COLDCC: call fastcc i32 @callee_fastcc
+; COLDCC: call coldcc i32 @callee_coldcc
+; COLDCC: call i32 @callee
+; CHECK-LABEL: define void @caller()
+; CHECK: call fastcc i32 @callee_default
+; CHECK: call fastcc i32 @callee_fastcc
+; CHECK: call coldcc i32 @callee_coldcc
+; CHECK: call i32 @callee
OpenPOWER on IntegriCloud