diff options
| author | Zaara Syeda <syzaara@ca.ibm.com> | 2018-01-17 18:22:55 +0000 |
|---|---|---|
| committer | Zaara Syeda <syzaara@ca.ibm.com> | 2018-01-17 18:22:55 +0000 |
| commit | 8e951fd2f6267275becabef2f7ad973a2af0e6a8 (patch) | |
| tree | 98e6d8baf3a845f2bf255c9545c2a66fece7dc88 /llvm/test | |
| parent | 2686e3cac64a7f9b3b7f7ace0abf226b149a7ad9 (diff) | |
| download | bcm5719-llvm-8e951fd2f6267275becabef2f7ad973a2af0e6a8.tar.gz bcm5719-llvm-8e951fd2f6267275becabef2f7ad973a2af0e6a8.zip | |
[PowerPC] Add handling for ColdCC calling convention and a pass to mark
candidates with coldcc attribute.
This patch adds support for the coldcc calling convention on Power.
The convention changes the set of non-volatile registers. The patch also includes
a pass that stress-tests the implementation by marking all static, directly called
functions with the coldcc attribute; it is enabled with the option
-enable-coldcc-stress-test. In addition, the option -ppc-enable-coldcc adds the
coldcc attribute to functions that are cold at all call sites, based on
BlockFrequencyInfo, when the containing function does not call any non-cold functions.
Differential Revision: https://reviews.llvm.org/D38413
llvm-svn: 322721
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/coldcc.ll | 46 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/coldcc2.ll | 42 | ||||
| -rw-r--r-- | llvm/test/Other/pass-pipelines.ll | 2 | ||||
| -rw-r--r-- | llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll | 81 | ||||
| -rw-r--r-- | llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg | 3 | ||||
| -rw-r--r-- | llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll | 48 |
6 files changed, 221 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/PowerPC/coldcc.ll b/llvm/test/CodeGen/PowerPC/coldcc.ll new file mode 100644 index 00000000000..056e944321f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/coldcc.ll @@ -0,0 +1,46 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=COLDCC + +define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %cold) { +entry: + %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"(i32 %a, i32 %b) + %mul = mul nsw i32 %0, %cold + %tobool = icmp eq i32 %cold, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %mul1 = mul nsw i32 %mul, %cold + %mul2 = mul nsw i32 %b, %a + %call = tail call coldcc signext i32 @callee(i32 signext %a, i32 signext %b) + %add = add i32 %mul2, %a + %add3 = add i32 %add, %mul + %add4 = add i32 %add3, %mul1 + %add5 = add i32 %add4, %call + br label %if.end + +if.end: ; preds = %entry, %if.then + %f.0 = phi i32 [ %add5, %if.then ], [ %0, %entry ] + ret i32 %f.0 +} + +define internal coldcc signext i32 @callee(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { +entry: +; COLDCC: @callee +; COLDCC: std 6, -8(1) +; COLDCC: std 7, -16(1) +; COLDCC: std 8, -24(1) +; COLDCC: std 9, -32(1) +; COLDCC: std 10, -40(1) +; COLDCC: ld 9, -32(1) +; COLDCC: ld 8, -24(1) +; COLDCC: ld 7, -16(1) +; COLDCC: ld 10, -40(1) +; COLDCC: ld 6, -8(1) + %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9},~{r10}"(i32 %a, i32 %b) + %mul = mul nsw i32 %a, 3 + %1 = mul i32 %b, -5 + %add = add i32 %1, %mul + %sub = add i32 %add, %0 + ret i32 %sub +} + +attributes #0 = { noinline } diff --git a/llvm/test/CodeGen/PowerPC/coldcc2.ll b/llvm/test/CodeGen/PowerPC/coldcc2.ll new file mode 100644 index 00000000000..315198fca85 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/coldcc2.ll @@ -0,0 +1,42 @@ +; RUN: llc 
-verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=COLDCC + +%struct.MyStruct = type { i32, i32, i32, i32 } + +@caller.s = internal unnamed_addr global %struct.MyStruct zeroinitializer, align 8 + +define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %cold) { +entry: +; COLDCC: bl callee +; COLDCC: ld 4, 40(1) +; COLDCC: ld 5, 32(1) + %call = tail call coldcc { i64, i64 } @callee(i32 signext %a, i32 signext %b) + %0 = extractvalue { i64, i64 } %call, 0 + %1 = extractvalue { i64, i64 } %call, 1 + store i64 %0, i64* bitcast (%struct.MyStruct* @caller.s to i64*), align 8 + store i64 %1, i64* bitcast (i32* getelementptr inbounds (%struct.MyStruct, %struct.MyStruct* @caller.s, i64 0, i32 2) to i64*), align 8 + %2 = lshr i64 %1, 32 + %3 = trunc i64 %2 to i32 + %sub = sub nsw i32 0, %3 + ret i32 %sub +} + +define internal coldcc { i64, i64 } @callee(i32 signext %a, i32 signext %b) { +entry: +; COLDCC: std {{[0-9]+}}, 0(3) +; COLDCC: std {{[0-9]+}}, 8(3) + %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9},~{r10}"(i32 %a, i32 %b) + %mul = mul nsw i32 %a, 3 + %1 = mul i32 %b, -5 + %add = add i32 %1, %mul + %sub = add i32 %add, %0 + %mul5 = mul nsw i32 %b, %a + %add6 = add nsw i32 %sub, %mul5 + %retval.sroa.0.0.insert.ext = zext i32 %0 to i64 + %retval.sroa.3.8.insert.ext = zext i32 %sub to i64 + %retval.sroa.3.12.insert.ext = zext i32 %add6 to i64 + %retval.sroa.3.12.insert.shift = shl nuw i64 %retval.sroa.3.12.insert.ext, 32 + %retval.sroa.3.12.insert.insert = or i64 %retval.sroa.3.12.insert.shift, %retval.sroa.3.8.insert.ext + %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.insert.ext, 0 + %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.3.12.insert.insert, 1 + ret { i64, i64 } %.fca.1.insert +} diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll index 9e5176eddaa..dddf1338a4d 100644 --- 
a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -93,7 +93,7 @@ ; FIXME: There really shouldn't be another pass manager, especially one that ; just builds the domtree. It doesn't even run the verifier. ; CHECK-O2: Pass Arguments: -; CHECK-O2-NEXT: FunctionPass Manager +; CHECK-O2: FunctionPass Manager ; CHECK-O2-NEXT: Dominator Tree Construction define void @foo() { diff --git a/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll b/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll new file mode 100644 index 00000000000..8fedf834f40 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll @@ -0,0 +1,81 @@ +; RUN: opt -globalopt -mtriple=powerpc64le-unknown-linux-gnu -ppc-enable-coldcc -S < %s | FileCheck %s -check-prefix=COLDCC +; RUN: opt -globalopt -S < %s | FileCheck %s -check-prefix=CHECK + +define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %lim, i32 signext %i) local_unnamed_addr #0 !prof !30 { +entry: +; COLDCC: call coldcc signext i32 @callee +; CHECK: call fastcc signext i32 @callee + %add = add nsw i32 %b, %a + %sub = add nsw i32 %lim, -1 + %cmp = icmp eq i32 %sub, %i + br i1 %cmp, label %if.then, label %if.end, !prof !31 + +if.then: ; preds = %entry + %call = tail call signext i32 @callee(i32 signext %a, i32 signext %b) + br label %if.end + +if.end: ; preds = %if.then, %entry + %f.0 = phi i32 [ %call, %if.then ], [ %add, %entry ] + ret i32 %f.0 +} + +define internal signext i32 @callee(i32 signext %a, i32 signext %b) unnamed_addr #0 { +entry: + %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9}"(i32 %a, i32 %b) #1, !srcloc !32 + %mul = mul nsw i32 %a, 3 + %mul1 = shl i32 %0, 1 + %add = add nsw i32 %mul1, %mul + ret i32 %add +} + +define signext i32 @main() local_unnamed_addr #0 !prof !33 { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +for.body: ; preds = 
%for.body, %entry + %i.011 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %ret.010 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %call = tail call signext i32 @caller(i32 signext 4, i32 signext 5, i32 signext 10000000, i32 signext %i.011) + %add = add nsw i32 %call, %ret.010 + %inc = add nuw nsw i32 %i.011, 1 + %exitcond = icmp eq i32 %inc, 10000000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !34 +} +attributes #0 = { noinline } + +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 20000003} +!4 = !{!"MaxCount", i64 10000000} +!5 = !{!"MaxInternalCount", i64 10000000} +!6 = !{!"MaxFunctionCount", i64 10000000} +!7 = !{!"NumCounts", i64 5} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13, !14, !15, !16, !16, !17, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26} +!11 = !{i32 10000, i64 10000000, i32 2} +!12 = !{i32 100000, i64 10000000, i32 2} +!13 = !{i32 200000, i64 10000000, i32 2} +!14 = !{i32 300000, i64 10000000, i32 2} +!15 = !{i32 400000, i64 10000000, i32 2} +!16 = !{i32 500000, i64 10000000, i32 2} +!17 = !{i32 600000, i64 10000000, i32 2} +!18 = !{i32 700000, i64 10000000, i32 2} +!19 = !{i32 800000, i64 10000000, i32 2} +!20 = !{i32 900000, i64 10000000, i32 2} +!21 = !{i32 950000, i64 10000000, i32 2} +!22 = !{i32 990000, i64 10000000, i32 2} +!23 = !{i32 999000, i64 10000000, i32 2} +!24 = !{i32 999900, i64 10000000, i32 2} +!25 = !{i32 999990, i64 10000000, i32 2} +!26 = !{i32 999999, i64 10000000, i32 2} +!30 = !{!"function_entry_count", i64 10000000} +!31 = !{!"branch_weights", i32 2, i32 10000000} +!32 = !{i32 59} +!33 = !{!"function_entry_count", i64 1} +!34 = !{!"branch_weights", i32 2, i32 10000001} diff --git a/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg b/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg new file mode 100644 index 00000000000..5d33887ff0a --- /dev/null +++ 
b/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll b/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll new file mode 100644 index 00000000000..80c9366af6f --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -globalopt -S -enable-coldcc-stress-test -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=COLDCC +; RUN: opt < %s -globalopt -S | FileCheck %s -check-prefix=CHECK + +define internal i32 @callee_default(i32* %m) { +; COLDCC-LABEL: define internal coldcc i32 @callee_default +; CHECK-LABEL: define internal fastcc i32 @callee_default + %v = load i32, i32* %m + ret i32 %v +} + +define internal fastcc i32 @callee_fastcc(i32* %m) { +; COLDCC-LABEL: define internal fastcc i32 @callee_fastcc +; CHECK-LABEL: define internal fastcc i32 @callee_fastcc + %v = load i32, i32* %m + ret i32 %v +} + +define internal coldcc i32 @callee_coldcc(i32* %m) { +; COLDCC-LABEL: define internal coldcc i32 @callee_coldcc +; CHECK-LABEL: define internal coldcc i32 @callee_coldcc + %v = load i32, i32* %m + ret i32 %v +} + +define i32 @callee(i32* %m) { + %v = load i32, i32* %m + ret i32 %v +} + +define void @caller() { + %m = alloca i32 + call i32 @callee_default(i32* %m) + call fastcc i32 @callee_fastcc(i32* %m) + call coldcc i32 @callee_coldcc(i32* %m) + call i32 @callee(i32* %m) + ret void +} + +; COLDCC-LABEL: define void @caller() +; COLDCC: call coldcc i32 @callee_default +; COLDCC: call fastcc i32 @callee_fastcc +; COLDCC: call coldcc i32 @callee_coldcc +; COLDCC: call i32 @callee +; CHECK-LABEL: define void @caller() +; CHECK: call fastcc i32 @callee_default +; CHECK: call fastcc i32 @callee_fastcc +; CHECK: call coldcc i32 @callee_coldcc +; CHECK: call i32 @callee |

