From 7745dbc5c4193f0f49c98b28f9af64741a3b23ac Mon Sep 17 00:00:00 2001
From: Cong Hou
Date: Mon, 19 Oct 2015 23:16:40 +0000
Subject: Enhance loop rotation with existence of profile data in
 MachineBlockPlacement pass.

Currently, in MachineBlockPlacement pass the loop is rotated to let the best
exit to be the last BB in the loop chain, to maximize the fall-through from
the loop to outside. With profile data, we can determine the cost in terms of
missed fall through opportunities when rotating a loop chain and select the
best rotation. Basically, there are three kinds of cost to consider for each
rotation:

1. The possibly missed fall through edge (if it exists) from BB out of the
   loop to the loop header.
2. The possibly missed fall through edges (if they exist) from the loop exits
   to BB out of the loop.
3. The missed fall through edge (if it exists) from the last BB to the first
   BB in the loop chain.

Therefore, the cost for a given rotation is the sum of costs listed above. We
select the best rotation with the smallest cost. This is only for PGO mode
when we have more precise edge frequencies.

Differential revision: http://reviews.llvm.org/D10717

llvm-svn: 250754
---
 .../CodeGen/X86/code_placement_loop_rotation2.ll | 122 +++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll

(limited to 'llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll')

diff --git a/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll
new file mode 100644
index 00000000000..6d8b3c99cd0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -precise-rotation-cost < %s | FileCheck %s -check-prefix=CHECK-PROFILE
+
+define void @foo() {
+; Test a nested loop case when profile data is not available.
+; Expected call order without -precise-rotation-cost: b, c, d, e, f, g, h.
+; CHECK-LABEL: foo:
+; CHECK: callq b
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq e
+; CHECK: callq f
+; CHECK: callq g
+; CHECK: callq h
+
+entry:
+  br label %header
+
+header:
+  call void @b()
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !2
+
+if.then:
+  br label %header2
+
+header2:
+  call void @c()
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.then2, label %if.else2, !prof !2
+
+if.then2:
+  call void @d()
+  br label %if.end2
+
+if.else2:
+  call void @e()
+  br label %if.end2
+
+if.end2:
+  call void @f()
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %header2, label %if.end
+
+if.else:
+  call void @g()
+  br label %if.end
+
+if.end:
+  call void @h()
+  %call3 = call zeroext i1 @a()
+  br i1 %call3, label %header, label %end
+
+end:
+  ret void
+}
+
+define void @bar() !prof !1 {
+; Test a nested loop case when profile data is available.
+; Expected call order with -precise-rotation-cost: e, f, c, d, h, b, g.
+; CHECK-PROFILE-LABEL: bar:
+; CHECK-PROFILE: callq e
+; CHECK-PROFILE: callq f
+; CHECK-PROFILE: callq c
+; CHECK-PROFILE: callq d
+; CHECK-PROFILE: callq h
+; CHECK-PROFILE: callq b
+; CHECK-PROFILE: callq g
+
+entry:
+  br label %header
+
+header:
+  call void @b()
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !2
+
+if.then:
+  br label %header2
+
+header2:
+  call void @c()
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.then2, label %if.else2, !prof !2
+
+if.then2:
+  call void @d()
+  br label %if.end2
+
+if.else2:
+  call void @e()
+  br label %if.end2
+
+if.end2:
+  call void @f()
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %header2, label %if.end
+
+if.else:
+  call void @g()
+  br label %if.end
+
+if.end:
+  call void @h()
+  %call3 = call zeroext i1 @a()
+  br i1 %call3, label %header, label %end
+
+end:
+  ret void
+}
+
+declare zeroext i1 @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+declare void @f()
+declare void @g()
+declare void @h()
+
+!1 = !{!"function_entry_count", i64 1}
+!2 = !{!"branch_weights", i32 16, i32 16}
--
cgit v1.2.3