From c5ab1ab79786c415082d9342b37bd8b7611f4516 Mon Sep 17 00:00:00 2001 From: Hiroshi Inoue Date: Mon, 5 Feb 2018 12:25:29 +0000 Subject: [PowerPC] Check hot loop exit edge in PPCCTRLoops PPCCTRLoops transform loops using mtctr/bdnz instructions if loop trip count is known and big enough to compensate for the cost of mtctr. But if there is a loop exit edge which is known to be frequently taken (by builtin_expect or by PGO), we should not transform the loop to avoid the cost of mtctr instruction. Here is an example of a loop with hot exit edge: for (unsigned i = 0; i < TripCount; i++) { // do something if (__builtin_expect(check(), 1)) break; // do something } Differential Revision: https://reviews.llvm.org/D42637 llvm-svn: 324229 --- llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll | 187 +++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll (limited to 'llvm/test/CodeGen/PowerPC') diff --git a/llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll b/llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll new file mode 100644 index 00000000000..6bb4113f07e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll @@ -0,0 +1,187 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +; If there is an exit edge known to be frequently taken, +; we should not transform this loop. + +; A loop having a hot exit edge (exit in false branch) +define signext i64 @func() { +; CHECK: @func +; CHECK-NOT: mtctr +; CHECK-NOT: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %if.end, label %cleanup, !prof !1 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having a cold exit edge (exit in false branch) +define signext i64 @func2() { +; CHECK: @func2 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %if.end, label %cleanup, !prof !2 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having an exit edge without profile data (exit in false branch) +define signext i64 @func3() { +; CHECK: @func3 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %if.end, label %cleanup + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having a hot exit edge (exit in true branch) +define signext i64 @func4() { +; CHECK: @func4 +; CHECK-NOT: mtctr +; CHECK-NOT: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %cleanup, label %if.end, !prof !2 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having a cold exit edge (exit in true branch) +define signext i64 @func5() { +; CHECK: @func5 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %cleanup, label %if.end, !prof !1 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having an exit edge without profile data (exit in true branch) +define signext i64 @func6() { +; CHECK: @func6 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %cleanup, label %if.end + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +!1 = !{!"branch_weights", i32 1, i32 2000} +!2 = !{!"branch_weights", i32 2000, i32 1} -- cgit v1.2.3