[LV] Avoid rounding errors for predicated instruction costs

This patch modifies the cost calculation of predicated instructions (div and rem) to avoid the accumulation of rounding errors due to multiple truncating integer divisions. The calculation for predicated stores will be addressed in a follow-on patch since we currently don't scale the cost of predicated stores by block probability.

Differential Revision: https://reviews.llvm.org/D25333
llvm-svn: 284123
author: Matthew Simpson <mssimpso@codeaurora.org> 2016-10-13 14:19:48 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> 2016-10-13 14:19:48 +0000
commit: 6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa (patch)
tree: 9c5e45fe9668e405acac90c0cbb178f34ef1975f /llvm/test/Transforms/LoopVectorize
parent: cb59b5257c488da28e495d6c9803332326488dab (diff)
download: bcm5719-llvm-6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa.tar.gz
bcm5719-llvm-6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa.zip
1 files changed, 53 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
new file mode 100644
index 00000000000..150073aa2ab
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
@@ -0,0 +1,53 @@
+; REQUIRES: asserts
+; RUN: opt < %s -force-vector-width=2 -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; Check predication-related cost calculations, including scalarization overhead
+; and block probability scaling. Note that the functionality being tested is
+; not specific to AArch64. We specify a target to get actual values for the
+; instruction costs.
+
+; CHECK-LABEL: predicated_udiv
+;
+; This test checks that we correctly compute the cost of the predicated udiv
+; instruction. If we assume the block probability is 50%, we compute the cost
+; as:
+;
+; Cost for vector lane zero:
+;   (udiv(1) + 2 * extractelement(0) + insertelement(0)) / 2 = 0
+; Cost for vector lane one:
+;   (udiv(1) + 2 * extractelement(3) + insertelement(3)) / 2 = 5
+;
+; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3
+; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3
+;
+define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) { ; returns the sum accumulated in %r
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] ; induction variable, starts at 0
+  %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ] ; running sum, starts at 0
+  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i ; &a[i]
+  %tmp1 = getelementptr inbounds i32, i32* %b, i64 %i ; &b[i]
+  %tmp2 = load i32, i32* %tmp0, align 4 ; a[i], the dividend
+  %tmp3 = load i32, i32* %tmp1, align 4 ; b[i], the divisor
+  br i1 %c, label %if.then, label %for.inc ; %c is a function argument, so this guard is loop-invariant
+
+if.then:
+  %tmp4 = udiv i32 %tmp2, %tmp3 ; runs only when %c is true; the instruction whose cost is under test
+  br label %for.inc
+
+for.inc:
+  %tmp5 = phi i32 [ %tmp3, %for.body ], [ %tmp4, %if.then] ; b[i] if the udiv was skipped, else a[i] / b[i]
+  %tmp6 = add i32 %r, %tmp5 ; accumulate into the running sum
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n ; signed compare: keep looping while i + 1 < n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  %tmp7 = phi i32 [ %tmp6, %for.inc ] ; final sum leaving the loop
+  ret i32 %tmp7
+}
author	Matthew Simpson <mssimpso@codeaurora.org>	2016-10-13 14:19:48 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	2016-10-13 14:19:48 +0000
commit	6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa (patch)
tree	9c5e45fe9668e405acac90c0cbb178f34ef1975f /llvm/test/Transforms/LoopVectorize
parent	cb59b5257c488da28e495d6c9803332326488dab (diff)
download	bcm5719-llvm-6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa.tar.gz bcm5719-llvm-6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa.zip