summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
author Matthew Simpson <mssimpso@codeaurora.org> 2016-10-13 14:19:48 +0000
committer Matthew Simpson <mssimpso@codeaurora.org> 2016-10-13 14:19:48 +0000
commit 6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa (patch)
tree 9c5e45fe9668e405acac90c0cbb178f34ef1975f /llvm/test/Transforms/LoopVectorize
parent cb59b5257c488da28e495d6c9803332326488dab (diff)
download bcm5719-llvm-6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa.tar.gz
bcm5719-llvm-6cdb5a6f9663ae62e5d1f140f8e3698163f3c3fa.zip
[LV] Avoid rounding errors for predicated instruction costs
This patch modifies the cost calculation of predicated instructions (div and rem) to avoid the accumulation of rounding errors due to multiple truncating integer divisions. The calculation for predicated stores will be addressed in a follow-on patch since we currently don't scale the cost of predicated stores by block probability.

Differential Revision: https://reviews.llvm.org/D25333

llvm-svn: 284123
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll53
1 files changed, 53 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
new file mode 100644
index 00000000000..150073aa2ab
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
@@ -0,0 +1,53 @@
+; REQUIRES: asserts
+; RUN: opt < %s -force-vector-width=2 -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; Check predication-related cost calculations, including scalarization overhead
+; and block probability scaling. Note that the functionality being tested is
+; not specific to AArch64. We specify a target to get actual values for the
+; instruction costs.
+
+; CHECK-LABEL: predicated_udiv
+;
+; This test checks that we correctly compute the cost of the predicated udiv
+; instruction. If we assume the block probability is 50%, we compute the cost
+; as:
+;
+; Cost for vector lane zero:
+; (udiv(1) + 2 * extractelement(0) + insertelement(0)) / 2 = 0
+; Cost for vector lane one:
+; (udiv(1) + 2 * extractelement(3) + insertelement(3)) / 2 = 5
+;
+; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3
+; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3
+;
+define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) { ; returns the sum over i of (%c ? a[i] udiv b[i] : b[i]); the udiv sits in a conditionally executed block
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] ; loop counter, starts at 0
+ %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ] ; running sum, starts at 0
+ %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i ; &a[i]
+ %tmp1 = getelementptr inbounds i32, i32* %b, i64 %i ; &b[i]
+ %tmp2 = load i32, i32* %tmp0, align 4
+ %tmp3 = load i32, i32* %tmp1, align 4
+ br i1 %c, label %if.then, label %for.inc ; %c is a function argument, so the guard is the same on every iteration
+
+if.then:
+ %tmp4 = udiv i32 %tmp2, %tmp3 ; only reached when %c is true; the expected-output lines above quote this instruction verbatim, so its name and operands must stay as written
+ br label %for.inc
+
+for.inc:
+ %tmp5 = phi i32 [ %tmp3, %for.body ], [ %tmp4, %if.then] ; b[i] when if.then was skipped, otherwise the quotient
+ %tmp6 = add i32 %r, %tmp5
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n ; signed compare at the bottom of the loop, so the body runs at least once
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ %tmp7 = phi i32 [ %tmp6, %for.inc ] ; single-incoming phi carrying the final sum out of the loop
+ ret i32 %tmp7
+}
OpenPOWER on IntegriCloud