summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Matthew Simpson <mssimpso@codeaurora.org>, 2016-06-10 14:33:30 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org>, 2016-06-10 14:33:30 +0000
commit: 12b9c5ba989fe637427429095f57c4028c380391 (patch)
tree: 29d936fba8ec010f564f05f6fe66361bf46124a6 /llvm/test
parent: 330c0b6c8c539589b26acb56c0d87011bac004bd (diff)
download: bcm5719-llvm-12b9c5ba989fe637427429095f57c4028c380391.tar.gz
bcm5719-llvm-12b9c5ba989fe637427429095f57c4028c380391.zip
Reapply "[TTI] Refine default cost for interleaved load groups with gaps"
This reapplies commit r272385 with a fix. The build was failing when compiled with gcc, but not with clang. With the fix, we now get the data layout from the current TTI implementation, which will hopefully solve the issue.

llvm-svn: 272395
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll | 42
1 files changed, 42 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
index a0e741a3cdb..df1f9c61940 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -14,6 +14,7 @@ entry:
; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
; access group is 2.
+; CHECK: LV: Checking a loop in "test_byte_interleaved_cost"
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
@@ -37,3 +38,44 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
+
+%ig.factor.8 = type { double*, double, double, double, double, double, double, double }
+define double @wide_interleaved_group(%ig.factor.8* %s, double %a, double %b, i32 %n) {
+entry:
+ br label %for.body
+
+; Check the default cost of a strided load with a factor that is greater than
+; the maximum allowed. In this test, the interleave factor would be 8, which is
+; not supported.
+
+; CHECK: LV: Checking a loop in "wide_interleaved_group"
+; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %1 = load double, double* %0, align 8
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %5 = load double, double* %4, align 8
+; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store double %9, double* %10, align 8
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %r = phi double [ 0.000000e+00, %entry ], [ %12, %for.body ]
+ %0 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 2
+ %1 = load double, double* %0, align 8
+ %2 = fcmp fast olt double %1, %a
+ %3 = select i1 %2, double 0.000000e+00, double %1
+ %4 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 6
+ %5 = load double, double* %4, align 8
+ %6 = fcmp fast olt double %5, %a
+ %7 = select i1 %6, double 0.000000e+00, double %5
+ %8 = fmul fast double %7, %b
+ %9 = fadd fast double %8, %3
+ %10 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 3
+ store double %9, double* %10, align 8
+ %11 = fmul fast double %9, %9
+ %12 = fadd fast double %11, %r
+ %i.next = add nuw nsw i64 %i, 1
+ %13 = trunc i64 %i.next to i32
+ %cond = icmp eq i32 %13, %n
+ br i1 %cond, label %for.exit, label %for.body
+
+for.exit:
+ %r.lcssa = phi double [ %12, %for.body ]
+ ret double %r.lcssa
+}
OpenPOWER on IntegriCloud