Reapply "[TTI] Refine default cost for interleaved load groups with gaps"

This reapplies commit r272385 with a fix. The build was failing when compiled with gcc, but not with clang. With the fix, we now get the data layout from the current TTI implementation, which will hopefully solve the issue.

llvm-svn: 272395
author: Matthew Simpson <mssimpso@codeaurora.org> 2016-06-10 14:33:30 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> 2016-06-10 14:33:30 +0000
commit: 12b9c5ba989fe637427429095f57c4028c380391 (patch)
tree: 29d936fba8ec010f564f05f6fe66361bf46124a6 /llvm/test
parent: 330c0b6c8c539589b26acb56c0d87011bac004bd (diff)
download: bcm5719-llvm-12b9c5ba989fe637427429095f57c4028c380391.tar.gz bcm5719-llvm-12b9c5ba989fe637427429095f57c4028c380391.zip
1 files changed, 42 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
index a0e741a3cdb..df1f9c61940 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -14,6 +14,7 @@ entry:
 ; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
 ; access group is 2.
 
+; CHECK: LV: Checking a loop in "test_byte_interleaved_cost"
 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %tmp = load i8, i8* %arrayidx0, align 4
 ; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   %tmp = load i8, i8* %arrayidx0, align 4
 
@@ -37,3 +38,44 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+%ig.factor.8 = type { double*, double, double, double, double, double, double, double }
+define double @wide_interleaved_group(%ig.factor.8* %s, double %a, double %b, i32 %n) {
+entry:
+  br label %for.body
+
+; Check the default cost of a strided load with a factor that is greater than
+; the maximum allowed. In this test, the interleave factor would be 8, which is
+; not supported.
+
+; CHECK: LV: Checking a loop in "wide_interleaved_group"
+; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction:   %1 = load double, double* %0, align 8
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %5 = load double, double* %4, align 8
+; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction:   store double %9, double* %10, align 8
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %r = phi double [ 0.000000e+00, %entry ], [ %12, %for.body ]
+  %0 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 2
+  %1 = load double, double* %0, align 8
+  %2 = fcmp fast olt double %1, %a
+  %3 = select i1 %2, double 0.000000e+00, double %1
+  %4 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 6
+  %5 = load double, double* %4, align 8
+  %6 = fcmp fast olt double %5, %a
+  %7 = select i1 %6, double 0.000000e+00, double %5
+  %8 = fmul fast double %7, %b
+  %9 = fadd fast double %8, %3
+  %10 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 3
+  store double %9, double* %10, align 8
+  %11 = fmul fast double %9, %9
+  %12 = fadd fast double %11, %r
+  %i.next = add nuw nsw i64 %i, 1
+  %13 = trunc i64 %i.next to i32
+  %cond = icmp eq i32 %13, %n
+  br i1 %cond, label %for.exit, label %for.body
+
+for.exit:
+  %r.lcssa = phi double [ %12, %for.body ]
+  ret double %r.lcssa
+}
author	Matthew Simpson <mssimpso@codeaurora.org>	2016-06-10 14:33:30 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	2016-06-10 14:33:30 +0000
commit	12b9c5ba989fe637427429095f57c4028c380391 (patch)
tree	29d936fba8ec010f564f05f6fe66361bf46124a6 /llvm/test
parent	330c0b6c8c539589b26acb56c0d87011bac004bd (diff)
download	bcm5719-llvm-12b9c5ba989fe637427429095f57c4028c380391.tar.gz bcm5719-llvm-12b9c5ba989fe637427429095f57c4028c380391.zip