summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Florian Hahn <florian.hahn@arm.com> 2018-04-24 16:55:32 +0000
committer Florian Hahn <florian.hahn@arm.com> 2018-04-24 16:55:32 +0000
commit ceee7889472456e370f4e943ca587c03f2de16be (patch)
tree cf89b18831b33f86ce7260aeb93cc9c08242ed89
parent f0945aa0e02b5191b323dfde4aa84e9c0506df04 (diff)
download bcm5719-llvm-ceee7889472456e370f4e943ca587c03f2de16be.tar.gz
bcm5719-llvm-ceee7889472456e370f4e943ca587c03f2de16be.zip
[LoopInterchange] Make isProfitableForVectorization slightly more conservative.
After D43236, we started interchanging loops with empty dependence matrices. In isProfitableForVectorization, we try to determine if interchanging makes the loop dependences more friendly to the vectorizer. If there are no dependences, we should not interchange, based on that heuristic.

Reviewers: efriedma, mcrosier, karthikthecool, blitz.opensource

Reviewed By: mcrosier

Differential Revision: https://reviews.llvm.org/D45208

llvm-svn: 330738
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopInterchange.cpp 3
-rw-r--r-- llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll 93
2 files changed, 80 insertions, 16 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a641afeda03..5dcb630c814 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1112,7 +1112,8 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
}
// If outer loop has dependence and inner loop is loop independent then it is
// profitable to interchange to enable parallelism.
- return true;
+ // If there are no dependences, interchanging will not improve anything.
+ return !DepMatrix.empty();
}
bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
index e737bab33aa..5d1b4f2620c 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
@@ -1,31 +1,94 @@
-; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -S \
-; RUN: -pass-remarks=loop-interchange 2>&1 | FileCheck %s
-; CHECK: Loop interchanged with enclosing loop.
+; RUN: opt < %s -loop-interchange -simplifycfg -pass-remarks-output=%t \
+; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange -stats -S 2>&1 \
+; RUN: | FileCheck -check-prefix=STATS %s
+; RUN: FileCheck -input-file %t %s
-; no_deps_interchange just access a single nested array and can be interchange.
-define i32 @no_deps_interchange([1024 x i32]* nocapture %Arr, i32 %k) local_unnamed_addr #0 {
+; no_deps_interchange just accesses a single nested array and can be interchanged.
+; CHECK: Name: Interchanged
+; CHECK-NEXT: Function: no_deps_interchange
+define i32 @no_deps_interchange([1024 x i32]* nocapture %Arr) local_unnamed_addr #0 {
entry:
- br label %for.body
+ br label %for1.header
-for.body: ; preds = %entry, %for.cond.cleanup3
- %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.cond.cleanup3 ]
- br label %for.body4
+for1.header: ; preds = %entry, %for1.inc
+ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
+ br label %for2
-for.body4: ; preds = %for.body, %for.body4
- %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ]
+for2: ; preds = %for1.header, %for2
+ %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
%arrayidx6 = getelementptr inbounds [1024 x i32], [1024 x i32]* %Arr, i64 %indvars.iv, i64 %indvars.iv19
store i32 0, i32* %arrayidx6, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1024
- br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
+ br i1 %exitcond, label %for2, label %for1.inc
-for.cond.cleanup3: ; preds = %for.body4
+for1.inc:
%indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
%exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
- br i1 %exitcond21, label %for.body, label %for.cond.cleanup
+ br i1 %exitcond21, label %for1.header, label %exit
+exit: ; preds = %for1.inc
+ ret i32 0
+
+}
+
+; Only the inner loop induction variable is used for memory accesses.
+; Interchanging is not beneficial.
+; CHECK: Name: InterchangeNotProfitable
+; CHECK-NEXT: Function: no_bad_order
+define i32 @no_bad_order(i32* %Arr) {
+entry:
+ br label %for1.header
+
+for1.header: ; preds = %entry, %for1.inc
+ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
+ br label %for2
+
+for2: ; preds = %for1.header, %for2
+ %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
+ %arrayidx6 = getelementptr inbounds i32, i32* %Arr, i64 %indvars.iv
+ store i32 0, i32* %arrayidx6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for2, label %for1.inc
-for.cond.cleanup: ; preds = %for.cond.cleanup3
+for1.inc:
+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+ %exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
+ br i1 %exitcond21, label %for1.header, label %exit
+
+exit: ; preds = %for1.inc
ret i32 0
}
+
+; No memory access using any induction variables, interchanging not beneficial.
+; CHECK: Name: InterchangeNotProfitable
+; CHECK-NEXT: Function: no_mem_instrs
+define i32 @no_mem_instrs(i64* %ptr) {
+entry:
+ br label %for1.header
+
+for1.header: ; preds = %entry, %for1.inc
+ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
+ br label %for2
+
+for2: ; preds = %for1.header, %for2
+ %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
+ store i64 %indvars.iv, i64* %ptr, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for2, label %for1.inc
+
+for1.inc:
+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+ %exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
+ br i1 %exitcond21, label %for1.header, label %exit
+
+exit: ; preds = %for1.inc
+ ret i32 0
+}
+
+
+; Check stats, we interchanged 1 out of 3 loops.
+; STATS: 1 loop-interchange - Number of loops interchanged
OpenPOWER on IntegriCloud