[LoopInterchange] Make isProfitableForVectorization slightly more conservative.

After D43236, we started interchanging loops with empty dependence matrices. In isProfitableForVectorization, we try to determine if interchanging makes the loop dependences more friendly to the vectorizer. If there are no dependences, we should not interchange, based on that heuristic.

Reviewers: efriedma, mcrosier, karthikthecool, blitz.opensource

Reviewed By: mcrosier

Differential Revision: https://reviews.llvm.org/D45208

llvm-svn: 330738
author: Florian Hahn <florian.hahn@arm.com> 2018-04-24 16:55:32 +0000
committer: Florian Hahn <florian.hahn@arm.com> 2018-04-24 16:55:32 +0000
commit: ceee7889472456e370f4e943ca587c03f2de16be (patch)
tree: cf89b18831b33f86ce7260aeb93cc9c08242ed89
parent: f0945aa0e02b5191b323dfde4aa84e9c0506df04 (diff)
download: bcm5719-llvm-ceee7889472456e370f4e943ca587c03f2de16be.tar.gz
bcm5719-llvm-ceee7889472456e370f4e943ca587c03f2de16be.zip
2 files changed, 80 insertions, 16 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a641afeda03..5dcb630c814 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1112,7 +1112,8 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
   }
   // If outer loop has dependence and inner loop is loop independent then it is
   // profitable to interchange to enable parallelism.
-  return true;
+  // If there are no dependences, interchanging will not improve anything.
+  return !DepMatrix.empty();
 }
 
 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
index e737bab33aa..5d1b4f2620c 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
@@ -1,31 +1,94 @@
-; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -S \
-; RUN:     -pass-remarks=loop-interchange 2>&1 | FileCheck %s
-; CHECK: Loop interchanged with enclosing loop.
+; RUN: opt < %s -loop-interchange -simplifycfg -pass-remarks-output=%t \
+; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange -stats -S 2>&1 \
+; RUN:     | FileCheck -check-prefix=STATS %s
+; RUN: FileCheck -input-file %t %s
 
-; no_deps_interchange just access a single nested array and can be interchange.
 
-define i32 @no_deps_interchange([1024 x i32]* nocapture %Arr, i32 %k) local_unnamed_addr #0 {
+; no_deps_interchange just accesses a single nested array and can be interchanged.
+; CHECK:      Name:       Interchanged
+; CHECK-NEXT: Function:   no_deps_interchange
+define i32 @no_deps_interchange([1024 x i32]* nocapture %Arr) local_unnamed_addr #0 {
 entry:
-  br label %for.body
+  br label %for1.header
 
-for.body:                                         ; preds = %entry, %for.cond.cleanup3
-  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.cond.cleanup3 ]
-  br label %for.body4
+for1.header:                                         ; preds = %entry, %for1.inc
+  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
+  br label %for2
 
-for.body4:                                        ; preds = %for.body, %for.body4
-  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ]
+for2:                                        ; preds = %for1.header, %for2
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
   %arrayidx6 = getelementptr inbounds [1024 x i32], [1024 x i32]* %Arr, i64 %indvars.iv, i64 %indvars.iv19
   store i32 0, i32* %arrayidx6, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, 1024
-  br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
+  br i1 %exitcond, label %for2, label %for1.inc
 
-for.cond.cleanup3:                                ; preds = %for.body4
+for1.inc:
   %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
   %exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
-  br i1 %exitcond21, label %for.body, label %for.cond.cleanup
+  br i1 %exitcond21, label %for1.header, label %exit
 
+exit:                                 ; preds = %for1.inc
+  ret i32 0
+
+}
+
+; Only the inner loop induction variable is used for memory accesses.
+; Interchanging is not beneficial.
+; CHECK:      Name:       InterchangeNotProfitable
+; CHECK-NEXT: Function:   no_bad_order
+define i32 @no_bad_order(i32* %Arr) {
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %entry, %for1.inc
+  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for1.header, %for2
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %Arr, i64 %indvars.iv
+  store i32 0, i32* %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for2, label %for1.inc
 
-for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
+for1.inc:
+  %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+  %exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
+  br i1 %exitcond21, label %for1.header, label %exit
+
+exit:                                 ; preds = %for1.inc
   ret i32 0
 }
+
+; No memory access uses any induction variable, so interchanging is not beneficial.
+; CHECK:      Name:        InterchangeNotProfitable
+; CHECK-NEXT: Function:    no_mem_instrs
+define i32 @no_mem_instrs(i64* %ptr) {
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %entry, %for1.inc
+  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for1.header, %for2
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
+  store i64 %indvars.iv, i64* %ptr, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for2, label %for1.inc
+
+for1.inc:
+  %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+  %exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
+  br i1 %exitcond21, label %for1.header, label %exit
+
+exit:                                 ; preds = %for1.inc
+  ret i32 0
+}
+
+
+; Check stats: we interchanged 1 out of 3 loop nests.
+; STATS: 1 loop-interchange - Number of loops interchanged
author	Florian Hahn <florian.hahn@arm.com>	2018-04-24 16:55:32 +0000
committer	Florian Hahn <florian.hahn@arm.com>	2018-04-24 16:55:32 +0000
commit	ceee7889472456e370f4e943ca587c03f2de16be (patch)
tree	cf89b18831b33f86ce7260aeb93cc9c08242ed89
parent	f0945aa0e02b5191b323dfde4aa84e9c0506df04 (diff)
download	bcm5719-llvm-ceee7889472456e370f4e943ca587c03f2de16be.tar.gz bcm5719-llvm-ceee7889472456e370f4e943ca587c03f2de16be.zip