summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSam Parker <sam.parker@arm.com>2019-03-11 13:19:46 +0000
committerSam Parker <sam.parker@arm.com>2019-03-11 13:19:46 +0000
commit52760bf43525018bf19b5d3533d3e0b907bb8dae (patch)
tree4dfe9ad887deafb6950f448a5aba1a74567745e8
parentb60aea41315c2a4a432483e57ea5065655bf4e95 (diff)
downloadbcm5719-llvm-52760bf43525018bf19b5d3533d3e0b907bb8dae.tar.gz
bcm5719-llvm-52760bf43525018bf19b5d3533d3e0b907bb8dae.zip
[CGP] Limit distance between overflow math and cmp
Inserting an overflowing arithmetic intrinsic can increase register pressure by producing two values at a point where only one is needed, while the second use may be several blocks away. This increase in pressure is likely to be more detrimental to performance than rematerialising one of the original instructions. So, check that the arithmetic and compare instructions are either in the same block or in blocks where one is an immediate successor of the other. Differential Revision: https://reviews.llvm.org/D59024 llvm-svn: 355823
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp11
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll56
2 files changed, 67 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 80335f55c77..af1b65253f8 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1180,6 +1180,17 @@ static bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
bool MathDominates = DT.dominates(BO, Cmp);
if (!MathDominates && !DT.dominates(Cmp, BO))
return false;
+
+ // Check that the insertion doesn't create a value that is live across more
+ // than two blocks, so to minimise the increase in register pressure.
+ if (BO->getParent() != Cmp->getParent()) {
+ BasicBlock *Dominator = MathDominates ? BO->getParent() : Cmp->getParent();
+ BasicBlock *Dominated = MathDominates ? Cmp->getParent() : BO->getParent();
+ auto Successors = successors(Dominator);
+ if (llvm::find(Successors, Dominated) == Successors.end())
+ return false;
+ }
+
InsertPt = MathDominates ? cast<Instruction>(BO) : cast<Instruction>(Cmp);
}
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll
new file mode 100644
index 00000000000..3fbc2133141
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll
@@ -0,0 +1,56 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8m.main-arm-none-eabi"
+
+; CHECK-LABEL: uadd_overflow_too_far_cmp_dom
+; CHECK-NOT: with.overflow.i32
+define i32 @uadd_overflow_too_far_cmp_dom(i32 %arg0) {
+entry:
+ %cmp = icmp ne i32 %arg0, 0
+ br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+ call void @foo()
+ br label %exit
+
+if.else:
+ call void @bar()
+ br label %if.end
+
+if.end:
+ %dec = add nsw i32 %arg0, -1
+ br label %exit
+
+exit:
+ %res = phi i32 [ %arg0, %if.then ], [ %dec, %if.end ]
+ ret i32 %res
+}
+
+; CHECK-LABEL: uadd_overflow_too_far_math_dom
+; CHECK-NOT: with.overflow.i32
+define i32 @uadd_overflow_too_far_math_dom(i32 %arg0, i32 %arg1) {
+entry:
+ %dec = add nsw i32 %arg0, -1
+ %cmp = icmp ugt i32 %arg0, 1
+ br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+ call void @foo()
+ br label %if.end
+
+if.else:
+ call void @bar()
+ br label %if.end
+
+if.end:
+ %cmp.i.i = icmp ne i32 %arg0, 0
+ %tobool = zext i1 %cmp.i.i to i32
+ br label %exit
+
+exit:
+ ret i32 %tobool
+}
+
+declare void @foo()
+declare void @bar()
OpenPOWER on IntegriCloud