summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilip Reames <listmail@philipreames.com>2018-08-09 20:18:42 +0000
committerPhilip Reames <listmail@philipreames.com>2018-08-09 20:18:42 +0000
commitca256d93fb352746a9f1912e1838804a0e1efc4f (patch)
tree8da7883319415a77cda4e04b97e468b85be3e87c
parented4f51755ec7d72a290e6055e5eddc50a93360e3 (diff)
downloadbcm5719-llvm-ca256d93fb352746a9f1912e1838804a0e1efc4f.tar.gz
bcm5719-llvm-ca256d93fb352746a9f1912e1838804a0e1efc4f.zip
[LICM] hoist fences out of loops w/o memory operations
The motivating case is an otherwise dead loop with a fence in it. At the moment, this goes all the way through the optimizer and we end up emitting an entirely pointless loop on x86. This case may seem a bit contrived, but we've seen it in real code as the result of otherwise reasonable lowering strategies combined w/thread local memory optimizations (such as escape analysis). To handle this simple case, we can teach LICM to hoist must execute fences when there is no other memory operation within the loop. Differential Revision: https://reviews.llvm.org/D50489 llvm-svn: 339378
-rw-r--r--llvm/include/llvm/Analysis/AliasSetTracker.h14
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp15
-rw-r--r--llvm/test/Transforms/LICM/fence.ll14
3 files changed, 37 insertions, 6 deletions
diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h
index c9680ff40d1..0e6d2297df1 100644
--- a/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -224,6 +224,20 @@ public:
// track of the list's exact size.
unsigned size() { return SetSize; }
+ /// If this alias set is known to contain a single instruction and *only* a
+ /// single unique instruction, return it. Otherwise, return nullptr.
+ Instruction* getUniqueInstruction() {
+ if (size() != 0)
+ // Can't track source of pointer, might be many instructions
+ return nullptr;
+ if (AliasAny)
+ // May have collapsed alias set
+ return nullptr;
+ if (1 != UnknownInsts.size()) // anything other than exactly one entry is not "unique"
+ return nullptr;
+ return cast<Instruction>(UnknownInsts[0]);
+ }
+
void print(raw_ostream &OS) const;
void dump() const;
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index d4ef9ade6f3..dbb9bc6318d 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -582,6 +582,7 @@ namespace {
bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
return (isa<LoadInst>(I) || isa<CallInst>(I) ||
+ isa<FenceInst>(I) ||
isa<BinaryOperator>(I) || isa<CastInst>(I) ||
isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
@@ -684,6 +685,20 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// sink the call.
return false;
+ } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
+ // Fences alias (most) everything to provide ordering. For the moment,
+ // just give up if there are any other memory operations in the loop.
+ auto Begin = CurAST->begin();
+ assert(Begin != CurAST->end() && "must contain FI");
+ if (std::next(Begin) != CurAST->end())
+ // constant memory for instance, TODO: handle better
+ return false;
+ auto *UniqueI = Begin->getUniqueInstruction();
+ if (!UniqueI)
+ // other memory op, give up
+ return false;
+ assert(UniqueI == FI && "AS must contain FI");
+ return true;
}
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
diff --git a/llvm/test/Transforms/LICM/fence.ll b/llvm/test/Transforms/LICM/fence.ll
index 1240f1e1e08..f1dfe9e3511 100644
--- a/llvm/test/Transforms/LICM/fence.ll
+++ b/llvm/test/Transforms/LICM/fence.ll
@@ -3,8 +3,8 @@
define void @test1(i64 %n) {
; CHECK-LABEL: @test1
-; CHECK-LABEL: loop:
; CHECK: fence
+; CHECK-LABEL: loop:
entry:
br label %loop
loop:
@@ -19,8 +19,8 @@ exit:
define void @test2(i64 %n) {
; CHECK-LABEL: @test2
-; CHECK-LABEL: loop:
; CHECK: fence
+; CHECK-LABEL: loop:
entry:
br label %loop
loop:
@@ -35,8 +35,8 @@ exit:
define void @test3(i64 %n) {
; CHECK-LABEL: @test3
-; CHECK-LABEL: loop:
; CHECK: fence
+; CHECK-LABEL: loop:
entry:
br label %loop
loop:
@@ -51,8 +51,8 @@ exit:
define void @test4(i64 %n) {
; CHECK-LABEL: @test4
-; CHECK-LABEL: loop:
; CHECK: fence
+; CHECK-LABEL: loop:
entry:
br label %loop
loop:
@@ -99,8 +99,10 @@ exit:
ret void
}
-define void @testfp1(i64 %n, i64* %p) {
-; CHECK-LABEL: @testfp1
+; Note: While a false negative for LICM on its own, O3 does get this
+; case by combining the fences.
+define void @testfn1(i64 %n, i64* %p) {
+; CHECK-LABEL: @testfn1
; CHECK-LABEL: loop:
; CHECK: fence
entry:
OpenPOWER on IntegriCloud