diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/Analysis/AliasSetTracker.h | 4 | ||||
-rw-r--r-- | llvm/lib/Analysis/AliasSetTracker.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoadCombine.cpp | 23 | ||||
-rw-r--r-- | llvm/test/Transforms/LoadCombine/load-combine-aa.ll | 39 | ||||
-rw-r--r-- | llvm/test/Transforms/LoadCombine/load-combine-assume.ll | 44 |
5 files changed, 112 insertions, 5 deletions
diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index 6dcd4a0441f..403a2b586f6 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -370,6 +370,10 @@ public: /// alias sets. bool containsPointer(Value *P, uint64_t Size, const AAMDNodes &AAInfo) const; + /// Return true if the specified instruction "may" (or must) alias one of the + /// members in any of the sets. + bool containsUnknown(Instruction *I) const; + /// getAliasAnalysis - Return the underlying alias analysis object used by /// this tracker. AliasAnalysis &getAliasAnalysis() const { return AA; } diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp index 843a5413209..9d941e55797 100644 --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -242,7 +242,12 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, return false; } - +bool AliasSetTracker::containsUnknown(Instruction *Inst) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesUnknownInst(Inst, AA)) + return true; + return false; +} AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { AliasSet *FoundSet = nullptr; diff --git a/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/llvm/lib/Transforms/Scalar/LoadCombine.cpp index 648626a7c12..11e4d7606d9 100644 --- a/llvm/lib/Transforms/Scalar/LoadCombine.cpp +++ b/llvm/lib/Transforms/Scalar/LoadCombine.cpp @@ -15,6 +15,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Pass.h" #include "llvm/IR/DataLayout.h" @@ -51,11 +53,12 @@ struct LoadPOPPair { class LoadCombine : public BasicBlockPass { LLVMContext *C; const DataLayout *DL; + AliasAnalysis *AA; public: LoadCombine() : 
BasicBlockPass(ID), - C(nullptr), DL(nullptr) { + C(nullptr), DL(nullptr), AA(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } @@ -225,19 +228,23 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { if (skipOptnoneFunction(BB) || !DL) return false; + AA = &getAnalysis<AliasAnalysis>(); + IRBuilder<true, TargetFolder> TheBuilder(BB.getContext(), TargetFolder(DL)); Builder = &TheBuilder; DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap; + AliasSetTracker AST(*AA); bool Combined = false; unsigned Index = 0; for (auto &I : BB) { - if (I.mayWriteToMemory() || I.mayThrow()) { + if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) { if (combineLoads(LoadMap)) Combined = true; LoadMap.clear(); + AST.clear(); continue; } LoadInst *LI = dyn_cast<LoadInst>(&I); @@ -250,6 +257,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { if (!POP.Pointer) continue; LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++)); + AST.add(LI); } if (combineLoads(LoadMap)) Combined = true; @@ -258,6 +266,9 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); } char LoadCombine::ID = 0; @@ -266,5 +277,9 @@ BasicBlockPass *llvm::createLoadCombinePass() { return new LoadCombine(); } -INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false, - false) +INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads", + false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads", + false, false) + diff --git a/llvm/test/Transforms/LoadCombine/load-combine-aa.ll b/llvm/test/Transforms/LoadCombine/load-combine-aa.ll new file mode 100644 index 00000000000..3542dcebf5e --- /dev/null +++ b/llvm/test/Transforms/LoadCombine/load-combine-aa.ll @@ -0,0 +1,39 @@ +; RUN: opt -basicaa -load-combine 
-instcombine -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) { +; CHECK-LABEL: @test1 + +; CHECK: load i64* +; CHECK: ret i64 + + %load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + store i32 %load1, i32* %b, align 4 + %load2 = load i32* %arrayidx1, align 4 + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + +define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) { +; CHECK-LABEL: @test2 + +; CHECK: load i32* +; CHECK: load i32* +; CHECK: ret i64 + + %load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + store i32 %load1, i32* %b, align 4 + %load2 = load i32* %arrayidx1, align 4 + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + diff --git a/llvm/test/Transforms/LoadCombine/load-combine-assume.ll b/llvm/test/Transforms/LoadCombine/load-combine-assume.ll new file mode 100644 index 00000000000..94f630072ad --- /dev/null +++ b/llvm/test/Transforms/LoadCombine/load-combine-assume.ll @@ -0,0 +1,44 @@ +; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.assume(i1) nounwind + +; 'load' before the 'call' gets optimized: +define i64 @test1(i32* nocapture readonly %a, i1 %b) { +; CHECK-LABEL: @test1 + +; CHECK-DAG: load i64* %1, align 4 +; CHECK-DAG: tail call void @llvm.assume(i1 %b) +; CHECK: ret i64 + + %load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + %load2 = load i32* %arrayidx1, align 4 + tail call void @llvm.assume(i1 
%b) + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + +; 'call' before the 'load' still gets optimized: +define i64 @test2(i32* nocapture readonly %a, i1 %b) { +; CHECK-LABEL: @test2 + +; CHECK-DAG: load i64* %1, align 4 +; CHECK-DAG: tail call void @llvm.assume(i1 %b) +; CHECK: ret i64 + + %load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + tail call void @llvm.assume(i1 %b) + %load2 = load i32* %arrayidx1, align 4 + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + |