diff options
author | Raphael Isemann <teemperor@gmail.com> | 2017-07-09 21:14:36 +0000 |
---|---|---|
committer | Raphael Isemann <teemperor@gmail.com> | 2017-07-09 21:14:36 +0000 |
commit | 4eac9f05452b4404c36ff7c842fbf13351f73339 (patch) | |
tree | e2ef58fd73c4c07da3578c2ff0f7da429cb783b2 | |
parent | c4b0ccd0492350eaefe141ee66df84dca109a620 (diff) | |
download | bcm5719-llvm-4eac9f05452b4404c36ff7c842fbf13351f73339.tar.gz bcm5719-llvm-4eac9f05452b4404c36ff7c842fbf13351f73339.zip |
[analyzer] Faster hashing of subsequences in CompoundStmts.
Summary: This patches improves the hashing subsequences in CompoundStmts by incrementally hashing all subsequences with the same starting position. This results in a reduction of the time for this constraint while running over SQLite from 1.10 seconds to 0.55 seconds (-50%).
Reviewers: NoQ
Reviewed By: NoQ
Subscribers: cfe-commits, xazax.hun, v.g.vassilev
Differential Revision: https://reviews.llvm.org/D34364
llvm-svn: 307509
-rw-r--r-- | clang/lib/Analysis/CloneDetection.cpp | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp index e698d3e5c56..5ea74989a7e 100644 --- a/clang/lib/Analysis/CloneDetection.cpp +++ b/clang/lib/Analysis/CloneDetection.cpp @@ -239,16 +239,27 @@ size_t RecursiveCloneTypeIIConstraint::saveHash( } if (CS) { - for (unsigned Length = 2; Length <= CS->size(); ++Length) { - for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) { - llvm::MD5 Hash; - for (unsigned i = Pos; i < Pos + Length; ++i) { - size_t ChildHash = ChildHashes[i]; - Hash.update(StringRef(reinterpret_cast<char *>(&ChildHash), - sizeof(ChildHash))); + // If we're in a CompoundStmt, we hash all possible combinations of child + // statements to find clones in those subsequences. + // We first go through every possible starting position of a subsequence. + for (unsigned Pos = 0; Pos < CS->size(); ++Pos) { + // Then we try all possible lengths this subsequence could have and + // reuse the same hash object to make sure we only hash every child + // hash exactly once. + llvm::MD5 Hash; + for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) { + // Grab the current child hash and put it into our hash. We do + // -1 on the index because we start counting the length at 1. + size_t ChildHash = ChildHashes[Pos + Length - 1]; + Hash.update( + StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash))); + // If we have at least two elements in our subsequence, we can start + // saving it. + if (Length > 1) { + llvm::MD5 SubHash = Hash; + StmtsByHash.push_back(std::make_pair( + createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length))); } - StmtsByHash.push_back(std::make_pair( - createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length))); } } } |