summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaphael Isemann <teemperor@gmail.com>2017-07-09 21:14:36 +0000
committerRaphael Isemann <teemperor@gmail.com>2017-07-09 21:14:36 +0000
commit4eac9f05452b4404c36ff7c842fbf13351f73339 (patch)
treee2ef58fd73c4c07da3578c2ff0f7da429cb783b2
parentc4b0ccd0492350eaefe141ee66df84dca109a620 (diff)
downloadbcm5719-llvm-4eac9f05452b4404c36ff7c842fbf13351f73339.tar.gz
bcm5719-llvm-4eac9f05452b4404c36ff7c842fbf13351f73339.zip
[analyzer] Faster hashing of subsequences in CompoundStmts.
Summary: This patch improves the hashing of subsequences in CompoundStmts by incrementally hashing all subsequences with the same starting position. This results in a reduction of the time for this constraint while running over SQLite from 1.10 seconds to 0.55 seconds (-50%). Reviewers: NoQ Reviewed By: NoQ Subscribers: cfe-commits, xazax.hun, v.g.vassilev Differential Revision: https://reviews.llvm.org/D34364 llvm-svn: 307509
-rw-r--r--clang/lib/Analysis/CloneDetection.cpp29
1 files changed, 20 insertions, 9 deletions
diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp
index e698d3e5c56..5ea74989a7e 100644
--- a/clang/lib/Analysis/CloneDetection.cpp
+++ b/clang/lib/Analysis/CloneDetection.cpp
@@ -239,16 +239,27 @@ size_t RecursiveCloneTypeIIConstraint::saveHash(
}
if (CS) {
- for (unsigned Length = 2; Length <= CS->size(); ++Length) {
- for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
- llvm::MD5 Hash;
- for (unsigned i = Pos; i < Pos + Length; ++i) {
- size_t ChildHash = ChildHashes[i];
- Hash.update(StringRef(reinterpret_cast<char *>(&ChildHash),
- sizeof(ChildHash)));
+ // If we're in a CompoundStmt, we hash all possible combinations of child
+ // statements to find clones in those subsequences.
+ // We first go through every possible starting position of a subsequence.
+ for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {
+ // Then we try all possible lengths this subsequence could have and
+ // reuse the same hash object to make sure we only hash every child
+ // hash exactly once.
+ llvm::MD5 Hash;
+ for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
+ // Grab the current child hash and put it into our hash. We do
+ // -1 on the index because we start counting the length at 1.
+ size_t ChildHash = ChildHashes[Pos + Length - 1];
+ Hash.update(
+ StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
+ // If we have at least two elements in our subsequence, we can start
+ // saving it.
+ if (Length > 1) {
+ llvm::MD5 SubHash = Hash;
+ StmtsByHash.push_back(std::make_pair(
+ createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length)));
}
- StmtsByHash.push_back(std::make_pair(
- createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length)));
}
}
}
OpenPOWER on IntegriCloud