summaryrefslogtreecommitdiffstats
path: root/clang/lib/Serialization/ASTWriter.cpp
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2019-10-15 14:23:55 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2019-10-15 14:23:55 +0000
commit1731fc88d1fa1fa55edd056db73a339b415dd5d6 (patch)
treefa2fcf134084dc7c324391f98a5d60e0caac519a /clang/lib/Serialization/ASTWriter.cpp
parentce00cd6ae845677ef95f8d62a3745f6e13125882 (diff)
downloadbcm5719-llvm-1731fc88d1fa1fa55edd056db73a339b415dd5d6.tar.gz
bcm5719-llvm-1731fc88d1fa1fa55edd056db73a339b415dd5d6.zip
Reapply: [Modules][PCH] Hash input files content
Summary: When files often get touched during builds, the mtime-based validation leads to different problems in implicit modules builds, even when the content doesn't actually change: - Modules only: module invalidation due to out-of-date files, usually causing rebuild traffic. - Modules + PCH: build failures because clang cannot rebuild a module if it comes from building a PCH. - PCH: build failures because clang cannot rebuild a PCH in case one of the input headers has a different mtime. This patch proposes hashing the content of input files (headers and module maps), which is performed during serialization time. When looking at input files for validation, clang only computes the hash in case there's an mtime mismatch. I've tested a couple of different hash algorithms available in LLVM when building modules+pch for `#import <Cocoa/Cocoa.h>`: - `hash_code`: performance diff within the noise, total module cache increased by 0.07%. - `SHA1`: 5% slowdown. Haven't done real size measurements, but it'd be BLOCK_ID+20 bytes per input file, instead of BLOCK_ID+8 bytes from `hash_code`. - `MD5`: 3% slowdown. Like above, but BLOCK_ID+16 bytes per input file. Given the numbers above, the patch uses `hash_code`. The patch also improves invalidation error messages to point out which type of problem the user is facing: "mtime", "size" or "content". rdar://problem/29320105 Reviewers: dexonsmith, arphaman, rsmith, aprantl Subscribers: jkorous, cfe-commits, ributzka Tags: #clang Differential Revision: https://reviews.llvm.org/D67249 > llvm-svn: 374841 llvm-svn: 374895
Diffstat (limited to 'clang/lib/Serialization/ASTWriter.cpp')
-rw-r--r--clang/lib/Serialization/ASTWriter.cpp59
1 files changed, 48 insertions, 11 deletions
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 3badff45fb4..5e9e650fce8 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1099,6 +1099,7 @@ void ASTWriter::WriteBlockInfoBlock() {
BLOCK(INPUT_FILES_BLOCK);
RECORD(INPUT_FILE);
+ RECORD(INPUT_FILE_HASH);
// AST Top-Level Block.
BLOCK(AST_BLOCK);
@@ -1764,6 +1765,7 @@ struct InputFileEntry {
bool IsTransient;
bool BufferOverridden;
bool IsTopLevelModuleMap;
+ uint32_t ContentHash[2];
};
} // namespace
@@ -1787,6 +1789,13 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));
+ // Create input file hash abbreviation.
+ auto IFHAbbrev = std::make_shared<BitCodeAbbrev>();
+ IFHAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_HASH));
+ IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ unsigned IFHAbbrevCode = Stream.EmitAbbrev(std::move(IFHAbbrev));
+
// Get all ContentCache objects for files, sorted by whether the file is a
// system one or not. System files go at the back, users files at the front.
std::deque<InputFileEntry> SortedFiles;
@@ -1810,6 +1819,25 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
File.getIncludeLoc().isInvalid();
+
+ auto ContentHash = hash_code(-1);
+ if (PP->getHeaderSearchInfo()
+ .getHeaderSearchOpts()
+ .ValidateASTInputFilesContent) {
+ auto *MemBuff = Cache->getRawBuffer();
+ if (MemBuff)
+ ContentHash = hash_value(MemBuff->getBuffer());
+ else
+ // FIXME: The path should be taken from the FileEntryRef.
+ PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
+ << Entry.File->getName();
+ }
+ auto CH = llvm::APInt(64, ContentHash);
+ Entry.ContentHash[0] =
+ static_cast<uint32_t>(CH.getLoBits(32).getZExtValue());
+ Entry.ContentHash[1] =
+ static_cast<uint32_t>(CH.getHiBits(32).getZExtValue());
+
if (Entry.IsSystemFile)
SortedFiles.push_back(Entry);
else
@@ -1834,17 +1862,26 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
// Emit size/modification time for this file.
// And whether this file was overridden.
- RecordData::value_type Record[] = {
- INPUT_FILE,
- InputFileOffsets.size(),
- (uint64_t)Entry.File->getSize(),
- (uint64_t)getTimestampForOutput(Entry.File),
- Entry.BufferOverridden,
- Entry.IsTransient,
- Entry.IsTopLevelModuleMap};
-
- // FIXME: The path should be taken from the FileEntryRef.
- EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
+ {
+ RecordData::value_type Record[] = {
+ INPUT_FILE,
+ InputFileOffsets.size(),
+ (uint64_t)Entry.File->getSize(),
+ (uint64_t)getTimestampForOutput(Entry.File),
+ Entry.BufferOverridden,
+ Entry.IsTransient,
+ Entry.IsTopLevelModuleMap};
+
+ // FIXME: The path should be taken from the FileEntryRef.
+ EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
+ }
+
+ // Emit content hash for this file.
+ {
+ RecordData::value_type Record[] = {INPUT_FILE_HASH, Entry.ContentHash[0],
+ Entry.ContentHash[1]};
+ Stream.EmitRecordWithAbbrev(IFHAbbrevCode, Record);
+ }
}
Stream.ExitBlock();
OpenPOWER on IntegriCloud