diff options
author | Chris Lattner <sabre@nondot.org> | 2009-01-26 00:43:02 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2009-01-26 00:43:02 +0000 |
commit | 4fa23625abb476ac89b84d06ee2a45ef822eaf15 (patch) | |
tree | 005a1b93adaf38a3e73ea0b512bafce88140568f | |
parent | cf8e1fa58e25fbc49b2358994e5f83498aff5dd3 (diff) | |
download | bcm5719-llvm-4fa23625abb476ac89b84d06ee2a45ef822eaf15.tar.gz bcm5719-llvm-4fa23625abb476ac89b84d06ee2a45ef822eaf15.zip |
Check in the long-promised SourceLocation rewrite. This lays the
groundwork for implementing #line, and fixes the "out of macro ID's"
problem.
There is nothing particularly tricky about the code, other than the
very performance-sensitive SourceManager::getFileID() method.
llvm-svn: 62978
-rw-r--r-- | clang/include/clang/Basic/SourceLocation.h | 109 | ||||
-rw-r--r-- | clang/include/clang/Basic/SourceManager.h | 399 | ||||
-rw-r--r-- | clang/include/clang/Lex/Lexer.h | 7 | ||||
-rw-r--r-- | clang/lib/Basic/SourceManager.cpp | 375 | ||||
-rw-r--r-- | clang/lib/Lex/Lexer.cpp | 25 | ||||
-rw-r--r-- | clang/lib/Lex/PPMacroExpansion.cpp | 11 | ||||
-rw-r--r-- | clang/lib/Lex/PTHLexer.cpp | 5 | ||||
-rw-r--r-- | clang/lib/Lex/ScratchBuffer.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Lex/TokenLexer.cpp | 7 | ||||
-rw-r--r-- | clang/lib/Rewrite/HTMLRewrite.cpp | 3 | ||||
-rw-r--r-- | clang/lib/Rewrite/Rewriter.cpp | 2 |
11 files changed, 516 insertions, 429 deletions
diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index 8def5c5c1a5..6da9a87ee3a 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -45,12 +45,12 @@ public: bool operator>(const FileID &RHS) const { return RHS < *this; } bool operator>=(const FileID &RHS) const { return RHS <= *this; } - static FileID getSentinel() { return Create(~0U); } + static FileID getSentinel() { return get(~0U); } unsigned getHashValue() const { return ID; } private: friend class SourceManager; - static FileID Create(unsigned V) { + static FileID get(unsigned V) { FileID F; F.ID = V; return F; @@ -66,33 +66,14 @@ class SourceLocation { unsigned ID; friend class SourceManager; enum { - // FileID Layout: - // bit 31: 0 -> FileID, 1 -> MacroID (invalid for FileID) - // 30...17 -> ChunkID of location, index into SourceManager table. - ChunkIDBits = 14, - // 0...16 -> Index into the chunk of the specified ChunkID. - FilePosBits = 32-1-ChunkIDBits, - - // MacroID Layout: - // bit 31: 1 -> MacroID, 0 -> FileID (invalid for MacroID) - - // bit 29,30: unused. - - // bits 28...9 -> MacroID number. - MacroIDBits = 20, - // bits 8...0 -> Macro spelling offset - MacroSpellingOffsBits = 9, - - - // Useful constants. - ChunkSize = (1 << FilePosBits) + MacroIDBit = 1U << 31 }; public: SourceLocation() : ID(0) {} // 0 is an invalid FileID. - bool isFileID() const { return (ID >> 31) == 0; } - bool isMacroID() const { return (ID >> 31) != 0; } + bool isFileID() const { return (ID & MacroIDBit) == 0; } + bool isMacroID() const { return (ID & MacroIDBit) != 0; } /// isValid - Return true if this is a valid SourceLocation object. Invalid /// SourceLocations are often used when events have no corresponding location @@ -102,86 +83,34 @@ public: bool isInvalid() const { return ID == 0; } private: - /// getChunkID - Return the chunk identifier for this SourceLocation. 
This - /// ChunkID can be used with the SourceManager object to obtain an entire - /// include stack for a file position reference. - unsigned getChunkID() const { - assert(isFileID() && "can't get the file id of a non-file sloc!"); - return ID >> FilePosBits; + /// getOffset - Return the index for SourceManager's SLocEntryTable table, + /// note that this is not an index *into* it though. + unsigned getOffset() const { + return ID & ~MacroIDBit; } - unsigned getMacroID() const { - assert(isMacroID() && "Is not a macro id!"); - return (ID >> MacroSpellingOffsBits) & ((1 << MacroIDBits)-1); - } - - static SourceLocation getFileLoc(unsigned ChunkID, unsigned FilePos) { + static SourceLocation getFileLoc(unsigned ID) { + assert((ID & MacroIDBit) == 0 && "Ran out of source locations!"); SourceLocation L; - // If a FilePos is larger than (1<<FilePosBits), the SourceManager makes - // enough consequtive ChunkIDs that we have one for each chunk. - if (FilePos >= ChunkSize) { - ChunkID += FilePos >> FilePosBits; - FilePos &= ChunkSize-1; - } - - // FIXME: Find a way to handle out of ChunkID bits! Maybe MaxFileID is an - // escape of some sort? - assert(ChunkID < (1 << ChunkIDBits) && "Out of ChunkID's"); - - L.ID = (ChunkID << FilePosBits) | FilePos; + L.ID = ID; return L; } - static bool isValidMacroSpellingOffs(int Val) { - if (Val >= 0) - return Val < (1 << (MacroSpellingOffsBits-1)); - return -Val <= (1 << (MacroSpellingOffsBits-1)); - } - - static SourceLocation getMacroLoc(unsigned MacroID, int SpellingOffs) { - assert(MacroID < (1 << MacroIDBits) && "Too many macros!"); - assert(isValidMacroSpellingOffs(SpellingOffs) &&"spelling offs too large!"); - - // Mask off sign bits. 
- SpellingOffs &= (1 << MacroSpellingOffsBits)-1; - + static SourceLocation getMacroLoc(unsigned ID) { + assert((ID & MacroIDBit) == 0 && "Ran out of source locations!"); SourceLocation L; - L.ID = (1 << 31) | - (MacroID << MacroSpellingOffsBits) | - SpellingOffs; + L.ID = MacroIDBit | ID; return L; } - - /// getRawFilePos - Return the byte offset from the start of the file-chunk - /// referred to by ChunkID. This method should not be used to get the offset - /// from the start of the file, instead you should use - /// SourceManager::getDecomposedFileLoc. This method will be - // incorrect for large files. - unsigned getRawFilePos() const { - assert(isFileID() && "can't get the file id of a non-file sloc!"); - return ID & (ChunkSize-1); - } - - int getMacroSpellingOffs() const { - assert(isMacroID() && "Is not a macro id!"); - int Val = ID & ((1 << MacroSpellingOffsBits)-1); - // Sign extend it properly. - unsigned ShAmt = sizeof(int)*8 - MacroSpellingOffsBits; - return (Val << ShAmt) >> ShAmt; - } public: /// getFileLocWithOffset - Return a source location with the specified offset /// from this file SourceLocation. SourceLocation getFileLocWithOffset(int Offset) const { - unsigned ChunkID = getChunkID(); - Offset += getRawFilePos(); - // Handle negative offsets correctly. 
- while (Offset < 0) { - --ChunkID; - Offset += ChunkSize; - } - return getFileLoc(ChunkID, Offset); + assert(((getOffset()+Offset) & MacroIDBit) == 0 && "invalid location"); + SourceLocation L; + L.ID = ID+Offset; + return L; } /// getRawEncoding - When a SourceLocation itself cannot be used, this returns diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index c8362d890e5..17c39f8016e 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -116,99 +116,113 @@ namespace SrcMgr { ContentCache &operator=(const ContentCache& RHS); }; - /// FileIDInfo - Information about a FileID, basically just the logical file - /// that it represents and include stack information. A File SourceLocation - /// is a byte offset from the start of this. + /// FileInfo - Information about a FileID, basically just the logical file + /// that it represents and include stack information. /// - /// FileID's are used to compute the location of a character in memory as well - /// as the instantiation source location, which can be differ from the - /// spelling location. It is different when #line's are active or when macros - /// have been expanded. + /// Each FileInfo has include stack information, indicating where it came + /// from. This information encodes the #include chain that a token was + /// instantiated from. The main include file has an invalid IncludeLoc. /// - /// Each FileID has include stack information, indicating where it came from. - /// For the primary translation unit, it comes from SourceLocation() aka 0. - /// This information encodes the #include chain that a token was instantiated - /// from. + /// FileInfos contain a "ContentCache *", with the contents of the file. 
/// - /// FileIDInfos contain a "ContentCache *", describing the source file, - /// and a Chunk number, which allows a SourceLocation to index into very - /// large files (those which there are not enough FilePosBits to address). - /// - struct FileIDInfo { - private: + class FileInfo { /// IncludeLoc - The location of the #include that brought in this file. - /// This SourceLocation object has an invalid SLOC for the main file. - SourceLocation IncludeLoc; - - /// ChunkNo - Really large buffers are broken up into chunks that are - /// each (1 << SourceLocation::FilePosBits) in size. This specifies the - /// chunk number of this FileID. - unsigned ChunkNo : 30; - - /// FileCharacteristic - This is an instance of CharacteristicKind, - /// indicating whether this is a system header dir or not. - unsigned FileCharacteristic : 2; + /// This is an invalid SLOC for the main file (top of the #include chain). + unsigned IncludeLoc; // Really a SourceLocation /// Content - Information about the source buffer itself. const ContentCache *Content; + /// FileCharacteristic - This is an instance of CharacteristicKind, + /// indicating whether this is a system header dir or not. + unsigned FileCharacteristic : 2; public: - /// get - Return a FileIDInfo object. - static FileIDInfo get(SourceLocation IL, unsigned CN, - const ContentCache *Con, - CharacteristicKind FileCharacter) { - FileIDInfo X; - X.IncludeLoc = IL; - X.ChunkNo = CN; + /// get - Return a FileInfo object. 
+ static FileInfo get(SourceLocation IL, const ContentCache *Con, + CharacteristicKind FileCharacter) { + FileInfo X; + X.IncludeLoc = IL.getRawEncoding(); X.Content = Con; X.FileCharacteristic = FileCharacter; return X; } - SourceLocation getIncludeLoc() const { return IncludeLoc; } - unsigned getChunkNo() const { return ChunkNo; } + SourceLocation getIncludeLoc() const { + return SourceLocation::getFromRawEncoding(IncludeLoc); + } const ContentCache* getContentCache() const { return Content; } - + /// getCharacteristic - Return whether this is a system header or not. CharacteristicKind getFileCharacteristic() const { return (CharacteristicKind)FileCharacteristic; } - - /// Emit - Emit this FileIDInfo to Bitcode. - void Emit(llvm::Serializer& S) const; - - /// ReadVal - Reconstitute a FileIDInfo from Bitcode. - static FileIDInfo ReadVal(llvm::Deserializer& S); }; - /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. - /// Each MacroIDInfo encodes the Instantiation location - where the macro was - /// instantiated, and the SpellingLoc - where the actual character data for - /// the token came from. An actual macro SourceLocation stores deltas from - /// these positions. - class MacroIDInfo { - SourceLocation InstantiationLoc, SpellingLoc; + /// InstantiationInfo - Each InstantiationInfo encodes the Instantiation + /// location - where the token was ultimately instantiated, and the + /// SpellingLoc - where the actual character data for the token came from. + class InstantiationInfo { + unsigned InstantiationLoc, SpellingLoc; // Really these are SourceLocations. 
public: - SourceLocation getInstantiationLoc() const { return InstantiationLoc; } - SourceLocation getSpellingLoc() const { return SpellingLoc; } + SourceLocation getInstantiationLoc() const { + return SourceLocation::getFromRawEncoding(InstantiationLoc); + } + SourceLocation getSpellingLoc() const { + return SourceLocation::getFromRawEncoding(SpellingLoc); + } - /// get - Return a MacroID for a macro expansion. VL specifies + /// get - Return a InstantiationInfo for an expansion. VL specifies /// the instantiation location (where the macro is expanded), and SL /// specifies the spelling location (where the characters from the token /// come from). Both VL and PL refer to normal File SLocs. - static MacroIDInfo get(SourceLocation VL, SourceLocation SL) { - MacroIDInfo X; - X.InstantiationLoc = VL; - X.SpellingLoc = SL; + static InstantiationInfo get(SourceLocation IL, SourceLocation SL) { + InstantiationInfo X; + X.InstantiationLoc = IL.getRawEncoding(); + X.SpellingLoc = SL.getRawEncoding(); return X; } + }; + + /// SLocEntry - This is a discriminated union of FileInfo and + /// InstantiationInfo. SourceManager keeps an array of these objects, and + /// they are uniquely identified by the FileID datatype. + class SLocEntry { + unsigned Offset; // low bit is set for instantiation info. + union { + FileInfo File; + InstantiationInfo Instantiation; + }; + public: + unsigned getOffset() const { return Offset >> 1; } - /// Emit - Emit this MacroIDInfo to Bitcode. - void Emit(llvm::Serializer& S) const; + bool isInstantiation() const { return Offset & 1; } + bool isFile() const { return !isInstantiation(); } + + const FileInfo &getFile() const { + assert(isFile() && "Not a file SLocEntry!"); + return File; + } + + const InstantiationInfo &getInstantiation() const { + assert(isInstantiation() && "Not an instantiation SLocEntry!"); + return Instantiation; + } - /// ReadVal - Reconstitute a MacroIDInfo from Bitcode. 
- static MacroIDInfo ReadVal(llvm::Deserializer& S); + static SLocEntry get(unsigned Offset, const FileInfo &FI) { + SLocEntry E; + E.Offset = Offset << 1; + E.File = FI; + return E; + } + + static SLocEntry get(unsigned Offset, const InstantiationInfo &II) { + SLocEntry E; + E.Offset = (Offset << 1) | 1; + E.Instantiation = II; + return E; + } }; + } // end SrcMgr namespace. } // end clang namespace @@ -247,12 +261,17 @@ class SourceManager { /// stored ContentCache objects are NULL, as they do not refer to a file. std::list<SrcMgr::ContentCache> MemBufferInfos; - /// FileIDs - Information about each FileID. FileID #0 is not valid, so all - /// entries are off by one. - std::vector<SrcMgr::FileIDInfo> FileIDs; + /// SLocEntryTable - This is an array of SLocEntry's that we have created. + /// FileID is an index into this vector. This array is sorted by the offset. + std::vector<SrcMgr::SLocEntry> SLocEntryTable; + /// NextOffset - This is the next available offset that a new SLocEntry can + /// start at. It is SLocEntryTable.back().getOffset()+size of back() entry. + unsigned NextOffset; - /// MacroIDs - Information about each MacroID. - std::vector<SrcMgr::MacroIDInfo> MacroIDs; + /// LastFileIDLookup - This is a one-entry cache to speed up getFileID. + /// LastFileIDLookup records the last FileID looked up or created, because it + /// is very common to look up many tokens from the same file. + mutable FileID LastFileIDLookup; /// LastLineNo - These ivars serve as a cache used in the getLineNumber /// method which is used to speedup getLineNumber calls to nearby locations. @@ -264,19 +283,28 @@ class SourceManager { /// MainFileID - The file ID for the main source file of the translation unit. FileID MainFileID; + // Statistics for -print-stats. + mutable unsigned NumLinearScans, NumBinaryProbes; + // SourceManager doesn't support copy construction. 
explicit SourceManager(const SourceManager&); void operator=(const SourceManager&); public: - SourceManager() {} + SourceManager() : NumLinearScans(0), NumBinaryProbes(0) { + clearIDTables(); + } ~SourceManager() {} void clearIDTables() { MainFileID = FileID(); - FileIDs.clear(); - MacroIDs.clear(); + SLocEntryTable.clear(); LastLineNoFileIDQuery = FileID(); LastLineNoContentCache = 0; + LastFileIDLookup = FileID(); + + // Use up FileID #0 as an invalid instantiation. + NextOffset = 0; + createInstantiationLoc(SourceLocation(), SourceLocation(), 1); } //===--------------------------------------------------------------------===// @@ -295,7 +323,7 @@ public: } //===--------------------------------------------------------------------===// - // Methods to create new FileID's. + // Methods to create new FileID's and instantiations. //===--------------------------------------------------------------------===// /// createFileID - Create a new FileID that represents the specified file @@ -303,7 +331,7 @@ public: /// error and translates NULL into standard input. FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter) { - const SrcMgr::ContentCache *IR = getContentCache(SourceFile); + const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile); if (IR == 0) return FileID(); // Error opening file? return createFileID(IR, IncludePos, FileCharacter); } @@ -325,6 +353,13 @@ public: return MainFileID; } + /// createInstantiationLoc - Return a new SourceLocation that encodes the fact + /// that a token at Loc should actually be referenced from InstantiationLoc. + /// TokLength is the length of the token being instantiated. + SourceLocation createInstantiationLoc(SourceLocation Loc, + SourceLocation InstantiationLoc, + unsigned TokLength); + //===--------------------------------------------------------------------===// // FileID manipulation methods. 
//===--------------------------------------------------------------------===// @@ -332,12 +367,12 @@ public: /// getBuffer - Return the buffer for the specified FileID. /// const llvm::MemoryBuffer *getBuffer(FileID FID) const { - return getContentCache(FID)->getBuffer(); + return getSLocEntry(FID).getFile().getContentCache()->getBuffer(); } /// getFileEntryForID - Returns the FileEntry record for the provided FileID. const FileEntry *getFileEntryForID(FileID FID) const { - return getContentCache(FID)->Entry; + return getSLocEntry(FID).getFile().getContentCache()->Entry; } /// getBufferData - Return a pointer to the start and end of the source buffer @@ -349,26 +384,112 @@ public: // SourceLocation manipulation methods. //===--------------------------------------------------------------------===// + /// getFileIDSlow - Return the FileID for a SourceLocation. This is a very + /// hot method that is used for all SourceManager queries that start with a + /// SourceLocation object. It is responsible for finding the entry in + /// SLocEntryTable which contains the specified location. + /// + FileID getFileID(SourceLocation SpellingLoc) const { + unsigned SLocOffset = SpellingLoc.getOffset(); + + // If our one-entry cache covers this offset, just return it. + if (isOffsetInFileID(LastFileIDLookup, SLocOffset)) + return LastFileIDLookup; + + return getFileIDSlow(SLocOffset); + } + /// getLocForStartOfFile - Return the source location corresponding to the /// first byte of the specified file. SourceLocation getLocForStartOfFile(FileID FID) const { - return SourceLocation::getFileLoc(FID.ID, 0); + assert(FID.ID < SLocEntryTable.size() && SLocEntryTable[FID.ID].isFile()); + unsigned FileOffset = SLocEntryTable[FID.ID].getOffset(); + return SourceLocation::getFileLoc(FileOffset); } - /// getInstantiationLoc - Return a new SourceLocation that encodes the fact - /// that a token at Loc should actually be referenced from InstantiationLoc. 
- SourceLocation getInstantiationLoc(SourceLocation Loc, - SourceLocation InstantiationLoc); - - /// getIncludeLoc - Return the location of the #include for the specified + /// getIncludeLoc - Return the location of the #include for the specified /// SourceLocation. If this is a macro expansion, this transparently figures /// out which file includes the file being expanded into. SourceLocation getIncludeLoc(SourceLocation ID) const { - return getFIDInfo(getInstantiationLoc(ID).getChunkID())->getIncludeLoc(); + return getSLocEntry(getFileID(getInstantiationLoc(ID))) + .getFile().getIncludeLoc(); + } + + /// Given a SourceLocation object, return the instantiation location + /// referenced by the ID. + SourceLocation getInstantiationLoc(SourceLocation Loc) const { + // File locations work! + if (Loc.isFileID()) return Loc; + + std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); + Loc = getSLocEntry(LocInfo.first).getInstantiation().getInstantiationLoc(); + return Loc.getFileLocWithOffset(LocInfo.second); + } + + /// getSpellingLoc - Given a SourceLocation object, return the spelling + /// location referenced by the ID. This is the place where the characters + /// that make up the lexed token can be found. + SourceLocation getSpellingLoc(SourceLocation Loc) const { + // File locations work! + if (Loc.isFileID()) return Loc; + + std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); + Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc(); + return Loc.getFileLocWithOffset(LocInfo.second); + } + + /// getDecomposedLoc - Decompose the specified location into a raw FileID + + /// Offset pair. The first element is the FileID, the second is the + /// offset from the start of the buffer of the location. 
+ std::pair<FileID, unsigned> getDecomposedLoc(SourceLocation Loc) const { + FileID FID = getFileID(Loc); + return std::make_pair(FID, Loc.getOffset()-getSLocEntry(FID).getOffset()); + } + + /// getDecomposedInstantiationLoc - Decompose the specified location into a + /// raw FileID + Offset pair. If the location is an instantiation record, + /// walk through it until we find the final location instantiated. + std::pair<FileID, unsigned> + getDecomposedInstantiationLoc(SourceLocation Loc) const { + FileID FID = getFileID(Loc); + const SrcMgr::SLocEntry *E = &getSLocEntry(FID); + + unsigned Offset = Loc.getOffset()-E->getOffset(); + if (Loc.isFileID()) + return std::make_pair(FID, Offset); + + return getDecomposedInstantiationLocSlowCase(E, Offset); + } + + /// getDecomposedSpellingLoc - Decompose the specified location into a raw + /// FileID + Offset pair. If the location is an instantiation record, walk + /// through it until we find its spelling record. + std::pair<FileID, unsigned> + getDecomposedSpellingLoc(SourceLocation Loc) const { + FileID FID = getFileID(Loc); + const SrcMgr::SLocEntry *E = &getSLocEntry(FID); + + unsigned Offset = Loc.getOffset()-E->getOffset(); + if (Loc.isFileID()) + return std::make_pair(FID, Offset); + return getDecomposedSpellingLocSlowCase(E, Offset); + } + + /// getFullFilePos - This (efficient) method returns the offset from the start + /// of the file that the specified spelling SourceLocation represents. This + /// returns the location of the actual character data, not the instantiation + /// position. + unsigned getFullFilePos(SourceLocation SpellingLoc) const { + return getDecomposedLoc(SpellingLoc).second; } + + //===--------------------------------------------------------------------===// + // Queries about the code at a SourceLocation. 
+ //===--------------------------------------------------------------------===// + /// getCharacterData - Return a pointer to the start of the specified location - /// in the appropriate MemoryBuffer. + /// in the appropriate spelling MemoryBuffer. const char *getCharacterData(SourceLocation SL) const; /// getColumnNumber - Return the column # for the specified file position. @@ -391,7 +512,7 @@ public: /// line offsets for the MemoryBuffer, so this is not cheap: use only when /// about to emit a diagnostic. unsigned getLineNumber(SourceLocation Loc) const; - + unsigned getInstantiationLineNumber(SourceLocation Loc) const { return getLineNumber(getInstantiationLoc(Loc)); } @@ -399,65 +520,18 @@ public: return getLineNumber(getSpellingLoc(Loc)); } + // FIXME: This should handle #line. + SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const { + FileID FID = getFileID(getSpellingLoc(Loc)); + return getSLocEntry(FID).getFile().getFileCharacteristic(); + } + /// getSourceName - This method returns the name of the file or buffer that /// the SourceLocation specifies. This can be modified with #line directives, /// etc. const char *getSourceName(SourceLocation Loc) const; - - /// Given a SourceLocation object, return the instantiation location - /// referenced by the ID. - SourceLocation getInstantiationLoc(SourceLocation Loc) const { - // File locations work. - if (Loc.isFileID()) return Loc; - - return MacroIDs[Loc.getMacroID()].getInstantiationLoc(); - } - /// getSpellingLoc - Given a SourceLocation object, return the spelling - /// location referenced by the ID. This is the place where the characters - /// that make up the lexed token can be found. - SourceLocation getSpellingLoc(SourceLocation Loc) const { - // File locations work! - if (Loc.isFileID()) return Loc; - - // Look up the macro token's spelling location. 
- SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc(); - return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs()); - } - - /// getDecomposedFileLoc - Decompose the specified file location into a raw - /// FileID + Offset pair. The first element is the FileID, the second is the - /// offset from the start of the buffer of the location. - std::pair<FileID, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { - assert(Loc.isFileID() && "Isn't a File SourceLocation"); - - // TODO: Add a flag "is first chunk" to SLOC. - const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getChunkID()); - - // If this file has been split up into chunks, factor in the chunk number - // that the FileID references. - unsigned ChunkNo = FIDInfo->getChunkNo(); - unsigned Offset = Loc.getRawFilePos(); - Offset += (ChunkNo << SourceLocation::FilePosBits); - - assert(Loc.getChunkID() >= ChunkNo && "Unexpected offset"); - - return std::make_pair(FileID::Create(Loc.getChunkID()-ChunkNo), Offset); - } - /// getFileID - Return the FileID for a SourceLocation. - /// - FileID getFileID(SourceLocation SpellingLoc) const { - return getDecomposedFileLoc(SpellingLoc).first; - } - - /// getFullFilePos - This (efficient) method returns the offset from the start - /// of the file that the specified spelling SourceLocation represents. This - /// returns the location of the actual character data, not the instantiation - /// position. - unsigned getFullFilePos(SourceLocation SpellingLoc) const { - return getDecomposedFileLoc(SpellingLoc).second; - } /// isFromSameFile - Returns true if both SourceLocations correspond to /// the same file. @@ -470,15 +544,11 @@ public: bool isFromMainFile(SourceLocation Loc) const { return getFileID(Loc) == getMainFileID(); } - + /// isInSystemHeader - Returns if a SourceLocation is in a system header. 
bool isInSystemHeader(SourceLocation Loc) const { return getFileCharacteristic(Loc) != SrcMgr::C_User; } - SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const { - return getFIDInfo(getSpellingLoc(Loc).getChunkID()) - ->getFileCharacteristic(); - } //===--------------------------------------------------------------------===// // Other miscellaneous methods. @@ -503,6 +573,19 @@ public: private: friend struct SrcMgr::ContentCache; // Used for deserialization. + /// isOffsetInFileID - Return true if the specified FileID contains the + /// specified SourceLocation offset. This is a very hot method. + inline bool isOffsetInFileID(FileID FID, unsigned SLocOffset) const { + const SrcMgr::SLocEntry &Entry = getSLocEntry(FID); + // If the entry is after the offset, it can't contain it. + if (SLocOffset < Entry.getOffset()) return false; + + // If this is the last entry than it does. Otherwise, the entry after it + // has to not include it. + if (FID.ID+1 == SLocEntryTable.size()) return true; + return SLocOffset < SLocEntryTable[FID.ID+1].getOffset(); + } + /// createFileID - Create a new fileID for the specified ContentCache and /// include position. This works regardless of whether the ContentCache /// corresponds to a file or some other input source. @@ -510,33 +593,27 @@ private: SourceLocation IncludePos, SrcMgr::CharacteristicKind DirCharacter); - /// getContentCache - Create or return a cached ContentCache for the specified - /// file. This returns null on failure. - const SrcMgr::ContentCache* getContentCache(const FileEntry *SourceFile); + const SrcMgr::ContentCache * + getOrCreateContentCache(const FileEntry *SourceFile); /// createMemBufferContentCache - Create a new ContentCache for the specified /// memory buffer. 
const SrcMgr::ContentCache* createMemBufferContentCache(const llvm::MemoryBuffer *Buf); - const SrcMgr::FileIDInfo *getFIDInfo(unsigned FID) const { - assert(FID-1 < FileIDs.size() && "Invalid FileID!"); - return &FileIDs[FID-1]; - } - const SrcMgr::FileIDInfo *getFIDInfo(FileID FID) const { - return getFIDInfo(FID.ID); + const SrcMgr::SLocEntry &getSLocEntry(FileID FID) const { + assert(FID.ID < SLocEntryTable.size() && "Invalid id"); + return SLocEntryTable[FID.ID]; } - const SrcMgr::ContentCache *getContentCache(FileID FID) const { - return getContentCache(getFIDInfo(FID.ID)); - } - - /// Return the ContentCache structure for the specified FileID. - /// This is always the physical reference for the ID. - const SrcMgr::ContentCache* - getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { - return FIDInfo->getContentCache(); - } + FileID getFileIDSlow(unsigned SLocOffset) const; + + std::pair<FileID, unsigned> + getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E, + unsigned Offset) const; + std::pair<FileID, unsigned> + getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E, + unsigned Offset) const; }; diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 42cdd9170a8..296fca11151 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -190,7 +190,7 @@ public: /// getSourceLocation - Return a source location identifier for the specified /// offset in the current file. - SourceLocation getSourceLocation(const char *Loc) const; + SourceLocation getSourceLocation(const char *Loc, unsigned TokLen = 1) const; /// getSourceLocation - Return a source location for the next character in /// the current file. @@ -228,8 +228,9 @@ private: /// TokEnd. 
void FormTokenWithChars(Token &Result, const char *TokEnd, tok::TokenKind Kind) { - Result.setLocation(getSourceLocation(BufferPtr)); - Result.setLength(TokEnd-BufferPtr); + unsigned TokLen = TokEnd-BufferPtr; + Result.setLength(TokLen); + Result.setLocation(getSourceLocation(BufferPtr, TokLen)); Result.setKind(Kind); BufferPtr = TokEnd; } diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 35c350ebbac..e30e2a81315 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -24,6 +24,10 @@ using namespace clang; using namespace SrcMgr; using llvm::MemoryBuffer; +//===--------------------------------------------------------------------===// +// SourceManager Helper Classes +//===--------------------------------------------------------------------===// + // This (temporary) directive toggles between lazy and eager creation of // MemBuffers. This directive is not permanent, and is here to test a few // potential optimizations in PTH. Once it is clear whether eager or lazy @@ -62,12 +66,16 @@ const llvm::MemoryBuffer* ContentCache::getBuffer() const { return Buffer; } +//===--------------------------------------------------------------------===// +// Private 'Create' methods. +//===--------------------------------------------------------------------===// -/// getFileInfo - Create or return a cached FileInfo for the specified file. -/// -const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { - +/// getOrCreateContentCache - Create or return a cached ContentCache for the +/// specified file. +const ContentCache * +SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) { assert(FileEnt && "Didn't specify a file entry to use?"); + // Do we already have information about this file? 
std::set<ContentCache>::iterator I = FileInfos.lower_bound(ContentCache(FileEnt)); @@ -107,47 +115,34 @@ SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { return &Entry; } +//===----------------------------------------------------------------------===// +// Methods to create new FileID's and instantiations. +//===----------------------------------------------------------------------===// /// createFileID - Create a new fileID for the specified ContentCache and /// include position. This works regardless of whether the ContentCache /// corresponds to a file or some other input source. FileID SourceManager::createFileID(const ContentCache *File, - SourceLocation IncludePos, - SrcMgr::CharacteristicKind FileCharacter) { - // If FileEnt is really large (e.g. it's a large .i file), we may not be able - // to fit an arbitrary position in the file in the FilePos field. To handle - // this, we create one FileID for each chunk of the file that fits in a - // FilePos field. + SourceLocation IncludePos, + SrcMgr::CharacteristicKind FileCharacter) { + SLocEntryTable.push_back(SLocEntry::get(NextOffset, + FileInfo::get(IncludePos, File, + FileCharacter))); unsigned FileSize = File->getSize(); - if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { - FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter)); - assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) && - "Ran out of file ID's!"); - return FileID::Create(FileIDs.size()); - } + assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!"); + NextOffset += FileSize+1; - // Create one FileID for each chunk of the file. 
- unsigned Result = FileIDs.size()+1; - - unsigned ChunkNo = 0; - while (1) { - FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File, - FileCharacter)); - - if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; - FileSize -= (1 << SourceLocation::FilePosBits); - } - - assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) && - "Ran out of file ID's!"); - return FileID::Create(Result); + // Set LastFileIDLookup to the newly created file. The next getFileID call is + // almost guaranteed to be from that file. + return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1); } -/// getInstantiationLoc - Return a new SourceLocation that encodes the fact +/// createInstantiationLoc - Return a new SourceLocation that encodes the fact /// that a token from SpellingLoc should actually be referenced from /// InstantiationLoc. -SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc, - SourceLocation InstantLoc) { +SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc, + SourceLocation InstantLoc, + unsigned TokLength) { // The specified source location may be a mapped location, due to a macro // instantiation or #line directive. Strip off this information to find out // where the characters are actually located. @@ -155,29 +150,13 @@ SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc, // Resolve InstantLoc down to a real instantiation location. InstantLoc = getInstantiationLoc(InstantLoc); - - - // If the last macro id is close to the currently requested location, try to - // reuse it. This implements a small cache. - for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ - MacroIDInfo &LastOne = MacroIDs[i]; - - // The instanitation point and source SpellingLoc have to exactly match to - // reuse (for now). We could allow "nearby" instantiations in the future. 
- if (LastOne.getInstantiationLoc() != InstantLoc || - LastOne.getSpellingLoc().getChunkID() != SpellingLoc.getChunkID()) - continue; - - // Check to see if the spellloc of the token came from near enough to reuse. - int SpellDelta = SpellingLoc.getRawFilePos() - - LastOne.getSpellingLoc().getRawFilePos(); - if (SourceLocation::isValidMacroSpellingOffs(SpellDelta)) - return SourceLocation::getMacroLoc(i, SpellDelta); - } - - - MacroIDs.push_back(MacroIDInfo::get(InstantLoc, SpellingLoc)); - return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); + + SLocEntryTable.push_back(SLocEntry::get(NextOffset, + InstantiationInfo::get(InstantLoc, + SpellingLoc))); + assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!"); + NextOffset += TokLength+1; + return SourceLocation::getMacroLoc(NextOffset-(TokLength+1)); } /// getBufferData - Return a pointer to the start and end of the source buffer @@ -189,19 +168,153 @@ SourceManager::getBufferData(FileID FID) const { } +//===--------------------------------------------------------------------===// +// SourceLocation manipulation methods. +//===--------------------------------------------------------------------===// + +/// getFileIDSlow - Return the FileID for a SourceLocation. This is a very hot +/// method that is used for all SourceManager queries that start with a +/// SourceLocation object. It is responsible for finding the entry in +/// SLocEntryTable which contains the specified location. +/// +FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const { + assert(SLocOffset && "Invalid FileID"); + + // After the first and second level caches, I see two common sorts of + // behavior: 1) a lot of searched FileID's are "near" the cached file location + // or are "near" the cached instantiation location. 2) others are just + // completely random and may be a very long way away. 
+ // + // To handle this, we do a linear search for up to 8 steps to catch #1 quickly + // then we fall back to a less cache efficient, but more scalable, binary + // search to find the location. + + // See if this is near the file point - worst case we start scanning from the + // most newly created FileID. + std::vector<SrcMgr::SLocEntry>::const_iterator I; + + if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) { + // Neither loc prunes our search. + I = SLocEntryTable.end(); + } else { + // Perhaps it is near the file point. + I = SLocEntryTable.begin()+LastFileIDLookup.ID; + } + + // Find the FileID that contains this. "I" is an iterator that points to a + // FileID whose offset is known to be larger than SLocOffset. + unsigned NumProbes = 0; + while (1) { + --I; + if (I->getOffset() <= SLocOffset) { +#if 0 + printf("lin %d -> %d [%s] %d %d\n", SLocOffset, + I-SLocEntryTable.begin(), + I->isInstantiation() ? "inst" : "file", + LastFileIDLookup.ID, int(SLocEntryTable.end()-I)); +#endif + FileID Res = FileID::get(I-SLocEntryTable.begin()); + + // If this isn't an instantiation, remember it. We have good locality + // across FileID lookups. + if (!I->isInstantiation()) + LastFileIDLookup = Res; + NumLinearScans += NumProbes+1; + return Res; + } + if (++NumProbes == 8) + break; + } + + // Convert "I" back into an index. We know that it is an entry whose index is + // larger than the offset we are looking for. + unsigned GreaterIndex = I-SLocEntryTable.begin(); + // LessIndex - This is the lower bound of the range that we're searching. + // We know that the offset corresponding to the FileID is is less than + // SLocOffset. + unsigned LessIndex = 0; + NumProbes = 0; + while (1) { + unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex; + unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset(); + + ++NumProbes; + + // If the offset of the midpoint is too large, chop the high side of the + // range to the midpoint. 
+ if (MidOffset > SLocOffset) { + GreaterIndex = MiddleIndex; + continue; + } + + // If the middle index contains the value, succeed and return. + if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) { +#if 0 + printf("bin %d -> %d [%s] %d %d\n", SLocOffset, + I-SLocEntryTable.begin(), + I->isInstantiation() ? "inst" : "file", + LastFileIDLookup.ID, int(SLocEntryTable.end()-I)); +#endif + FileID Res = FileID::get(MiddleIndex); + + // If this isn't an instantiation, remember it. We have good locality + // across FileID lookups. + if (!I->isInstantiation()) + LastFileIDLookup = Res; + NumBinaryProbes += NumProbes; + return Res; + } + + // Otherwise, move the low-side up to the middle index. + LessIndex = MiddleIndex; + } +} + +std::pair<FileID, unsigned> +SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E, + unsigned Offset) const { + // If this is an instantiation record, walk through all the instantiation + // points. + FileID FID; + SourceLocation Loc; + do { + Loc = E->getInstantiation().getInstantiationLoc(); + + FID = getFileID(Loc); + E = &getSLocEntry(FID); + Offset += Loc.getOffset()-E->getOffset(); + } while (Loc.isFileID()); + + return std::make_pair(FID, Offset); +} + +std::pair<FileID, unsigned> +SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E, + unsigned Offset) const { + // If this is an instantiation record, get and return the spelling. + SourceLocation Loc = E->getInstantiation().getSpellingLoc(); + FileID FID = getFileID(Loc); + E = &getSLocEntry(FID); + Offset += Loc.getOffset()-E->getOffset(); + assert(Loc.isFileID() && "Should only have one spelling link"); + return std::make_pair(FID, Offset); +} + + +//===----------------------------------------------------------------------===// +// Queries about the code at a SourceLocation. 
+//===----------------------------------------------------------------------===// /// getCharacterData - Return a pointer to the start of the specified location /// in the appropriate MemoryBuffer. const char *SourceManager::getCharacterData(SourceLocation SL) const { // Note that this is a hot function in the getSpelling() path, which is // heavily used by -E mode. - SL = getSpellingLoc(SL); - - std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(SL); + std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL); // Note that calling 'getBuffer()' may lazily page in a source file. - return getContentCache(LocInfo.first)->getBuffer()->getBufferStart() + - LocInfo.second; + return getSLocEntry(LocInfo.first).getFile().getContentCache() + ->getBuffer()->getBufferStart() + LocInfo.second; } @@ -209,9 +322,10 @@ const char *SourceManager::getCharacterData(SourceLocation SL) const { /// this is significantly cheaper to compute than the line number. This returns /// zero if the column number isn't known. unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { - if (Loc.getChunkID() == 0) return 0; + if (Loc.isInvalid()) return 0; + assert(Loc.isFileID() && "Don't know what part of instantiation loc to get"); - std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc); + std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); unsigned FilePos = LocInfo.second; const char *Buf = getBuffer(LocInfo.first)->getBufferStart(); @@ -222,21 +336,6 @@ unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { return FilePos-LineStart+1; } -/// getSourceName - This method returns the name of the file or buffer that -/// the SourceLocation specifies. This can be modified with #line directives, -/// etc. 
-const char *SourceManager::getSourceName(SourceLocation Loc) const { - if (Loc.getChunkID() == 0) return ""; - - Loc = getSpellingLoc(Loc); - unsigned ChunkID = Loc.getChunkID(); - const SrcMgr::ContentCache *C = getFIDInfo(ChunkID)->getContentCache(); - - // To get the source name, first consult the FileEntry (if one exists) before - // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer. - return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier(); -} - static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; static void ComputeLineNumbers(ContentCache* FI) { // Note that calling 'getBuffer()' may lazily page in the file. @@ -287,16 +386,17 @@ static void ComputeLineNumbers(ContentCache* FI) { /// line offsets for the MemoryBuffer, so this is not cheap: use only when /// about to emit a diagnostic. unsigned SourceManager::getLineNumber(SourceLocation Loc) const { - if (Loc.getChunkID() == 0) return 0; + if (Loc.isInvalid()) return 0; + assert(Loc.isFileID() && "Don't know what part of instantiation loc to get"); - ContentCache *Content; - - std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc); + std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); + ContentCache *Content; if (LastLineNoFileIDQuery == LocInfo.first) Content = LastLineNoContentCache; else - Content = const_cast<ContentCache*>(getContentCache(LocInfo.first)); + Content = const_cast<ContentCache*>(getSLocEntry(LocInfo.first) + .getFile().getContentCache()); // If this is the first use of line information for this buffer, compute the /// SourceLineCache for it on demand. @@ -375,15 +475,32 @@ unsigned SourceManager::getLineNumber(SourceLocation Loc) const { return LineNo; } +/// getSourceName - This method returns the name of the file or buffer that +/// the SourceLocation specifies. This can be modified with #line directives, +/// etc. 
+const char *SourceManager::getSourceName(SourceLocation Loc) const { + if (Loc.isInvalid()) return ""; + + const SrcMgr::ContentCache *C = + getSLocEntry(getFileID(getSpellingLoc(Loc))).getFile().getContentCache(); + + // To get the source name, first consult the FileEntry (if one exists) before + // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer. + return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier(); +} + +//===----------------------------------------------------------------------===// +// Other miscellaneous methods. +//===----------------------------------------------------------------------===// + + /// PrintStats - Print statistics to stderr. /// void SourceManager::PrintStats() const { llvm::cerr << "\n*** Source Manager Stats:\n"; llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() - << " mem buffers mapped, " << FileIDs.size() - << " file ID's allocated.\n"; - llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " - << MacroIDs.size() << " macro expansion FileID's.\n"; + << " mem buffers mapped, " << SLocEntryTable.size() + << " SLocEntry's allocated.\n"; unsigned NumLineNumsComputed = 0; unsigned NumFileBytesMapped = 0; @@ -395,6 +512,8 @@ void SourceManager::PrintStats() const { llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " << NumLineNumsComputed << " files with line #'s computed.\n"; + llvm::cerr << "FileID scans: " << NumLinearScans << " linear, " + << NumBinaryProbes << " binary.\n"; } //===----------------------------------------------------------------------===// @@ -450,49 +569,23 @@ void ContentCache::ReadToSourceManager(llvm::Deserializer& D, D.RegisterPtr(PtrID,NULL); else // Get the ContextCache object and register it with the deserializer. - D.RegisterPtr(PtrID,SMgr.getContentCache(E)); + D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E)); + return; } - else { - // Register the ContextCache object with the deserializer. 
- SMgr.MemBufferInfos.push_back(ContentCache()); - ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); - D.RegisterPtr(&Entry); - - // Create the buffer. - unsigned Size = D.ReadInt(); - Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); - - // Read the contents of the buffer. - char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); - for (unsigned i = 0; i < Size ; ++i) - p[i] = D.ReadInt(); - } -} - -void FileIDInfo::Emit(llvm::Serializer& S) const { - S.Emit(IncludeLoc); - S.EmitInt(ChunkNo); - S.EmitPtr(Content); -} - -FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { - FileIDInfo I; - I.IncludeLoc = SourceLocation::ReadVal(D); - I.ChunkNo = D.ReadInt(); - D.ReadPtr(I.Content,false); - return I; -} - -void MacroIDInfo::Emit(llvm::Serializer& S) const { - S.Emit(InstantiationLoc); - S.Emit(SpellingLoc); -} - -MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { - MacroIDInfo I; - I.InstantiationLoc = SourceLocation::ReadVal(D); - I.SpellingLoc = SourceLocation::ReadVal(D); - return I; + + // Register the ContextCache object with the deserializer. + SMgr.MemBufferInfos.push_back(ContentCache()); + ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); + D.RegisterPtr(&Entry); + + // Create the buffer. + unsigned Size = D.ReadInt(); + Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); + + // Read the contents of the buffer. + char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); + for (unsigned i = 0; i < Size ; ++i) + p[i] = D.ReadInt(); } void SourceManager::Emit(llvm::Serializer& S) const { @@ -516,13 +609,7 @@ void SourceManager::Emit(llvm::Serializer& S) const { S.ExitBlock(); - // Emit: FileIDs - S.EmitInt(FileIDs.size()); - std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); - - // Emit: MacroIDs - S.EmitInt(MacroIDs.size()); - std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); + // FIXME: Emit SLocEntryTable. 
S.ExitBlock(); } @@ -533,7 +620,7 @@ SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ D.RegisterPtr(M); // Read: the FileID of the main source file of the translation unit. - M->MainFileID = FileID::Create(D.ReadInt()); + M->MainFileID = FileID::get(D.ReadInt()); std::vector<char> Buf; @@ -549,17 +636,7 @@ SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ ContentCache::ReadToSourceManager(D,*M,NULL,Buf); } - // Read: FileIDs. - unsigned Size = D.ReadInt(); - M->FileIDs.reserve(Size); - for (; Size > 0 ; --Size) - M->FileIDs.push_back(FileIDInfo::ReadVal(D)); - - // Read: MacroIDs. - Size = D.ReadInt(); - M->MacroIDs.reserve(Size); - for (; Size > 0 ; --Size) - M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); + // FIXME: Read SLocEntryTable. return M; } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 3174a059174..9e8d1aa7407 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -169,8 +169,8 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, // Set the SourceLocation with the remapping information. This ensures that // GetMappedTokenLoc will remap the tokens as they are lexed. - L->FileLoc = SM.getInstantiationLoc(SM.getLocForStartOfFile(SpellingFID), - InstantiationLoc); + L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID), + InstantiationLoc, TokLen); // Ensure that the lexer thinks it is inside a directive, so that end \n will // return an EOM token. @@ -214,16 +214,15 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { /// that are part of that. unsigned Lexer::MeasureTokenLength(SourceLocation Loc, const SourceManager &SM) { - // If this comes from a macro expansion, we really do want the macro name, not - // the token this macro expanded to. - Loc = SM.getInstantiationLoc(Loc); - // TODO: this could be special cased for common tokens like identifiers, ')', // etc to make this faster, if it mattered. 
Just look at StrData[0] to handle // all obviously single-char tokens. This could use // Lexer::isObviouslySimpleCharacter for example to handle identifiers or // something. - std::pair<FileID, unsigned> LocInfo = SM.getDecomposedFileLoc(Loc); + + // If this comes from a macro expansion, we really do want the macro name, not + // the token this macro expanded to. + std::pair<FileID, unsigned> LocInfo = SM.getDecomposedInstantiationLoc(Loc); std::pair<const char *,const char *> Buffer = SM.getBufferData(LocInfo.first); const char *StrData = Buffer.first+LocInfo.second; @@ -310,10 +309,11 @@ static inline bool isNumberBody(unsigned char c) { /// path of the hot getSourceLocation method. Do not allow it to be inlined. static SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, - unsigned CharNo) DISABLE_INLINE; + unsigned CharNo, + unsigned TokLen) DISABLE_INLINE; static SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, - unsigned CharNo) { + unsigned CharNo, unsigned TokLen) { // Otherwise, we're lexing "mapped tokens". This is used for things like // _Pragma handling. Combine the instantiation location of FileLoc with the // spelling location. @@ -324,12 +324,13 @@ static SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation InstLoc = SourceMgr.getInstantiationLoc(FileLoc); SourceLocation SpellingLoc = SourceMgr.getSpellingLoc(FileLoc); SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo); - return SourceMgr.getInstantiationLoc(SpellingLoc, InstLoc); + return SourceMgr.createInstantiationLoc(SpellingLoc, InstLoc, TokLen); } /// getSourceLocation - Return a source location identifier for the specified /// offset in the current file. 
-SourceLocation Lexer::getSourceLocation(const char *Loc) const { +SourceLocation Lexer::getSourceLocation(const char *Loc, + unsigned TokLen) const { assert(Loc >= BufferStart && Loc <= BufferEnd && "Location out of range for this buffer!"); @@ -342,7 +343,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc) const { // Otherwise, this is the _Pragma lexer case, which pretends that all of the // tokens are lexed from where the _Pragma was defined. assert(PP && "This doesn't work on raw lexers"); - return GetMappedTokenLoc(*PP, FileLoc, CharNo); + return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen); } /// Diag - Forwarding function for diagnostics. This translate a source diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 6c4096de6cc..63caafaf504 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -221,7 +221,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, } else if (MI->getNumTokens() == 1 && isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(), - *this)){ + *this)) { // Otherwise, if this macro expands into a single trivially-expanded // token: expand it now. This handles common cases like // "#define VAL 42". @@ -247,7 +247,8 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Update the tokens location to include both its instantiation and physical // locations. SourceLocation Loc = - SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc); + SourceMgr.createInstantiationLoc(Identifier.getLocation(), InstantiateLoc, + Identifier.getLength()); Identifier.setLocation(Loc); // If this is #define X X, we must mark the result as unexpandible. 
@@ -480,13 +481,15 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { ComputeDATE_TIME(DATELoc, TIMELoc, *this); Tok.setKind(tok::string_literal); Tok.setLength(strlen("\"Mmm dd yyyy\"")); - Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation())); + Tok.setLocation(SourceMgr.createInstantiationLoc(DATELoc, Tok.getLocation(), + Tok.getLength())); } else if (II == Ident__TIME__) { if (!TIMELoc.isValid()) ComputeDATE_TIME(DATELoc, TIMELoc, *this); Tok.setKind(tok::string_literal); Tok.setLength(strlen("\"hh:mm:ss\"")); - Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation())); + Tok.setLocation(SourceMgr.createInstantiationLoc(TIMELoc, Tok.getLocation(), + Tok.getLength())); } else if (II == Ident__INCLUDE_LEVEL__) { Diag(Tok, diag::ext_pp_include_level); diff --git a/clang/lib/Lex/PTHLexer.cpp b/clang/lib/Lex/PTHLexer.cpp index ec76a299845..f6994e0976d 100644 --- a/clang/lib/Lex/PTHLexer.cpp +++ b/clang/lib/Lex/PTHLexer.cpp @@ -321,7 +321,7 @@ unsigned PTHManager::getSpelling(FileID FID, unsigned FPos, unsigned PTHManager::getSpelling(SourceLocation Loc, const char *&Buffer) { SourceManager &SM = PP->getSourceManager(); Loc = SM.getSpellingLoc(Loc); - std::pair<FileID, unsigned> LocInfo = SM.getDecomposedFileLoc(Loc); + std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); return getSpelling(LocInfo.first, LocInfo.second, Buffer); } @@ -407,8 +407,7 @@ unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned FPos, unsigned PTHLexer::getSpelling(SourceLocation Loc, const char *&Buffer) { SourceManager &SM = PP->getSourceManager(); - Loc = SM.getSpellingLoc(Loc); - std::pair<FileID, unsigned> LocInfo = SM.getDecomposedFileLoc(Loc); + std::pair<FileID, unsigned> LocInfo = SM.getDecomposedSpellingLoc(Loc); FileID FID = LocInfo.first; unsigned FPos = LocInfo.second; diff --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp index bef81caac71..695a5365faf 100644 --- 
a/clang/lib/Lex/ScratchBuffer.cpp +++ b/clang/lib/Lex/ScratchBuffer.cpp @@ -50,7 +50,7 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) { SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, SourceLocation SourceLoc) { // Map the physloc to the specified sourceloc. - return SourceMgr.getInstantiationLoc(getToken(Buf, Len), SourceLoc); + return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len); } void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index dd5352c1b61..ea4ce669d1d 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -314,8 +314,9 @@ void TokenLexer::Lex(Token &Tok) { // that captures all of this. if (InstantiateLoc.isValid()) { // Don't do this for token streams. SourceManager &SrcMgr = PP.getSourceManager(); - Tok.setLocation(SrcMgr.getInstantiationLoc(Tok.getLocation(), - InstantiateLoc)); + Tok.setLocation(SrcMgr.createInstantiationLoc(Tok.getLocation(), + InstantiateLoc, + Tok.getLength())); } // If this is the first token, set the lexical properties of the token to @@ -398,7 +399,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { "Should be a raw location into scratch buffer"); SourceManager &SourceMgr = PP.getSourceManager(); std::pair<FileID, unsigned> LocInfo = - SourceMgr.getDecomposedFileLoc(ResultTokLoc); + SourceMgr.getDecomposedLoc(ResultTokLoc); const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first; diff --git a/clang/lib/Rewrite/HTMLRewrite.cpp b/clang/lib/Rewrite/HTMLRewrite.cpp index 040146620d2..b5f0fc39511 100644 --- a/clang/lib/Rewrite/HTMLRewrite.cpp +++ b/clang/lib/Rewrite/HTMLRewrite.cpp @@ -441,8 +441,7 @@ void html::HighlightMacros(Rewriter &R, FileID FID, Preprocessor& PP) { // Ignore tokens whose instantiation location was not the main file. 
SourceLocation LLoc = SourceMgr.getInstantiationLoc(Tok.getLocation()); - std::pair<FileID, unsigned> LLocInfo = - SourceMgr.getDecomposedFileLoc(LLoc); + std::pair<FileID, unsigned> LLocInfo = SourceMgr.getDecomposedLoc(LLoc); if (LLocInfo.first != FID) { PP.Lex(Tok); diff --git a/clang/lib/Rewrite/Rewriter.cpp b/clang/lib/Rewrite/Rewriter.cpp index e92bd7d329d..551fa1d4910 100644 --- a/clang/lib/Rewrite/Rewriter.cpp +++ b/clang/lib/Rewrite/Rewriter.cpp @@ -151,7 +151,7 @@ std::string Rewriter::getRewritenText(SourceRange Range) const { unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc, FileID &FID) const { assert(Loc.isValid() && "Invalid location"); - std::pair<FileID,unsigned> V = SourceMgr->getDecomposedFileLoc(Loc); + std::pair<FileID,unsigned> V = SourceMgr->getDecomposedLoc(Loc); FID = V.first; return V.second; } |