Diffstat (limited to 'clang/lib/Lex')
-rw-r--r--  clang/lib/Lex/HeaderMap.cpp         242
-rw-r--r--  clang/lib/Lex/HeaderSearch.cpp      425
-rw-r--r--  clang/lib/Lex/Lexer.cpp            1661
-rw-r--r--  clang/lib/Lex/LiteralSupport.cpp    691
-rw-r--r--  clang/lib/Lex/MacroArgs.cpp         225
-rw-r--r--  clang/lib/Lex/MacroArgs.h           109
-rw-r--r--  clang/lib/Lex/MacroInfo.cpp          70
-rw-r--r--  clang/lib/Lex/Makefile               28
-rw-r--r--  clang/lib/Lex/PPDirectives.cpp     1153
-rw-r--r--  clang/lib/Lex/PPExpressions.cpp     639
-rw-r--r--  clang/lib/Lex/PPLexerChange.cpp     401
-rw-r--r--  clang/lib/Lex/PPMacroExpansion.cpp  523
-rw-r--r--  clang/lib/Lex/Pragma.cpp            386
-rw-r--r--  clang/lib/Lex/Preprocessor.cpp      560
-rw-r--r--  clang/lib/Lex/ScratchBuffer.cpp      72
-rw-r--r--  clang/lib/Lex/TokenLexer.cpp        488
16 files changed, 7673 insertions, 0 deletions
diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp
new file mode 100644
index 00000000000..282e742b4c8
--- /dev/null
+++ b/clang/lib/Lex/HeaderMap.cpp
@@ -0,0 +1,242 @@
+//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HeaderMap interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Data Structures and Manifest Constants
+//===----------------------------------------------------------------------===//
+
+enum {
+ HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p',
+ HMAP_HeaderVersion = 1,
+
+ HMAP_EmptyBucketKey = 0
+};
+
+namespace clang {
+struct HMapBucket {
+ uint32_t Key; // Offset (into strings) of key.
+
+ uint32_t Prefix; // Offset (into strings) of value prefix.
+ uint32_t Suffix; // Offset (into strings) of value suffix.
+};
+
+struct HMapHeader {
+ uint32_t Magic; // Magic word, also indicates byte order.
+ uint16_t Version; // Version number -- currently 1.
+ uint16_t Reserved; // Reserved for future use - zero for now.
+ uint32_t StringsOffset; // Offset to start of string pool.
+ uint32_t NumEntries; // Number of entries in the string table.
+ uint32_t NumBuckets; // Number of buckets (always a power of 2).
+ uint32_t MaxValueLength; // Length of longest result path (excluding nul).
+ // An array of 'NumBuckets' HMapBucket objects follows this header.
+ // Strings follow the buckets, at StringsOffset.
+};
+} // end namespace clang.
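+
+// Putting the structs above together, the on-disk layout of a headermap is:
+//   offset 0:            HMapHeader
+//   sizeof(HMapHeader):  HMapBucket[NumBuckets]
+//   StringsOffset:       nul-terminated string pool; the bucket Key, Prefix,
+//                        and Suffix fields are byte offsets into this pool.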
+
+/// HashHMapKey - This is the 'well known' hash function required by the file
+/// format, used to look up keys in the hash table. The hash table uses simple
+/// linear probing based on this function.
+static inline unsigned HashHMapKey(const char *S, const char *End) {
+ unsigned Result = 0;
+
+ for (; S != End; S++)
+ Result += tolower(*S) * 13;
+ return Result;
+}
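+
+// For example, the key "foo" (or "FOO"; the hash is case-insensitive) hashes
+// to ('f' + 'o' + 'o') * 13 = 324 * 13 = 4212; LookupFile below masks this
+// with NumBuckets-1 to pick the starting probe bucket.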
+
+
+
+//===----------------------------------------------------------------------===//
+// Verification and Construction
+//===----------------------------------------------------------------------===//
+
+/// HeaderMap::Create - This attempts to load the specified file as a header
+/// map. If the file doesn't look like a HeaderMap, or looks like one but is
+/// obviously corrupted, it gives up and returns null.
+const HeaderMap *HeaderMap::Create(const FileEntry *FE) {
+ // If the file is too small to be a header map, ignore it.
+ unsigned FileSize = FE->getSize();
+ if (FileSize <= sizeof(HMapHeader)) return 0;
+
+ llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer(
+ llvm::MemoryBuffer::getFile(FE->getName(), strlen(FE->getName()), 0,
+ FE->getSize()));
+ if (FileBuffer == 0) return 0; // Unreadable file?
+ const char *FileStart = FileBuffer->getBufferStart();
+
+ // We know the file is at least as big as the header, check it now.
+ const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
+
+ // Sniff it to see if it's a headermap by checking the magic number and
+ // version.
+ bool NeedsByteSwap;
+ if (Header->Magic == HMAP_HeaderMagicNumber &&
+ Header->Version == HMAP_HeaderVersion)
+ NeedsByteSwap = false;
+ else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) &&
+ Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
+ NeedsByteSwap = true; // Mixed endianness headermap.
+ else
+ return 0; // Not a header map.
+
+ if (Header->Reserved != 0) return 0;
+
+ // Okay, everything looks good, create the header map.
+ return new HeaderMap(FileBuffer.take(), NeedsByteSwap);
+}
+
+HeaderMap::~HeaderMap() {
+ delete FileBuffer;
+}
+
+//===----------------------------------------------------------------------===//
+// Utility Methods
+//===----------------------------------------------------------------------===//
+
+
+/// getFileName - Return the filename of the headermap.
+const char *HeaderMap::getFileName() const {
+ return FileBuffer->getBufferIdentifier();
+}
+
+unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const {
+ if (!NeedsBSwap) return X;
+ return llvm::ByteSwap_32(X);
+}
+
+/// getHeader - Return a reference to the file header, with its fields in raw
+/// (not yet byte-swapped) on-disk form. This method cannot fail.
+const HMapHeader &HeaderMap::getHeader() const {
+ // We know the file is at least as big as the header. Return it.
+ return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
+}
+
+/// getBucket - Return the specified hash table bucket from the header map,
+/// bswap'ing its fields as appropriate. If the bucket number is not valid,
+/// this returns a bucket with an empty key (0).
+HMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
+ HMapBucket Result;
+ Result.Key = HMAP_EmptyBucketKey;
+
+ const HMapBucket *BucketArray =
+ reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
+ sizeof(HMapHeader));
+
+ const HMapBucket *BucketPtr = BucketArray+BucketNo;
+ if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd())
+ return Result; // Invalid buffer, corrupt hmap.
+
+ // Otherwise, the bucket is valid. Load the values, bswapping as needed.
+ Result.Key = getEndianAdjustedWord(BucketPtr->Key);
+ Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
+ Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
+ return Result;
+}
+
+/// getString - Look up the specified string in the string table. If the
+/// string index is not valid, it returns null.
+const char *HeaderMap::getString(unsigned StrTabIdx) const {
+ // Add the start of the string table to the idx.
+ StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
+
+ // Check for invalid index.
+ if (StrTabIdx >= FileBuffer->getBufferSize())
+ return 0;
+
+ // Otherwise, we have a valid pointer into the file. Just return it. We know
+ // that the "string" can not overrun the end of the file, because the buffer
+ // is nul terminated by virtue of being a MemoryBuffer.
+ return FileBuffer->getBufferStart()+StrTabIdx;
+}
+
+/// StringsEqualWithoutCase - Compare the specified two strings for case-
+/// insensitive equality, returning true if they are equal. Both strings are
+/// known to have the same length.
+static bool StringsEqualWithoutCase(const char *S1, const char *S2,
+ unsigned Len) {
+ for (; Len; ++S1, ++S2, --Len)
+ if (tolower(*S1) != tolower(*S2))
+ return false;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// The Main Drivers
+//===----------------------------------------------------------------------===//
+
+/// dump - Print the contents of this headermap to stderr.
+void HeaderMap::dump() const {
+ const HMapHeader &Hdr = getHeader();
+ unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+ fprintf(stderr, "Header Map %s:\n %d buckets, %d entries\n",
+ getFileName(), NumBuckets,
+ getEndianAdjustedWord(Hdr.NumEntries));
+
+ for (unsigned i = 0; i != NumBuckets; ++i) {
+ HMapBucket B = getBucket(i);
+ if (B.Key == HMAP_EmptyBucketKey) continue;
+
+ const char *Key = getString(B.Key);
+ const char *Prefix = getString(B.Prefix);
+ const char *Suffix = getString(B.Suffix);
+ fprintf(stderr, " %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix);
+ }
+}
+
+/// LookupFile - Check to see if the specified relative filename is located in
+/// this HeaderMap. If so, open it and return its FileEntry.
+const FileEntry *HeaderMap::LookupFile(const char *FilenameStart,
+ const char *FilenameEnd,
+ FileManager &FM) const {
+ const HMapHeader &Hdr = getHeader();
+ unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+ // If the number of buckets is not a power of two, the headermap is corrupt.
+ // Don't probe infinitely.
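+ // (A power of two has exactly one set bit, so N & (N-1) == 0: 8 & 7 == 0,
+ // while a corrupt value like 6 gives 6 & 5 == 4 and is rejected.)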
+ if (NumBuckets & (NumBuckets-1))
+ return 0;
+
+ // Linearly probe the hash table.
+ for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) {
+ HMapBucket B = getBucket(Bucket & (NumBuckets-1));
+ if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss.
+
+ // See if the key matches. If not, probe on.
+ const char *Key = getString(B.Key);
+ unsigned BucketKeyLen = strlen(Key);
+ if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart))
+ continue;
+
+ // See if the actual strings equal.
+ if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen))
+ continue;
+
+ // If so, we have a match in the hash table. Construct the destination
+ // path.
+ llvm::SmallString<1024> DestPath;
+ DestPath += getString(B.Prefix);
+ DestPath += getString(B.Suffix);
+ return FM.getFile(DestPath.begin(), DestPath.end());
+ }
+}
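+
+// Example use of the interface above (an illustrative sketch; the names are
+// hypothetical): given a FileManager FM and a FileEntry *FE for a .hmap file,
+//
+//   if (const HeaderMap *HM = HeaderMap::Create(FE)) {
+//     const char *Name = "Foo/Bar.h";
+//     if (const FileEntry *E = HM->LookupFile(Name, Name+strlen(Name), FM))
+//       ...; // E names the remapped file, i.e. Prefix+Suffix concatenated.
+//   }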
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
new file mode 100644
index 00000000000..44ae35c8b7e
--- /dev/null
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -0,0 +1,425 @@
+//===--- HeaderSearch.cpp - Resolve Header File Locations ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DirectoryLookup and HeaderSearch interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "llvm/System/Path.h"
+#include "llvm/ADT/SmallString.h"
+using namespace clang;
+
+HeaderSearch::HeaderSearch(FileManager &FM) : FileMgr(FM), FrameworkMap(64) {
+ SystemDirIdx = 0;
+ NoCurDirSearch = false;
+
+ NumIncluded = 0;
+ NumMultiIncludeFileOptzn = 0;
+ NumFrameworkLookups = NumSubFrameworkLookups = 0;
+}
+
+HeaderSearch::~HeaderSearch() {
+ // Delete headermaps.
+ for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+ delete HeaderMaps[i].second;
+}
+
+void HeaderSearch::PrintStats() {
+ fprintf(stderr, "\n*** HeaderSearch Stats:\n");
+ fprintf(stderr, "%d files tracked.\n", (int)FileInfo.size());
+ unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
+ for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+ NumOnceOnlyFiles += FileInfo[i].isImport;
+ if (MaxNumIncludes < FileInfo[i].NumIncludes)
+ MaxNumIncludes = FileInfo[i].NumIncludes;
+ NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
+ }
+ fprintf(stderr, " %d #import/#pragma once files.\n", NumOnceOnlyFiles);
+ fprintf(stderr, " %d included exactly once.\n", NumSingleIncludedFiles);
+ fprintf(stderr, " %d max times a file is included.\n", MaxNumIncludes);
+
+ fprintf(stderr, " %d #include/#include_next/#import.\n", NumIncluded);
+ fprintf(stderr, " %d #includes skipped due to"
+ " the multi-include optimization.\n", NumMultiIncludeFileOptzn);
+
+ fprintf(stderr, "%d framework lookups.\n", NumFrameworkLookups);
+ fprintf(stderr, "%d subframework lookups.\n", NumSubFrameworkLookups);
+}
+
+/// CreateHeaderMap - This method returns a HeaderMap for the specified
+/// FileEntry, uniquing them through the 'HeaderMaps' data structure.
+const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {
+ // We expect the number of headermaps to be small, and almost always empty.
+ // If it ever grows, use of a linear search should be re-evaluated.
+ if (!HeaderMaps.empty()) {
+ for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+ // Pointer equality comparison of FileEntries works because they are
+ // already uniqued by inode.
+ if (HeaderMaps[i].first == FE)
+ return HeaderMaps[i].second;
+ }
+
+ if (const HeaderMap *HM = HeaderMap::Create(FE)) {
+ HeaderMaps.push_back(std::make_pair(FE, HM));
+ return HM;
+ }
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// File lookup within a DirectoryLookup scope
+//===----------------------------------------------------------------------===//
+
+/// getName - Return the directory or filename corresponding to this lookup
+/// object.
+const char *DirectoryLookup::getName() const {
+ if (isNormalDir())
+ return getDir()->getName();
+ if (isFramework())
+ return getFrameworkDir()->getName();
+ assert(isHeaderMap() && "Unknown DirectoryLookup");
+ return getHeaderMap()->getFileName();
+}
+
+
+/// LookupFile - Lookup the specified file in this search path, returning it
+/// if it exists or returning null if not.
+const FileEntry *DirectoryLookup::LookupFile(const char *FilenameStart,
+ const char *FilenameEnd,
+ HeaderSearch &HS) const {
+ llvm::SmallString<1024> TmpDir;
+ if (isNormalDir()) {
+ // Concatenate the requested file onto the directory.
+ // FIXME: Portability. Filename concatenation should be in sys::Path.
+ TmpDir += getDir()->getName();
+ TmpDir.push_back('/');
+ TmpDir.append(FilenameStart, FilenameEnd);
+ return HS.getFileMgr().getFile(TmpDir.begin(), TmpDir.end());
+ }
+
+ if (isFramework())
+ return DoFrameworkLookup(FilenameStart, FilenameEnd, HS);
+
+ assert(isHeaderMap() && "Unknown directory lookup");
+ return getHeaderMap()->LookupFile(FilenameStart, FilenameEnd,HS.getFileMgr());
+}
+
+
+/// DoFrameworkLookup - Do a lookup of the specified file in the current
+/// DirectoryLookup, which is a framework directory.
+const FileEntry *DirectoryLookup::DoFrameworkLookup(const char *FilenameStart,
+ const char *FilenameEnd,
+ HeaderSearch &HS) const {
+ FileManager &FileMgr = HS.getFileMgr();
+
+ // Framework names must have a '/' in the filename.
+ const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/');
+ if (SlashPos == FilenameEnd) return 0;
+
+ // Find out if this is the home for the specified framework, by checking
+ // HeaderSearch. Possible answers are yes, no, and unknown.
+ const DirectoryEntry *&FrameworkDirCache =
+ HS.LookupFrameworkCache(FilenameStart, SlashPos);
+
+ // If it is known and in some other directory, fail.
+ if (FrameworkDirCache && FrameworkDirCache != getFrameworkDir())
+ return 0;
+
+ // Otherwise, construct the path to this framework dir.
+
+ // FrameworkName = "/System/Library/Frameworks/"
+ llvm::SmallString<1024> FrameworkName;
+ FrameworkName += getFrameworkDir()->getName();
+ if (FrameworkName.empty() || FrameworkName.back() != '/')
+ FrameworkName.push_back('/');
+
+ // FrameworkName = "/System/Library/Frameworks/Cocoa"
+ FrameworkName.append(FilenameStart, SlashPos);
+
+ // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/"
+ FrameworkName += ".framework/";
+
+ // If the cache entry is still unresolved, check for the existence of the
+ // framework directory now.
+ if (FrameworkDirCache == 0) {
+ HS.IncrementFrameworkLookupCount();
+
+ // If the framework dir doesn't exist, we fail.
+ // FIXME: It's probably more efficient to query this with FileMgr.getDir.
+ if (!llvm::sys::Path(std::string(FrameworkName.begin(),
+ FrameworkName.end())).exists())
+ return 0;
+
+ // Otherwise, if it does, remember that this is the right direntry for this
+ // framework.
+ FrameworkDirCache = getFrameworkDir();
+ }
+
+ // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h"
+ unsigned OrigSize = FrameworkName.size();
+
+ FrameworkName += "Headers/";
+ FrameworkName.append(SlashPos+1, FilenameEnd);
+ if (const FileEntry *FE = FileMgr.getFile(FrameworkName.begin(),
+ FrameworkName.end())) {
+ return FE;
+ }
+
+ // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
+ const char *Private = "Private";
+ FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
+ Private+strlen(Private));
+ return FileMgr.getFile(FrameworkName.begin(), FrameworkName.end());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Header File Location.
+//===----------------------------------------------------------------------===//
+
+
+/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
+/// return null on failure. isAngled indicates whether the file reference is
+/// for system #include's or not (i.e. using <> instead of ""). CurFileEnt, if
+/// non-null, indicates where the #including file is, in case a relative search
+/// is needed.
+const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart,
+ const char *FilenameEnd,
+ bool isAngled,
+ const DirectoryLookup *FromDir,
+ const DirectoryLookup *&CurDir,
+ const FileEntry *CurFileEnt) {
+ // If 'Filename' is absolute, check to see if it exists; no searching of the
+ // include paths is needed.
+ // FIXME: Portability. This should be a sys::Path interface, this doesn't
+ // handle things like C:\foo.txt right, nor win32 \\network\device\blah.
+ if (FilenameStart[0] == '/') {
+ CurDir = 0;
+
+ // If this was an #include_next "/absolute/file", fail.
+ if (FromDir) return 0;
+
+ // Otherwise, just return the file.
+ return FileMgr.getFile(FilenameStart, FilenameEnd);
+ }
+
+ // Step #0, unless disabled, check to see if the file is in the #includer's
+ // directory. This has to be based on CurFileEnt, not CurDir, because
+ // CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and
+ // a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h".
+ // This search is not done for <> headers.
+ if (CurFileEnt && !isAngled && !NoCurDirSearch) {
+ llvm::SmallString<1024> TmpDir;
+ // Concatenate the requested file onto the directory.
+ // FIXME: Portability. Filename concatenation should be in sys::Path.
+ TmpDir += CurFileEnt->getDir()->getName();
+ TmpDir.push_back('/');
+ TmpDir.append(FilenameStart, FilenameEnd);
+ if (const FileEntry *FE = FileMgr.getFile(TmpDir.begin(), TmpDir.end())) {
+ // Leave CurDir unset.
+ // This file is a system header or C++ unfriendly if the old file is.
+ //
+ // Note that the temporary 'DirInfo' is required here, as either call to
+ // getFileInfo could resize the vector and we don't want to rely on order
+ // of evaluation.
+ unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo;
+ getFileInfo(FE).DirInfo = DirInfo;
+ return FE;
+ }
+ }
+
+ CurDir = 0;
+
+ // If this is a system #include, ignore the user #include locs.
+ unsigned i = isAngled ? SystemDirIdx : 0;
+
+ // If this is a #include_next request, start searching after the directory the
+ // file was found in.
+ if (FromDir)
+ i = FromDir-&SearchDirs[0];
+
+ // Cache all of the lookups performed by this method. Many headers are
+ // multiply included, and while the "pragma once" optimization prevents them
+ // from being re-lexed and re-preprocessed, each #include would still have to
+ // search through a (potentially huge) series of SearchDirs to find the file.
+ std::pair<unsigned, unsigned> &CacheLookup =
+ LookupFileCache.GetOrCreateValue(FilenameStart, FilenameEnd).getValue();
+
+ // If the entry has been previously looked up, the first value will be
+ // non-zero. If the value is equal to i (the start point of our search), then
+ // this is a matching hit.
+ if (CacheLookup.first == i+1) {
+ // Skip querying potentially lots of directories for this lookup.
+ i = CacheLookup.second;
+ } else {
+ // Otherwise, this is the first query, or the previous query didn't match
+ // our search start. We will fill in our found location below, so prime the
+ // start point value.
+ CacheLookup.first = i+1;
+ }
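+ // For example, if a search starting at SearchDirs[3] previously found this
+ // file in SearchDirs[7], CacheLookup holds {4, 7}: a new lookup starting at
+ // i == 3 jumps straight to directory 7, while one starting anywhere else
+ // re-primes the cache.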
+
+ // Check each directory in sequence to see if it contains this file.
+ for (; i != SearchDirs.size(); ++i) {
+ const FileEntry *FE =
+ SearchDirs[i].LookupFile(FilenameStart, FilenameEnd, *this);
+ if (!FE) continue;
+
+ CurDir = &SearchDirs[i];
+
+ // This file is a system header or C++ unfriendly if the dir is.
+ getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic();
+
+ // Remember this location for the next lookup we do.
+ CacheLookup.second = i;
+ return FE;
+ }
+
+ // Otherwise, didn't find it. Remember we didn't find this.
+ CacheLookup.second = SearchDirs.size();
+ return 0;
+}
+
+/// LookupSubframeworkHeader - Look up a subframework for the specified
+/// #include file. For example, if #include'ing <HIToolbox/HIToolbox.h> from
+/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox
+/// is a subframework within Carbon.framework. If so, return the FileEntry
+/// for the designated file, otherwise return null.
+const FileEntry *HeaderSearch::
+LookupSubframeworkHeader(const char *FilenameStart,
+ const char *FilenameEnd,
+ const FileEntry *ContextFileEnt) {
+ assert(ContextFileEnt && "No context file?");
+
+ // Framework names must have a '/' in the filename. Find it.
+ const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/');
+ if (SlashPos == FilenameEnd) return 0;
+
+ // Look up the base framework name of the ContextFileEnt.
+ const char *ContextName = ContextFileEnt->getName();
+
+ // If the context file wasn't within a framework, it can't include a
+ // subframework header.
+ const char *FrameworkPos = strstr(ContextName, ".framework/");
+ if (FrameworkPos == 0)
+ return 0;
+
+ llvm::SmallString<1024> FrameworkName(ContextName,
+ FrameworkPos+strlen(".framework/"));
+
+ // Append Frameworks/HIToolbox.framework/
+ FrameworkName += "Frameworks/";
+ FrameworkName.append(FilenameStart, SlashPos);
+ FrameworkName += ".framework/";
+
+ llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup =
+ FrameworkMap.GetOrCreateValue(FilenameStart, SlashPos);
+
+ // Some other location?
+ if (CacheLookup.getValue() &&
+ CacheLookup.getKeyLength() == FrameworkName.size() &&
+ memcmp(CacheLookup.getKeyData(), &FrameworkName[0],
+ CacheLookup.getKeyLength()) != 0)
+ return 0;
+
+ // Cache subframework.
+ if (CacheLookup.getValue() == 0) {
+ ++NumSubFrameworkLookups;
+
+ // If the framework dir doesn't exist, we fail.
+ const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.begin(),
+ FrameworkName.end());
+ if (Dir == 0) return 0;
+
+ // Otherwise, if it does, remember that this is the right direntry for this
+ // framework.
+ CacheLookup.setValue(Dir);
+ }
+
+ const FileEntry *FE = 0;
+
+ // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h"
+ llvm::SmallString<1024> HeadersFilename(FrameworkName);
+ HeadersFilename += "Headers/";
+ HeadersFilename.append(SlashPos+1, FilenameEnd);
+ if (!(FE = FileMgr.getFile(HeadersFilename.begin(),
+ HeadersFilename.end()))) {
+
+ // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h"
+ HeadersFilename = FrameworkName;
+ HeadersFilename += "PrivateHeaders/";
+ HeadersFilename.append(SlashPos+1, FilenameEnd);
+ if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end())))
+ return 0;
+ }
+
+ // This file is a system header or C++ unfriendly if the old file is.
+ //
+ // Note that the temporary 'DirInfo' is required here, as either call to
+ // getFileInfo could resize the vector and we don't want to rely on order
+ // of evaluation.
+ unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
+ getFileInfo(FE).DirInfo = DirInfo;
+ return FE;
+}
+
+//===----------------------------------------------------------------------===//
+// File Info Management.
+//===----------------------------------------------------------------------===//
+
+
+/// getFileInfo - Return the PerFileInfo structure for the specified
+/// FileEntry.
+HeaderSearch::PerFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {
+ if (FE->getUID() >= FileInfo.size())
+ FileInfo.resize(FE->getUID()+1);
+ return FileInfo[FE->getUID()];
+}
+
+/// ShouldEnterIncludeFile - Mark the specified file as a target of a
+/// #include, #include_next, or #import directive. Return false if #including
+/// the file will have no effect or true if we should include it.
+bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){
+ ++NumIncluded; // Count # of attempted #includes.
+
+ // Get information about this file.
+ PerFileInfo &FileInfo = getFileInfo(File);
+
+ // If this is a #import directive, check that we have not already imported
+ // this header.
+ if (isImport) {
+ // If this has already been imported, don't import it again.
+ FileInfo.isImport = true;
+
+ // Has this already been #import'ed or #include'd?
+ if (FileInfo.NumIncludes) return false;
+ } else {
+ // Otherwise, if this is a #include of a file that was previously #import'd
+ // or if this is the second #include of a #pragma once file, ignore it.
+ if (FileInfo.isImport)
+ return false;
+ }
+
+ // Next, check to see if the file is wrapped with #ifndef guards. If so, and
+ // if the macro that guards it is defined, we know the #include has no effect.
+ if (FileInfo.ControllingMacro &&
+ FileInfo.ControllingMacro->hasMacroDefinition()) {
+ ++NumMultiIncludeFileOptzn;
+ return false;
+ }
+
+ // Increment the number of times this file has been included.
+ ++FileInfo.NumIncludes;
+
+ return true;
+}
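+
+// For example, a header wrapped in the idiomatic guard
+//
+//   #ifndef FOO_H
+//   #define FOO_H
+//   ...
+//   #endif
+//
+// gets FOO_H recorded as its controlling macro; once FOO_H is defined, any
+// later #include of the file is rejected here without reopening the file.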
+
+
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
new file mode 100644
index 00000000000..98bbb386305
--- /dev/null
+++ b/clang/lib/Lex/Lexer.cpp
@@ -0,0 +1,1661 @@
+//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Lexer and Token interfaces.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: GCC Diagnostics emitted by the lexer:
+// PEDWARN: (form feed|vertical tab) in preprocessing directive
+//
+// Universal characters, unicode, char mapping:
+// WARNING: `%.*s' is not in NFKC
+// WARNING: `%.*s' is not in NFC
+//
+// Other:
+// TODO: Options to support:
+// -fexec-charset,-fwide-exec-charset
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cctype>
+#include <cstring>
+using namespace clang;
+
+static void InitCharacterInfo();
+
+//===----------------------------------------------------------------------===//
+// Token Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
+bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
+ return is(tok::identifier) &&
+ getIdentifierInfo()->getObjCKeywordID() == objcKey;
+}
+
+/// getObjCKeywordID - Return the ObjC keyword kind.
+tok::ObjCKeywordKind Token::getObjCKeywordID() const {
+ IdentifierInfo *specId = getIdentifierInfo();
+ return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
+}
+
+/// isNamedIdentifier - Return true if this token is a preprocessor identifier
+/// with the specified name. For example, tok.isNamedIdentifier("this").
+bool Token::isNamedIdentifier(const char *Name) const {
+ return IdentInfo && !strcmp(IdentInfo->getName(), Name);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Lexer Class Implementation
+//===----------------------------------------------------------------------===//
+
+
+/// Lexer constructor - Create a new lexer object for the specified buffer
+/// with the specified preprocessor managing the lexing process. This lexer
+/// assumes that the associated file buffer and Preprocessor objects will
+/// outlive it, so it doesn't take ownership of either of them.
+Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp,
+ const char *BufStart, const char *BufEnd)
+ : FileLoc(fileloc), PP(&pp), Features(pp.getLangOptions()) {
+
+ SourceManager &SourceMgr = PP->getSourceManager();
+ unsigned InputFileID = SourceMgr.getPhysicalLoc(FileLoc).getFileID();
+ const llvm::MemoryBuffer *InputFile = SourceMgr.getBuffer(InputFileID);
+
+ Is_PragmaLexer = false;
+ InitCharacterInfo();
+
+ // BufferStart must always be InputFile->getBufferStart().
+ BufferStart = InputFile->getBufferStart();
+
+ // BufferPtr and BufferEnd can start out somewhere inside the current buffer.
+ // If unspecified, they start at the start/end of the buffer.
+ BufferPtr = BufStart ? BufStart : BufferStart;
+ BufferEnd = BufEnd ? BufEnd : InputFile->getBufferEnd();
+
+ assert(BufferEnd[0] == 0 &&
+ "We assume that the input buffer has a null character at the end"
+ " to simplify lexing!");
+
+ // Start of the file is a start of line.
+ IsAtStartOfLine = true;
+
+ // We are not after parsing a #.
+ ParsingPreprocessorDirective = false;
+
+ // We are not after parsing #include.
+ ParsingFilename = false;
+
+ // We are not in raw mode. Raw mode disables diagnostics and interpretation
+ // of tokens (e.g. identifiers, thus disabling macro expansion). It is used
+ // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
+ // or otherwise skipping over tokens.
+ LexingRawMode = false;
+
+ // Default to keeping comments if requested.
+ KeepCommentMode = PP->getCommentRetentionState();
+}
+
+/// Lexer constructor - Create a new raw lexer object. This object is only
+/// suitable for calls to 'LexRawToken'. This lexer assumes that the
+/// associated file buffer will outlive it, so it doesn't take ownership of
+/// it.
+Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
+ const char *BufStart, const char *BufEnd)
+ : FileLoc(fileloc), PP(0), Features(features) {
+ Is_PragmaLexer = false;
+ InitCharacterInfo();
+
+ BufferStart = BufStart;
+ BufferPtr = BufStart;
+ BufferEnd = BufEnd;
+
+ assert(BufferEnd[0] == 0 &&
+ "We assume that the input buffer has a null character at the end"
+ " to simplify lexing!");
+
+ // Start of the file is a start of line.
+ IsAtStartOfLine = true;
+
+ // We are not after parsing a #.
+ ParsingPreprocessorDirective = false;
+
+ // We are not after parsing #include.
+ ParsingFilename = false;
+
+ // We *are* in raw mode.
+ LexingRawMode = true;
+
+ // Never keep comments in raw mode.
+ KeepCommentMode = false;
+}
+
+
+/// Stringify - Convert the specified string into a C string by escaping '\'
+/// and quote characters (" normally, or ' if Charify is set). This does not
+/// add surrounding ""'s to the string.
+std::string Lexer::Stringify(const std::string &Str, bool Charify) {
+ std::string Result = Str;
+ char Quote = Charify ? '\'' : '"';
+ for (unsigned i = 0, e = Result.size(); i != e; ++i) {
+ if (Result[i] == '\\' || Result[i] == Quote) {
+ Result.insert(Result.begin()+i, '\\');
+ ++i; ++e;
+ }
+ }
+ return Result;
+}
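+
+// For example, the 4-character input a"b\ becomes the 6-character result
+// a\"b\\ (each " and \ gains a preceding backslash); with Charify set, '
+// is escaped instead of ".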
+
+/// Stringify - Convert the specified string into a C string by escaping '\'
+/// and " characters. This does not add surrounding ""'s to the string.
+void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] == '\\' || Str[i] == '"') {
+ Str.insert(Str.begin()+i, '\\');
+ ++i; ++e;
+ }
+ }
+}
+
+
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file. If the token needs cleaning (e.g.
+/// includes a trigraph or an escaped newline) then this count includes bytes
+/// that are part of that.
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+ const SourceManager &SM) {
+ // If this comes from a macro expansion, we really do want the macro name, not
+ // the token this macro expanded to.
+ Loc = SM.getLogicalLoc(Loc);
+
+ const char *StrData = SM.getCharacterData(Loc);
+
+ // TODO: this could be special cased for common tokens like identifiers, ')',
+ // etc to make this faster, if it mattered. Just look at StrData[0] to handle
+ // all obviously single-char tokens. This could use
+ // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
+ // something.
+
+
+ const char *BufEnd = SM.getBufferData(Loc.getFileID()).second;
+
+ // Create a LangOptions struct and enable trigraphs. This is sufficient for
+ // measuring tokens.
+ LangOptions LangOpts;
+ LangOpts.Trigraphs = true;
+
+ // Create a lexer starting at the beginning of this token.
+ Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
+ Token TheTok;
+ TheLexer.LexRawToken(TheTok);
+ return TheTok.getLength();
+}
+
+//===----------------------------------------------------------------------===//
+// Character information.
+//===----------------------------------------------------------------------===//
+
+static unsigned char CharInfo[256];
+
+enum {
+ CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0'
+ CHAR_VERT_WS = 0x02, // '\r', '\n'
+ CHAR_LETTER = 0x04, // a-z,A-Z
+ CHAR_NUMBER = 0x08, // 0-9
+ CHAR_UNDER = 0x10, // _
+ CHAR_PERIOD = 0x20 // .
+};
+
+static void InitCharacterInfo() {
+ static bool isInited = false;
+ if (isInited) return;
+ isInited = true;
+
+ // Initialize the CharInfo table.
+ // TODO: statically initialize this.
+ CharInfo[(int)' '] = CharInfo[(int)'\t'] =
+ CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
+ CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
+
+ CharInfo[(int)'_'] = CHAR_UNDER;
+ CharInfo[(int)'.'] = CHAR_PERIOD;
+ for (unsigned i = 'a'; i <= 'z'; ++i)
+ CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
+ for (unsigned i = '0'; i <= '9'; ++i)
+ CharInfo[i] = CHAR_NUMBER;
+}
+
+/// isIdentifierBody - Return true if this is the body character of an
+/// identifier, which is [a-zA-Z0-9_].
+static inline bool isIdentifierBody(unsigned char c) {
+ return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
+}
+
+/// isHorizontalWhitespace - Return true if this character is horizontal
+/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'.
+static inline bool isHorizontalWhitespace(unsigned char c) {
+ return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
+}
+
+/// isWhitespace - Return true if this character is horizontal or vertical
+/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false
+/// for '\0'.
+static inline bool isWhitespace(unsigned char c) {
+ return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
+}
+
+/// isNumberBody - Return true if this is the body character of a
+/// preprocessing number, which is [a-zA-Z0-9_.].
+static inline bool isNumberBody(unsigned char c) {
+ return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ?
+ true : false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Diagnostics forwarding code.
+//===----------------------------------------------------------------------===//
+
+/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
+/// lexer buffer was all instantiated at a single point, perform the mapping.
+/// This is currently only used for _Pragma implementation, so it is the slow
+/// path of the hot getSourceLocation method. Do not allow it to be inlined.
+static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
+ SourceLocation FileLoc,
+ unsigned CharNo) DISABLE_INLINE;
+static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
+ SourceLocation FileLoc,
+ unsigned CharNo) {
+ // Otherwise, we're lexing "mapped tokens". This is used for things like
+ // _Pragma handling. Combine the instantiation location of FileLoc with the
+ // physical location.
+ SourceManager &SourceMgr = PP.getSourceManager();
+
+ // Create a new SLoc which is expanded from logical(FileLoc) but whose
+ // characters come from phys(FileLoc)+Offset.
+ SourceLocation VirtLoc = SourceMgr.getLogicalLoc(FileLoc);
+ SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(FileLoc);
+ PhysLoc = SourceLocation::getFileLoc(PhysLoc.getFileID(), CharNo);
+ return SourceMgr.getInstantiationLoc(PhysLoc, VirtLoc);
+}
+
+/// getSourceLocation - Return a source location identifier for the specified
+/// offset in the current file.
+SourceLocation Lexer::getSourceLocation(const char *Loc) const {
+ assert(Loc >= BufferStart && Loc <= BufferEnd &&
+ "Location out of range for this buffer!");
+
+ // In the normal case, we're just lexing from a simple file buffer, return
+ // the file id from FileLoc with the offset specified.
+ unsigned CharNo = Loc-BufferStart;
+ if (FileLoc.isFileID())
+ return SourceLocation::getFileLoc(FileLoc.getFileID(), CharNo);
+
+ assert(PP && "This doesn't work on raw lexers");
+ return GetMappedTokenLoc(*PP, FileLoc, CharNo);
+}
+
+/// Diag - Forwarding function for diagnostics. This translates a source
+/// position in the current buffer into a SourceLocation object for rendering.
+void Lexer::Diag(const char *Loc, unsigned DiagID,
+ const std::string &Msg) const {
+ if (LexingRawMode && Diagnostic::isBuiltinNoteWarningOrExtension(DiagID))
+ return;
+ PP->Diag(getSourceLocation(Loc), DiagID, Msg);
+}
+void Lexer::Diag(SourceLocation Loc, unsigned DiagID,
+ const std::string &Msg) const {
+ if (LexingRawMode && Diagnostic::isBuiltinNoteWarningOrExtension(DiagID))
+ return;
+ PP->Diag(Loc, DiagID, Msg);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Trigraph and Escaped Newline Handling Code.
+//===----------------------------------------------------------------------===//
+
+/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
+/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
+static char GetTrigraphCharForLetter(char Letter) {
+ switch (Letter) {
+ default: return 0;
+ case '=': return '#';
+ case ')': return ']';
+ case '(': return '[';
+ case '!': return '|';
+ case '\'': return '^';
+ case '>': return '}';
+ case '/': return '\\';
+ case '<': return '{';
+ case '-': return '~';
+ }
+}
+
+/// DecodeTrigraphChar - If the specified character is a legal trigraph when
+/// prefixed with ??, emit a diagnostic about the trigraph use (whether
+/// trigraphs are enabled or not) and return the decoded character, or 0 if
+/// trigraphs are disabled.
+static char DecodeTrigraphChar(const char *CP, Lexer *L) {
+ char Res = GetTrigraphCharForLetter(*CP);
+ if (Res && L) {
+ if (!L->getFeatures().Trigraphs) {
+ L->Diag(CP-2, diag::trigraph_ignored);
+ return 0;
+ } else {
+ L->Diag(CP-2, diag::trigraph_converted, std::string()+Res);
+ }
+ }
+ return Res;
+}
+
+/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
+/// get its size, and return it. This is tricky in several cases:
+/// 1. If currently at the start of a trigraph, we warn about the trigraph,
+/// then either return the trigraph (skipping 3 chars) or the '?',
+/// depending on whether trigraphs are enabled or not.
+/// 2. If this is an escaped newline (potentially with whitespace between
+/// the backslash and newline), implicitly skip the newline and return
+/// the char after it.
+/// 3. If this is a UCN, return it. FIXME: C++ UCN's?
+///
+/// This handles the slow/uncommon case of the getCharAndSize method. Here we
+/// know that we can accumulate into Size, and that we have already incremented
+/// Ptr by Size bytes.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
+/// be updated to match.
+///
+char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
+ Token *Tok) {
+ // If we have a slash, look for an escaped newline.
+ if (Ptr[0] == '\\') {
+ ++Size;
+ ++Ptr;
+Slash:
+ // Common case, backslash-char where the char is not whitespace.
+ if (!isWhitespace(Ptr[0])) return '\\';
+
+ // See if we have optional whitespace characters followed by a newline.
+ {
+ unsigned SizeTmp = 0;
+ do {
+ ++SizeTmp;
+ if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
+ // Remember that this token needs to be cleaned.
+ if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+ // Warn if there was whitespace between the backslash and newline.
+ if (SizeTmp != 1 && Tok)
+ Diag(Ptr, diag::backslash_newline_space);
+
+ // If this is a \r\n or \n\r, skip the newlines.
+ if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
+ Ptr[SizeTmp-1] != Ptr[SizeTmp])
+ ++SizeTmp;
+
+ // Found backslash<whitespace><newline>. Parse the char after it.
+ Size += SizeTmp;
+ Ptr += SizeTmp;
+ // Use slow version to accumulate a correct size field.
+ return getCharAndSizeSlow(Ptr, Size, Tok);
+ }
+ } while (isWhitespace(Ptr[SizeTmp]));
+ }
+
+ // Otherwise, this is not an escaped newline, just return the slash.
+ return '\\';
+ }
+
+ // If this is a trigraph, process it.
+ if (Ptr[0] == '?' && Ptr[1] == '?') {
+ // If this is actually a legal trigraph (not something like "??x"), emit
+ // a trigraph warning. If so, and if trigraphs are enabled, return it.
+ if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
+ // Remember that this token needs to be cleaned.
+ if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+ Ptr += 3;
+ Size += 3;
+ if (C == '\\') goto Slash;
+ return C;
+ }
+ }
+
+ // If this is neither, return a single character.
+ ++Size;
+ return *Ptr;
+}
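+
+// Worked examples for the method above: given the bytes {'\\', '\n', 'x'},
+// it returns 'x' and advances Size by 3, folding the escaped newline into the
+// character; given {'?', '?', '/'} with trigraphs enabled, it decodes to
+// '\\', which may itself start an escaped newline (the goto Slash case).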
+
+
+/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
+/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
+/// and that we have already incremented Ptr by Size bytes.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
+/// be updated to match.
+char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+ const LangOptions &Features) {
+ // If we have a slash, look for an escaped newline.
+ if (Ptr[0] == '\\') {
+ ++Size;
+ ++Ptr;
+Slash:
+ // Common case, backslash-char where the char is not whitespace.
+ if (!isWhitespace(Ptr[0])) return '\\';
+
+ // See if we have optional whitespace characters followed by a newline.
+ {
+ unsigned SizeTmp = 0;
+ do {
+ ++SizeTmp;
+ if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
+
+ // If this is a \r\n or \n\r, skip the newlines.
+ if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
+ Ptr[SizeTmp-1] != Ptr[SizeTmp])
+ ++SizeTmp;
+
+ // Found backslash<whitespace><newline>. Parse the char after it.
+ Size += SizeTmp;
+ Ptr += SizeTmp;
+
+ // Use slow version to accumulate a correct size field.
+ return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
+ }
+ } while (isWhitespace(Ptr[SizeTmp]));
+ }
+
+ // Otherwise, this is not an escaped newline, just return the slash.
+ return '\\';
+ }
+
+ // If this is a trigraph, process it.
+ if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
+ // If this is actually a legal trigraph (not something like "??x"), return
+ // it.
+ if (char C = GetTrigraphCharForLetter(Ptr[2])) {
+ Ptr += 3;
+ Size += 3;
+ if (C == '\\') goto Slash;
+ return C;
+ }
+ }
+
+ // If this is neither, return a single character.
+ ++Size;
+ return *Ptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper methods for lexing.
+//===----------------------------------------------------------------------===//
+
+void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
+ // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
+ unsigned Size;
+ unsigned char C = *CurPtr++;
+ while (isIdentifierBody(C)) {
+ C = *CurPtr++;
+ }
+ --CurPtr; // Back up over the skipped character.
+
+ // Fast path, no $,\,? in identifier found. '\' might be an escaped newline
+ // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
+ // FIXME: UCNs.
+ if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
+FinishIdentifier:
+ const char *IdStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr);
+ Result.setKind(tok::identifier);
+
+ // If we are in raw mode, return this identifier raw. There is no need to
+ // look up identifier information or attempt to macro expand it.
+ if (LexingRawMode) return;
+
+ // Fill in Result.IdentifierInfo, looking up the identifier in the
+ // identifier table.
+ PP->LookUpIdentifierInfo(Result, IdStart);
+
+ // Finally, now that we know we have an identifier, pass this off to the
+ // preprocessor, which may macro expand it or something.
+ return PP->HandleIdentifier(Result);
+ }
+
+ // Otherwise, $,\,? in identifier found. Enter slower path.
+
+ C = getCharAndSize(CurPtr, Size);
+ while (1) {
+ if (C == '$') {
+ // If we hit a $ and they are not supported in identifiers, we are done.
+ if (!Features.DollarIdents) goto FinishIdentifier;
+
+ // Otherwise, emit a diagnostic and continue.
+ Diag(CurPtr, diag::ext_dollar_in_identifier);
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ C = getCharAndSize(CurPtr, Size);
+ continue;
+ } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
+ // Found end of identifier.
+ goto FinishIdentifier;
+ }
+
+ // Otherwise, this character is good, consume it.
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+
+ C = getCharAndSize(CurPtr, Size);
+ while (isIdentifierBody(C)) { // FIXME: UCNs.
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ C = getCharAndSize(CurPtr, Size);
+ }
+ }
+}
+
+
+/// LexNumericConstant - Lex the remainder of an integer or floating point
+/// constant. CurPtr[-1] is the first character lexed. Update BufferPtr to
+/// point at the end of the constant.
+void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
+ unsigned Size;
+ char C = getCharAndSize(CurPtr, Size);
+ char PrevCh = 0;
+ while (isNumberBody(C)) { // FIXME: UCNs?
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ PrevCh = C;
+ C = getCharAndSize(CurPtr, Size);
+ }
+
+ // If we fell out, check for a sign, due to 1e+12. If we have one, continue.
+ if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e'))
+ return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+
+ // If we have a hex FP constant, continue.
+ if (Features.HexFloats &&
+ (C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p'))
+ return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+
+ Result.setKind(tok::numeric_constant);
+
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
+}
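+
+// For example, "1e+12" lexes as a single pp-number: the loop stops at '+'
+// (not a number-body character), but PrevCh == 'e' triggers the sign case
+// above. Likewise "0x1.8p+3" stays one token when hex floats are enabled.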
+
+/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
+/// either " or L".
+void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide){
+ const char *NulCharacter = 0; // Does this string contain the \0 character?
+
+ char C = getAndAdvanceChar(CurPtr, Result);
+ while (C != '"') {
+ // Skip escaped characters.
+ if (C == '\\') {
+ // Skip the escaped character.
+ C = getAndAdvanceChar(CurPtr, Result);
+ } else if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string);
+ Result.setKind(tok::unknown);
+ FormTokenWithChars(Result, CurPtr-1);
+ return;
+ } else if (C == 0) {
+ NulCharacter = CurPtr-1;
+ }
+ C = getAndAdvanceChar(CurPtr, Result);
+ }
+
+ // If a nul character existed in the string, warn about it.
+ if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
+
+ Result.setKind(Wide ? tok::wide_string_literal : tok::string_literal);
+
+ // Update the location of the token as well as the BufferPtr instance var.
+ FormTokenWithChars(Result, CurPtr);
+}
+
+/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
+/// after having lexed the '<' character. This is used for #include filenames.
+void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
+ const char *NulCharacter = 0; // Does this string contain the \0 character?
+
+ char C = getAndAdvanceChar(CurPtr, Result);
+ while (C != '>') {
+ // Skip escaped characters.
+ if (C == '\\') {
+ // Skip the escaped character.
+ C = getAndAdvanceChar(CurPtr, Result);
+ } else if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string);
+ Result.setKind(tok::unknown);
+ FormTokenWithChars(Result, CurPtr-1);
+ return;
+ } else if (C == 0) {
+ NulCharacter = CurPtr-1;
+ }
+ C = getAndAdvanceChar(CurPtr, Result);
+ }
+
+ // If a nul character existed in the string, warn about it.
+ if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
+
+ Result.setKind(tok::angle_string_literal);
+
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
+}
+
+
+/// LexCharConstant - Lex the remainder of a character constant, after having
+/// lexed either ' or L'.
+void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
+ const char *NulCharacter = 0; // Does this character contain the \0 character?
+
+ // Handle the common case of 'x' and '\y' efficiently.
+ char C = getAndAdvanceChar(CurPtr, Result);
+ if (C == '\'') {
+ if (!LexingRawMode) Diag(BufferPtr, diag::err_empty_character);
+ Result.setKind(tok::unknown);
+ FormTokenWithChars(Result, CurPtr);
+ return;
+ } else if (C == '\\') {
+ // Skip the escaped character.
+ // FIXME: UCN's.
+ C = getAndAdvanceChar(CurPtr, Result);
+ }
+
+ if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
+ ++CurPtr;
+ } else {
+ // Fall back on generic code for embedded nulls, newlines, wide chars.
+ do {
+ // Skip escaped characters.
+ if (C == '\\') {
+ // Skip the escaped character.
+ C = getAndAdvanceChar(CurPtr, Result);
+ } else if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_char);
+ Result.setKind(tok::unknown);
+ FormTokenWithChars(Result, CurPtr-1);
+ return;
+ } else if (C == 0) {
+ NulCharacter = CurPtr-1;
+ }
+ C = getAndAdvanceChar(CurPtr, Result);
+ } while (C != '\'');
+ }
+
+ if (NulCharacter) Diag(NulCharacter, diag::null_in_char);
+
+ Result.setKind(tok::char_constant);
+
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
+}
+
+/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
+/// Update BufferPtr to point to the next non-whitespace character and return.
+void Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
+ // Whitespace - Skip it, then return the token after the whitespace.
+ unsigned char Char = *CurPtr; // Skip consecutive spaces efficiently.
+ while (1) {
+ // Skip horizontal whitespace very aggressively.
+ while (isHorizontalWhitespace(Char))
+ Char = *++CurPtr;
+
+ // Otherwise, if we see something other than whitespace, we're done.
+ if (Char != '\n' && Char != '\r')
+ break;
+
+ if (ParsingPreprocessorDirective) {
+ // End of preprocessor directive line, let LexTokenInternal handle this.
+ BufferPtr = CurPtr;
+ return;
+ }
+
+ // OK, this is a newline; handle it.
+ // The returned token is at the start of the line.
+ Result.setFlag(Token::StartOfLine);
+ // No leading whitespace seen so far.
+ Result.clearFlag(Token::LeadingSpace);
+ Char = *++CurPtr;
+ }
+
+ // If this isn't immediately after a newline, there is leading space.
+ char PrevChar = CurPtr[-1];
+ if (PrevChar != '\n' && PrevChar != '\r')
+ Result.setFlag(Token::LeadingSpace);
+
+ BufferPtr = CurPtr;
+}
+
+/// SkipBCPLComment - We have just read the // characters from input. Skip
+/// until we find the newline character that terminates the comment. Then
+/// update BufferPtr and return.
+bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
+ // If BCPL comments aren't explicitly enabled for this language, emit an
+ // extension warning.
+ if (!Features.BCPLComment) {
+ Diag(BufferPtr, diag::ext_bcpl_comment);
+
+ // Mark them enabled so we only emit one warning for this translation
+ // unit.
+ Features.BCPLComment = true;
+ }
+
+ // Scan over the body of the comment. The common case, when scanning, is that
+ // the comment contains normal ascii characters with nothing interesting in
+ // them. As such, optimize for this case with the inner loop.
+ char C;
+ do {
+ C = *CurPtr;
+ // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character.
+ // If we find a \n character, scan backwards, checking to see if it's an
+ // escaped newline, like we do for block comments.
+
+ // Skip over characters in the fast loop.
+ while (C != 0 && // Potentially EOF.
+ C != '\\' && // Potentially escaped newline.
+ C != '?' && // Potentially trigraph.
+ C != '\n' && C != '\r') // Newline or DOS-style newline.
+ C = *++CurPtr;
+
+ // If this is a newline, we're done.
+ if (C == '\n' || C == '\r')
+ break; // Found the newline? Break out!
+
+ // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
+ // properly decode the character.
+ const char *OldPtr = CurPtr;
+ C = getAndAdvanceChar(CurPtr, Result);
+
+ // If we read multiple characters, and one of those characters was a \r or
+ // \n, then we had an escaped newline within the comment. Emit diagnostic
+ // unless the next line is also a // comment.
+ if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
+ for (; OldPtr != CurPtr; ++OldPtr)
+ if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
+ // Okay, we found a // comment that ends in a newline, if the next
+ // line is also a // comment, but has spaces, don't emit a diagnostic.
+ if (isspace(C)) {
+ const char *ForwardPtr = CurPtr;
+ while (isspace(*ForwardPtr)) // Skip whitespace.
+ ++ForwardPtr;
+ if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
+ break;
+ }
+
+ Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
+ break;
+ }
+ }
+
+ if (CurPtr == BufferEnd+1) { --CurPtr; break; }
+ } while (C != '\n' && C != '\r');
+
+ // Found but did not consume the newline.
+
+ // If we are returning comments as tokens, return this comment as a token.
+ if (KeepCommentMode)
+ return SaveBCPLComment(Result, CurPtr);
+
+ // If we are inside a preprocessor directive and we see the end of line,
+ // return immediately, so that the lexer can return this as an EOM token.
+ if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
+ BufferPtr = CurPtr;
+ return true;
+ }
+
+ // Otherwise, eat the \n character. We don't care if this is a \n\r or
+ // \r\n sequence.
+ ++CurPtr;
+
+ // The next returned token is at the start of the line.
+ Result.setFlag(Token::StartOfLine);
+ // No leading whitespace seen so far.
+ Result.clearFlag(Token::LeadingSpace);
+ BufferPtr = CurPtr;
+ return true;
+}
+
+/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
+/// an appropriate way and return it.
+bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
+ Result.setKind(tok::comment);
+ FormTokenWithChars(Result, CurPtr);
+
+ // If this BCPL-style comment is in a macro definition, transmogrify it into
+ // a C-style block comment.
+ if (ParsingPreprocessorDirective) {
+ std::string Spelling = PP->getSpelling(Result);
+ assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
+ Spelling[1] = '*'; // Change prefix to "/*".
+ Spelling += "*/"; // add suffix.
+
+ Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(),
+ Result.getLocation()));
+ Result.setLength(Spelling.size());
+ }
+ return false;
+}
+
+/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified
+/// newline character (either \n or \r) is part of an escaped newline sequence
+/// ending a block comment. Issue a diagnostic if so. We know that the newline
+/// is inside of a block comment.
+static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
+ Lexer *L) {
+ assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
+
+ // Back up off the newline.
+ --CurPtr;
+
+ // If this is a two-character newline sequence, skip the other character.
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+ // \n\n or \r\r -> not escaped newline.
+ if (CurPtr[0] == CurPtr[1])
+ return false;
+ // \n\r or \r\n -> skip the newline.
+ --CurPtr;
+ }
+
+ // If we have horizontal whitespace, skip over it. We allow whitespace
+ // between the slash and newline.
+ bool HasSpace = false;
+ while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+ --CurPtr;
+ HasSpace = true;
+ }
+
+ // If we have a slash, we know this is an escaped newline.
+ if (*CurPtr == '\\') {
+ if (CurPtr[-1] != '*') return false;
+ } else {
+    // It isn't a backslash; is it the ??/ trigraph?
+ if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
+ CurPtr[-3] != '*')
+ return false;
+
+ // This is the trigraph ending the comment. Emit a stern warning!
+ CurPtr -= 2;
+
+ // If no trigraphs are enabled, warn that we ignored this trigraph and
+ // ignore this * character.
+ if (!L->getFeatures().Trigraphs) {
+ L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+ return false;
+ }
+ L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+ }
+
+ // Warn about having an escaped newline between the */ characters.
+ L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
+
+ // If there was space between the backslash and newline, warn about it.
+ if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space);
+
+ return true;
+}
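+// For example, both comment endings sketched below are accepted by the check
+// above (the second one only when trigraphs are enabled, since ??/ is the
+// trigraph spelling of '\'):
+//
+//   /* text *\
+//   /
+//
+//   /* text *??/
+//   /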
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#elif __ALTIVEC__
+#include <altivec.h>
+#undef bool
+#endif
+
+/// SkipBlockComment - We have just read the /* characters from input. Read
+/// until we find the */ characters that terminate the comment. Note that we
+/// don't bother decoding trigraphs or escaped newlines in block comments,
+/// because they cannot cause the comment to end. The only wrinkle is that the
+/// comment could end with an escaped newline between the '*' and '/' that
+/// terminate it.
+bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
+ // Scan one character past where we should, looking for a '/' character. Once
+  // we find it, check to see if it was preceded by a *. This common
+ // optimization helps people who like to put a lot of * characters in their
+ // comments.
+
+  // Get the first character with newlines and trigraphs skipped, so that the
+  // degenerate /*/ case below is handled correctly even if the * has an
+  // escaped newline after it.
+ unsigned CharSize;
+ unsigned char C = getCharAndSize(CurPtr, CharSize);
+ CurPtr += CharSize;
+ if (C == 0 && CurPtr == BufferEnd+1) {
+ Diag(BufferPtr, diag::err_unterminated_block_comment);
+ BufferPtr = CurPtr-1;
+ return true;
+ }
+
+ // Check to see if the first character after the '/*' is another /. If so,
+ // then this slash does not end the block comment, it is part of it.
+ if (C == '/')
+ C = *CurPtr++;
+
+ while (1) {
+    // Skip over all non-interesting characters until we find the end of the
+    // buffer or a (probably ending) '/' character.
+ if (CurPtr + 24 < BufferEnd) {
+ // While not aligned to a 16-byte boundary.
+ while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
+ C = *CurPtr++;
+
+ if (C == '/') goto FoundSlash;
+
+#ifdef __SSE2__
+ __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/',
+ '/', '/', '/', '/', '/', '/', '/', '/');
+ while (CurPtr+16 <= BufferEnd &&
+ _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0)
+ CurPtr += 16;
+#elif __ALTIVEC__
+ __vector unsigned char Slashes = {
+ '/', '/', '/', '/', '/', '/', '/', '/',
+ '/', '/', '/', '/', '/', '/', '/', '/'
+ };
+ while (CurPtr+16 <= BufferEnd &&
+ !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes))
+ CurPtr += 16;
+#else
+ // Scan for '/' quickly. Many block comments are very large.
+      // Check the buffer bound first so we never read past the terminator.
+      while (CurPtr+4 < BufferEnd &&
+             CurPtr[0] != '/' &&
+             CurPtr[1] != '/' &&
+             CurPtr[2] != '/' &&
+             CurPtr[3] != '/') {
+        CurPtr += 4;
+      }
+#endif
+
+ // It has to be one of the bytes scanned, increment to it and read one.
+ C = *CurPtr++;
+ }
+
+ // Loop to scan the remainder.
+ while (C != '/' && C != '\0')
+ C = *CurPtr++;
+
+ FoundSlash:
+ if (C == '/') {
+ if (CurPtr[-2] == '*') // We found the final */. We're done!
+ break;
+
+ if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
+ if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
+ // We found the final */, though it had an escaped newline between the
+ // * and /. We're done!
+ break;
+ }
+ }
+ if (CurPtr[0] == '*' && CurPtr[1] != '/') {
+ // If this is a /* inside of the comment, emit a warning. Don't do this
+ // if this is a /*/, which will end the comment. This misses cases with
+ // embedded escaped newlines, but oh well.
+ Diag(CurPtr-1, diag::nested_block_comment);
+ }
+ } else if (C == 0 && CurPtr == BufferEnd+1) {
+ Diag(BufferPtr, diag::err_unterminated_block_comment);
+ // Note: the user probably forgot a */. We could continue immediately
+ // after the /*, but this would involve lexing a lot of what really is the
+ // comment, which surely would confuse the parser.
+ BufferPtr = CurPtr-1;
+ return true;
+ }
+ C = *CurPtr++;
+ }
+
+ // If we are returning comments as tokens, return this comment as a token.
+ if (KeepCommentMode) {
+ Result.setKind(tok::comment);
+ FormTokenWithChars(Result, CurPtr);
+ return false;
+ }
+
+ // It is common for the tokens immediately after a /**/ comment to be
+ // whitespace. Instead of going through the big switch, handle it
+ // efficiently now.
+ if (isHorizontalWhitespace(*CurPtr)) {
+ Result.setFlag(Token::LeadingSpace);
+ SkipWhitespace(Result, CurPtr+1);
+ return true;
+ }
+
+ // Otherwise, just return so that the next character will be lexed as a token.
+ BufferPtr = CurPtr;
+ Result.setFlag(Token::LeadingSpace);
+ return true;
+}
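+// A sketch of the degenerate cases handled above: "/*/" does not terminate a
+// comment (the '*' is the one that opened it), "/* /* */" warns with
+// nested_block_comment at the inner "/*", and whitespace immediately after
+// "*/" is consumed on the fast path instead of through the main switch.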
+
+//===----------------------------------------------------------------------===//
+// Primary Lexing Entry Points
+//===----------------------------------------------------------------------===//
+
+/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
+/// (potentially) macro expand the filename.
+void Lexer::LexIncludeFilename(Token &FilenameTok) {
+ assert(ParsingPreprocessorDirective &&
+ ParsingFilename == false &&
+ "Must be in a preprocessing directive!");
+
+ // We are now parsing a filename!
+ ParsingFilename = true;
+
+ // Lex the filename.
+ Lex(FilenameTok);
+
+ // We should have obtained the filename now.
+ ParsingFilename = false;
+
+ // No filename?
+ if (FilenameTok.is(tok::eom))
+ Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+}
+
+/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
+/// uninterpreted string. This switches the lexer out of directive mode.
+std::string Lexer::ReadToEndOfLine() {
+ assert(ParsingPreprocessorDirective && ParsingFilename == false &&
+ "Must be in a preprocessing directive!");
+ std::string Result;
+ Token Tmp;
+
+ // CurPtr - Cache BufferPtr in an automatic variable.
+ const char *CurPtr = BufferPtr;
+ while (1) {
+ char Char = getAndAdvanceChar(CurPtr, Tmp);
+ switch (Char) {
+ default:
+ Result += Char;
+ break;
+ case 0: // Null.
+ // Found end of file?
+ if (CurPtr-1 != BufferEnd) {
+ // Nope, normal character, continue.
+ Result += Char;
+ break;
+ }
+ // FALL THROUGH.
+ case '\r':
+ case '\n':
+ // Okay, we found the end of the line. First, back up past the \0, \r, \n.
+ assert(CurPtr[-1] == Char && "Trigraphs for newline?");
+ BufferPtr = CurPtr-1;
+
+ // Next, lex the character, which should handle the EOM transition.
+ Lex(Tmp);
+ assert(Tmp.is(tok::eom) && "Unexpected token!");
+
+ // Finally, we're done, return the string we found.
+ return Result;
+ }
+ }
+}
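+// A usage sketch: for a directive such as "#error some text", the
+// preprocessor can call ReadToEndOfLine to collect the rest of the line
+// verbatim; the trailing Lex(Tmp) then consumes the newline as the eom token.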
+
+/// LexEndOfFile - CurPtr points to the end of this file. Handle this
+/// condition, reporting diagnostics and handling other edge cases as required.
+/// This returns true if Result contains a token, false if PP.Lex should be
+/// called again.
+bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
+ // If we hit the end of the file while parsing a preprocessor directive,
+ // end the preprocessor directive first. The next token returned will
+ // then be the end of file.
+ if (ParsingPreprocessorDirective) {
+ // Done parsing the "line".
+ ParsingPreprocessorDirective = false;
+ Result.setKind(tok::eom);
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
+
+ // Restore comment saving mode, in case it was disabled for directive.
+ KeepCommentMode = PP->getCommentRetentionState();
+ return true; // Have a token.
+ }
+
+ // If we are in raw mode, return this event as an EOF token. Let the caller
+ // that put us in raw mode handle the event.
+ if (LexingRawMode) {
+ Result.startToken();
+ BufferPtr = BufferEnd;
+ FormTokenWithChars(Result, BufferEnd);
+ Result.setKind(tok::eof);
+ return true;
+ }
+
+ // Otherwise, issue diagnostics for unterminated #if and missing newline.
+
+ // If we are in a #if directive, emit an error.
+ while (!ConditionalStack.empty()) {
+ Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional);
+ ConditionalStack.pop_back();
+ }
+
+ // If the file was empty or didn't end in a newline, issue a pedwarn.
+ if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+ Diag(BufferEnd, diag::ext_no_newline_eof);
+
+ BufferPtr = CurPtr;
+
+ // Finally, let the preprocessor handle this.
+ return PP->HandleEndOfFile(Result);
+}
+
+/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
+/// the specified lexer will return a tok::l_paren token, 0 if it is something
+/// else and 2 if there are no more tokens in the buffer controlled by the
+/// lexer.
+unsigned Lexer::isNextPPTokenLParen() {
+ assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
+
+ // Switch to 'skipping' mode. This will ensure that we can lex a token
+ // without emitting diagnostics, disables macro expansion, and will cause EOF
+ // to return an EOF token instead of popping the include stack.
+ LexingRawMode = true;
+
+ // Save state that can be changed while lexing so that we can restore it.
+ const char *TmpBufferPtr = BufferPtr;
+
+ Token Tok;
+ Tok.startToken();
+ LexTokenInternal(Tok);
+
+ // Restore state that may have changed.
+ BufferPtr = TmpBufferPtr;
+
+ // Restore the lexer back to non-skipping mode.
+ LexingRawMode = false;
+
+ if (Tok.is(tok::eof))
+ return 2;
+ return Tok.is(tok::l_paren);
+}
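+// A sketch of why this is needed: given "#define F(x) x", a use of "F" is
+// only a macro invocation if the next unexpanded token is '('. Returning 2
+// for end-of-buffer lets the caller keep looking in the next buffer on the
+// include stack rather than guessing.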
+
+
+/// LexTokenInternal - This implements a simple C family lexer. It is an
+/// extremely performance critical piece of code. This assumes that the buffer
+/// has a null character at the end of the file. This returns a
+/// preprocessing token, not a normal token; as such, it is an internal
+/// interface. It assumes that the Flags of result have been cleared before
+/// calling this.
+void Lexer::LexTokenInternal(Token &Result) {
+LexNextToken:
+ // New token, can't need cleaning yet.
+ Result.clearFlag(Token::NeedsCleaning);
+ Result.setIdentifierInfo(0);
+
+ // CurPtr - Cache BufferPtr in an automatic variable.
+ const char *CurPtr = BufferPtr;
+
+  // Small amounts of horizontal whitespace are very common between tokens.
+ if ((*CurPtr == ' ') || (*CurPtr == '\t')) {
+ ++CurPtr;
+ while ((*CurPtr == ' ') || (*CurPtr == '\t'))
+ ++CurPtr;
+ BufferPtr = CurPtr;
+ Result.setFlag(Token::LeadingSpace);
+ }
+
+ unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
+
+ // Read a character, advancing over it.
+ char Char = getAndAdvanceChar(CurPtr, Result);
+ switch (Char) {
+ case 0: // Null.
+ // Found end of file?
+ if (CurPtr-1 == BufferEnd) {
+ // Read the PP instance variable into an automatic variable, because
+ // LexEndOfFile will often delete 'this'.
+ Preprocessor *PPCache = PP;
+ if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file.
+ return; // Got a token to return.
+ assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
+ return PPCache->Lex(Result);
+ }
+
+ Diag(CurPtr-1, diag::null_in_file);
+ Result.setFlag(Token::LeadingSpace);
+ SkipWhitespace(Result, CurPtr);
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ case '\n':
+ case '\r':
+ // If we are inside a preprocessor directive and we see the end of line,
+ // we know we are done with the directive, so return an EOM token.
+ if (ParsingPreprocessorDirective) {
+ // Done parsing the "line".
+ ParsingPreprocessorDirective = false;
+
+ // Restore comment saving mode, in case it was disabled for directive.
+ KeepCommentMode = PP->getCommentRetentionState();
+
+ // Since we consumed a newline, we are back at the start of a line.
+ IsAtStartOfLine = true;
+
+ Result.setKind(tok::eom);
+ break;
+ }
+ // The returned token is at the start of the line.
+ Result.setFlag(Token::StartOfLine);
+ // No leading whitespace seen so far.
+ Result.clearFlag(Token::LeadingSpace);
+ SkipWhitespace(Result, CurPtr);
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ case ' ':
+ case '\t':
+ case '\f':
+ case '\v':
+ SkipHorizontalWhitespace:
+ Result.setFlag(Token::LeadingSpace);
+ SkipWhitespace(Result, CurPtr);
+
+ SkipIgnoredUnits:
+ CurPtr = BufferPtr;
+
+ // If the next token is obviously a // or /* */ comment, skip it efficiently
+ // too (without going through the big switch stmt).
+ if (CurPtr[0] == '/' && CurPtr[1] == '/' && !KeepCommentMode) {
+ SkipBCPLComment(Result, CurPtr+2);
+ goto SkipIgnoredUnits;
+ } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !KeepCommentMode) {
+ SkipBlockComment(Result, CurPtr+2);
+ goto SkipIgnoredUnits;
+ } else if (isHorizontalWhitespace(*CurPtr)) {
+ goto SkipHorizontalWhitespace;
+ }
+ goto LexNextToken; // GCC isn't tail call eliminating.
+
+ // C99 6.4.4.1: Integer Constants.
+ // C99 6.4.4.2: Floating Constants.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexNumericConstant(Result, CurPtr);
+
+ case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // Wide string literal.
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ true);
+
+ // Wide character constant.
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+ // FALL THROUGH, treating L like the start of an identifier.
+
+ // C99 6.4.2: Identifiers.
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+ case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
+ case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+ case 'V': case 'W': case 'X': case 'Y': case 'Z':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+ case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+ case 'v': case 'w': case 'x': case 'y': case 'z':
+ case '_':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexIdentifier(Result, CurPtr);
+
+ case '$': // $ in identifiers.
+ if (Features.DollarIdents) {
+ Diag(CurPtr-1, diag::ext_dollar_in_identifier);
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexIdentifier(Result, CurPtr);
+ }
+
+ Result.setKind(tok::unknown);
+ break;
+
+ // C99 6.4.4: Character Constants.
+ case '\'':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexCharConstant(Result, CurPtr);
+
+ // C99 6.4.5: String Literals.
+ case '"':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexStringLiteral(Result, CurPtr, false);
+
+ // C99 6.4.6: Punctuators.
+ case '?':
+ Result.setKind(tok::question);
+ break;
+ case '[':
+ Result.setKind(tok::l_square);
+ break;
+ case ']':
+ Result.setKind(tok::r_square);
+ break;
+ case '(':
+ Result.setKind(tok::l_paren);
+ break;
+ case ')':
+ Result.setKind(tok::r_paren);
+ break;
+ case '{':
+ Result.setKind(tok::l_brace);
+ break;
+ case '}':
+ Result.setKind(tok::r_brace);
+ break;
+ case '.':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char >= '0' && Char <= '9') {
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+ } else if (Features.CPlusPlus && Char == '*') {
+ Result.setKind(tok::periodstar);
+ CurPtr += SizeTmp;
+ } else if (Char == '.' &&
+ getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
+ Result.setKind(tok::ellipsis);
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else {
+ Result.setKind(tok::period);
+ }
+ break;
+ case '&':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '&') {
+ Result.setKind(tok::ampamp);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '=') {
+ Result.setKind(tok::ampequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::amp);
+ }
+ break;
+ case '*':
+ if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+ Result.setKind(tok::starequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::star);
+ }
+ break;
+ case '+':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '+') {
+ Result.setKind(tok::plusplus);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '=') {
+ Result.setKind(tok::plusequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::plus);
+ }
+ break;
+ case '-':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '-') {
+ Result.setKind(tok::minusminus);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '>' && Features.CPlusPlus &&
+ getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {
+ Result.setKind(tok::arrowstar); // C++ ->*
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else if (Char == '>') {
+ Result.setKind(tok::arrow);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '=') {
+ Result.setKind(tok::minusequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::minus);
+ }
+ break;
+ case '~':
+ Result.setKind(tok::tilde);
+ break;
+ case '!':
+ if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+ Result.setKind(tok::exclaimequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::exclaim);
+ }
+ break;
+ case '/':
+ // 6.4.9: Comments
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '/') { // BCPL comment.
+ if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) {
+ // It is common for the tokens immediately after a // comment to be
+ // whitespace (indentation for the next line). Instead of going through
+ // the big switch, handle it efficiently now.
+ goto SkipIgnoredUnits;
+ }
+ return; // KeepCommentMode
+ } else if (Char == '*') { // /**/ comment.
+ if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ return; // KeepCommentMode
+ } else if (Char == '=') {
+ Result.setKind(tok::slashequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::slash);
+ }
+ break;
+ case '%':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ Result.setKind(tok::percentequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Features.Digraphs && Char == '>') {
+ Result.setKind(tok::r_brace); // '%>' -> '}'
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Features.Digraphs && Char == ':') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
+ Result.setKind(tok::hashhash); // '%:%:' -> '##'
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else if (Char == '@' && Features.Microsoft) { // %:@ -> #@ -> Charize
+ Result.setKind(tok::hashat);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Diag(BufferPtr, diag::charize_microsoft_ext);
+ } else {
+ Result.setKind(tok::hash); // '%:' -> '#'
+
+ // We parsed a # character. If this occurs at the start of the line,
+        // it's actually the start of a preprocessing directive. Call back to
+        // the preprocessor to handle it.
+ // FIXME: -fpreprocessed mode??
+ if (Result.isAtStartOfLine() && !LexingRawMode) {
+ BufferPtr = CurPtr;
+ PP->HandleDirective(Result);
+
+ // As an optimization, if the preprocessor didn't switch lexers, tail
+ // recurse.
+ if (PP->isCurrentLexer(this)) {
+ // Start a new token. If this is a #include or something, the PP may
+ // want us starting at the beginning of the line again. If so, set
+ // the StartOfLine flag.
+ if (IsAtStartOfLine) {
+ Result.setFlag(Token::StartOfLine);
+ IsAtStartOfLine = false;
+ }
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ }
+
+ return PP->Lex(Result);
+ }
+ }
+ } else {
+ Result.setKind(tok::percent);
+ }
+ break;
+ case '<':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (ParsingFilename) {
+ return LexAngledStringLiteral(Result, CurPtr+SizeTmp);
+ } else if (Char == '<' &&
+ getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
+ Result.setKind(tok::lesslessequal);
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else if (Char == '<') {
+ Result.setKind(tok::lessless);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '=') {
+ Result.setKind(tok::lessequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Features.Digraphs && Char == ':') {
+ Result.setKind(tok::l_square); // '<:' -> '['
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Features.Digraphs && Char == '%') {
+ Result.setKind(tok::l_brace); // '<%' -> '{'
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::less);
+ }
+ break;
+ case '>':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ Result.setKind(tok::greaterequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '>' &&
+ getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
+ Result.setKind(tok::greatergreaterequal);
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else if (Char == '>') {
+ Result.setKind(tok::greatergreater);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::greater);
+ }
+ break;
+ case '^':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ Result.setKind(tok::caretequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::caret);
+ }
+ break;
+ case '|':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ Result.setKind(tok::pipeequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '|') {
+ Result.setKind(tok::pipepipe);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::pipe);
+ }
+ break;
+ case ':':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Features.Digraphs && Char == '>') {
+ Result.setKind(tok::r_square); // ':>' -> ']'
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Features.CPlusPlus && Char == ':') {
+ Result.setKind(tok::coloncolon);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::colon);
+ }
+ break;
+ case ';':
+ Result.setKind(tok::semi);
+ break;
+ case '=':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ Result.setKind(tok::equalequal);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::equal);
+ }
+ break;
+ case ',':
+ Result.setKind(tok::comma);
+ break;
+ case '#':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '#') {
+ Result.setKind(tok::hashhash);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '@' && Features.Microsoft) { // #@ -> Charize
+ Result.setKind(tok::hashat);
+ Diag(BufferPtr, diag::charize_microsoft_ext);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Result.setKind(tok::hash);
+ // We parsed a # character. If this occurs at the start of the line,
+      // it's actually the start of a preprocessing directive. Call back to
+      // the preprocessor to handle it.
+ // FIXME: -fpreprocessed mode??
+ if (Result.isAtStartOfLine() && !LexingRawMode) {
+ BufferPtr = CurPtr;
+ PP->HandleDirective(Result);
+
+ // As an optimization, if the preprocessor didn't switch lexers, tail
+ // recurse.
+ if (PP->isCurrentLexer(this)) {
+ // Start a new token. If this is a #include or something, the PP may
+ // want us starting at the beginning of the line again. If so, set
+ // the StartOfLine flag.
+ if (IsAtStartOfLine) {
+ Result.setFlag(Token::StartOfLine);
+ IsAtStartOfLine = false;
+ }
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ }
+ return PP->Lex(Result);
+ }
+ }
+ break;
+
+ case '@':
+ // Objective C support.
+ if (CurPtr[-1] == '@' && Features.ObjC1)
+ Result.setKind(tok::at);
+ else
+ Result.setKind(tok::unknown);
+ break;
+
+ case '\\':
+ // FIXME: UCN's.
+ // FALL THROUGH.
+ default:
+ Result.setKind(tok::unknown);
+ break;
+ }
+
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
+}
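+// A reference sketch of the digraphs handled in the switch above (only when
+// Features.Digraphs is set):
+//
+//   <:  -> [      :>   -> ]      <%  -> {      %>  -> }
+//   %:  -> #      %:%: -> ##
+//
+// plus the Microsoft charize extension, where "#@" and "%:@" both lex as
+// tok::hashat.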
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
new file mode 100644
index 00000000000..aa0b831af90
--- /dev/null
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -0,0 +1,691 @@
+//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NumericLiteralParser, CharLiteralParser, and
+// StringLiteralParser interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace clang;
+
+/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
+/// not valid.
+static int HexDigitValue(char C) {
+ if (C >= '0' && C <= '9') return C-'0';
+ if (C >= 'a' && C <= 'f') return C-'a'+10;
+ if (C >= 'A' && C <= 'F') return C-'A'+10;
+ return -1;
+}
+
+/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
+/// either a character or a string literal.
+static unsigned ProcessCharEscape(const char *&ThisTokBuf,
+ const char *ThisTokEnd, bool &HadError,
+ SourceLocation Loc, bool IsWide,
+ Preprocessor &PP) {
+ // Skip the '\' char.
+ ++ThisTokBuf;
+
+ // We know that this character can't be off the end of the buffer, because
+ // that would have been \", which would not have been the end of string.
+ unsigned ResultChar = *ThisTokBuf++;
+ switch (ResultChar) {
+ // These map to themselves.
+ case '\\': case '\'': case '"': case '?': break;
+
+ // These have fixed mappings.
+ case 'a':
+ // TODO: K&R: the meaning of '\\a' is different in traditional C
+ ResultChar = 7;
+ break;
+ case 'b':
+ ResultChar = 8;
+ break;
+ case 'e':
+ PP.Diag(Loc, diag::ext_nonstandard_escape, "e");
+ ResultChar = 27;
+ break;
+ case 'f':
+ ResultChar = 12;
+ break;
+ case 'n':
+ ResultChar = 10;
+ break;
+ case 'r':
+ ResultChar = 13;
+ break;
+ case 't':
+ ResultChar = 9;
+ break;
+ case 'v':
+ ResultChar = 11;
+ break;
+
+ //case 'u': case 'U': // FIXME: UCNs.
+ case 'x': { // Hex escape.
+ ResultChar = 0;
+ if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
+ PP.Diag(Loc, diag::err_hex_escape_no_digits);
+ HadError = 1;
+ break;
+ }
+
+ // Hex escapes are a maximal series of hex digits.
+ bool Overflow = false;
+ for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
+ int CharVal = HexDigitValue(ThisTokBuf[0]);
+ if (CharVal == -1) break;
+      // About to shift out a digit?
+      Overflow |= (ResultChar & 0xF0000000) != 0;
+ ResultChar <<= 4;
+ ResultChar |= CharVal;
+ }
+
+ // See if any bits will be truncated when evaluated as a character.
+ unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
+
+ if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+ Overflow = true;
+ ResultChar &= ~0U >> (32-CharWidth);
+ }
+
+ // Check for overflow.
+    if (Overflow)   // Too many digits to fit into the character.
+ PP.Diag(Loc, diag::warn_hex_escape_too_large);
+ break;
+ }
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7': {
+ // Octal escapes.
+ --ThisTokBuf;
+ ResultChar = 0;
+
+ // Octal escapes are a series of octal digits with maximum length 3.
+ // "\0123" is a two digit sequence equal to "\012" "3".
+ unsigned NumDigits = 0;
+ do {
+ ResultChar <<= 3;
+ ResultChar |= *ThisTokBuf++ - '0';
+ ++NumDigits;
+ } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
+ ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
+
+ // Check for overflow. Reject '\777', but not L'\777'.
+ unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
+
+ if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+ PP.Diag(Loc, diag::warn_octal_escape_too_large);
+ ResultChar &= ~0U >> (32-CharWidth);
+ }
+ break;
+ }
+
+ // Otherwise, these are not valid escapes.
+ case '(': case '{': case '[': case '%':
+ // GCC accepts these as extensions. We warn about them as such though.
+ if (!PP.getLangOptions().NoExtensions) {
+ PP.Diag(Loc, diag::ext_nonstandard_escape,
+ std::string()+(char)ResultChar);
+ break;
+ }
+ // FALL THROUGH.
+ default:
+ if (isgraph(ThisTokBuf[0])) {
+ PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
+ } else {
+ PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
+ }
+ break;
+ }
+
+ return ResultChar;
+}
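+// Illustrative inputs for the escape processing above (values assume an
+// 8-bit char, as asserted elsewhere in this file):
+//
+//   "\n"    -> 10            "\x41"  -> 65 ('A')
+//   "\101"  -> 65 ('A')      "\0123" -> "\012" then '3' (octal is 3 digits max)
+//   "\xFFF" -> warn_hex_escape_too_large for a narrow literal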
+
+
+
+
+/// integer-constant: [C99 6.4.4.1]
+/// decimal-constant integer-suffix
+/// octal-constant integer-suffix
+/// hexadecimal-constant integer-suffix
+/// decimal-constant:
+/// nonzero-digit
+/// decimal-constant digit
+/// octal-constant:
+/// 0
+/// octal-constant octal-digit
+/// hexadecimal-constant:
+/// hexadecimal-prefix hexadecimal-digit
+/// hexadecimal-constant hexadecimal-digit
+/// hexadecimal-prefix: one of
+/// 0x 0X
+/// integer-suffix:
+/// unsigned-suffix [long-suffix]
+/// unsigned-suffix [long-long-suffix]
+/// long-suffix [unsigned-suffix]
+/// long-long-suffix [unsigned-suffix]
+/// nonzero-digit:
+/// 1 2 3 4 5 6 7 8 9
+/// octal-digit:
+/// 0 1 2 3 4 5 6 7
+/// hexadecimal-digit:
+/// 0 1 2 3 4 5 6 7 8 9
+/// a b c d e f
+/// A B C D E F
+/// unsigned-suffix: one of
+/// u U
+/// long-suffix: one of
+/// l L
+/// long-long-suffix: one of
+/// ll LL
+///
+/// floating-constant: [C99 6.4.4.2]
+/// TODO: add rules...
+///
+
+NumericLiteralParser::
+NumericLiteralParser(const char *begin, const char *end,
+ SourceLocation TokLoc, Preprocessor &pp)
+ : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
+ s = DigitsBegin = begin;
+ saw_exponent = false;
+ saw_period = false;
+ isLong = false;
+ isUnsigned = false;
+ isLongLong = false;
+ isFloat = false;
+ isImaginary = false;
+ hadError = false;
+
+ if (*s == '0') { // parse radix
+ s++;
+ if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
+ s++;
+ radix = 16;
+ DigitsBegin = s;
+ s = SkipHexDigits(s);
+ if (s == ThisTokEnd) {
+ // Done.
+ } else if (*s == '.') {
+ s++;
+ saw_period = true;
+ s = SkipHexDigits(s);
+ }
+      // A binary exponent can appear with or without a '.'. If dotted, the
+ // binary exponent is required.
+ if ((*s == 'p' || *s == 'P') && PP.getLangOptions().HexFloats) {
+ s++;
+ saw_exponent = true;
+ if (*s == '+' || *s == '-') s++; // sign
+ const char *first_non_digit = SkipDigits(s);
+ if (first_non_digit == s) {
+ Diag(TokLoc, diag::err_exponent_has_no_digits);
+ return;
+ } else {
+ s = first_non_digit;
+ }
+ } else if (saw_period) {
+ Diag(TokLoc, diag::err_hexconstant_requires_exponent);
+ return;
+ }
+ } else if (*s == 'b' || *s == 'B') {
+ // 0b101010 is a GCC extension.
+ ++s;
+ radix = 2;
+ DigitsBegin = s;
+ s = SkipBinaryDigits(s);
+ if (s == ThisTokEnd) {
+ // Done.
+ } else if (isxdigit(*s)) {
+ Diag(TokLoc, diag::err_invalid_binary_digit, std::string(s, s+1));
+ return;
+ }
+ PP.Diag(TokLoc, diag::ext_binary_literal);
+ } else {
+ // For now, the radix is set to 8. If we discover that we have a
+ // floating point constant, the radix will change to 10. Octal floating
+ // point constants are not permitted (only decimal and hexadecimal).
+ radix = 8;
+ DigitsBegin = s;
+ s = SkipOctalDigits(s);
+ if (s == ThisTokEnd) {
+ // Done.
+ } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
+ TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
+ Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1));
+ return;
+ } else if (*s == '.') {
+ s++;
+ radix = 10;
+ saw_period = true;
+ s = SkipDigits(s);
+ }
+ if (*s == 'e' || *s == 'E') { // exponent
+ s++;
+ radix = 10;
+ saw_exponent = true;
+ if (*s == '+' || *s == '-') s++; // sign
+ const char *first_non_digit = SkipDigits(s);
+ if (first_non_digit == s) {
+ Diag(TokLoc, diag::err_exponent_has_no_digits);
+ return;
+ } else {
+ s = first_non_digit;
+ }
+ }
+ }
+ } else { // the first digit is non-zero
+ radix = 10;
+ s = SkipDigits(s);
+ if (s == ThisTokEnd) {
+ // Done.
+ } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
+ Diag(TokLoc, diag::err_invalid_decimal_digit, std::string(s, s+1));
+ return;
+ } else if (*s == '.') {
+ s++;
+ saw_period = true;
+ s = SkipDigits(s);
+ }
+ if (*s == 'e' || *s == 'E') { // exponent
+ s++;
+ saw_exponent = true;
+ if (*s == '+' || *s == '-') s++; // sign
+ const char *first_non_digit = SkipDigits(s);
+ if (first_non_digit == s) {
+ Diag(TokLoc, diag::err_exponent_has_no_digits);
+ return;
+ } else {
+ s = first_non_digit;
+ }
+ }
+ }
+
+ SuffixBegin = s;
+
+ // Parse the suffix. At this point we can classify whether we have an FP or
+ // integer constant.
+ bool isFPConstant = isFloatingLiteral();
+
+ // Loop over all of the characters of the suffix. If we see something bad,
+ // we break out of the loop.
+ for (; s != ThisTokEnd; ++s) {
+ switch (*s) {
+ case 'f': // FP Suffix for "float"
+ case 'F':
+ if (!isFPConstant) break; // Error for integer constant.
+ if (isFloat || isLong) break; // FF, LF invalid.
+ isFloat = true;
+ continue; // Success.
+ case 'u':
+ case 'U':
+ if (isFPConstant) break; // Error for floating constant.
+ if (isUnsigned) break; // Cannot be repeated.
+ isUnsigned = true;
+ continue; // Success.
+ case 'l':
+ case 'L':
+ if (isLong || isLongLong) break; // Cannot be repeated.
+ if (isFloat) break; // LF invalid.
+
+ // Check for long long. The L's need to be adjacent and the same case.
+ if (s+1 != ThisTokEnd && s[1] == s[0]) {
+ if (isFPConstant) break; // long long invalid for floats.
+ isLongLong = true;
+ ++s; // Eat both of them.
+ } else {
+ isLong = true;
+ }
+ continue; // Success.
+ case 'i':
+ case 'I':
+ case 'j':
+ case 'J':
+ if (isImaginary) break; // Cannot be repeated.
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
+ diag::ext_imaginary_constant);
+ isImaginary = true;
+ continue; // Success.
+ }
+ // If we reached here, there was an error.
+ break;
+ }
+
+ // Report an error if there are any.
+ if (s != ThisTokEnd) {
+ TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
+ Diag(TokLoc, isFPConstant ? diag::err_invalid_suffix_float_constant :
+ diag::err_invalid_suffix_integer_constant,
+ std::string(SuffixBegin, ThisTokEnd));
+ return;
+ }
+}
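+// Suffix classification sketches for the loop above (not exhaustive):
+//
+//   123u    -> isUnsigned              123ull -> isUnsigned, isLongLong
+//   1.5f    -> isFloat                 3i     -> isImaginary (an extension)
+//   0x1.8p3 -> saw_period and saw_exponent (only when hex floats are enabled)
+//   1.0ll   -> error: 'll' is an invalid suffix on a floating constant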
+
+/// GetIntegerValue - Convert this numeric literal value to an APInt that
+/// matches Val's input width. If there is an overflow, set Val to the low bits
+/// of the result and return true. Otherwise, return false.
+bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
+ Val = 0;
+ s = DigitsBegin;
+
+ llvm::APInt RadixVal(Val.getBitWidth(), radix);
+ llvm::APInt CharVal(Val.getBitWidth(), 0);
+ llvm::APInt OldVal = Val;
+
+ bool OverflowOccurred = false;
+ while (s < SuffixBegin) {
+ unsigned C = HexDigitValue(*s++);
+
+ // If this letter is out of bound for this radix, reject it.
+ assert(C < radix && "NumericLiteralParser ctor should have rejected this");
+
+ CharVal = C;
+
+ // Add the digit to the value in the appropriate radix. If adding in digits
+ // made the value smaller, then this overflowed.
+ OldVal = Val;
+
+ // Multiply by radix, did overflow occur on the multiply?
+ Val *= RadixVal;
+ OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
+
+ OldVal = Val;
+ // Add value, did overflow occur on the value?
+ Val += CharVal;
+ OverflowOccurred |= Val.ult(OldVal);
+ OverflowOccurred |= Val.ult(CharVal);
+ }
+ return OverflowOccurred;
+}
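+// A minimal sketch of the overflow detection above: multiplying by the radix
+// must divide back to the old value, and adding a digit must not wrap below
+// it; e.g. parsing "4294967296" into a 32-bit APInt wraps on the final digit
+// addition and returns true.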
+
+llvm::APFloat NumericLiteralParser::
+GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) {
+ using llvm::APFloat;
+
+ llvm::SmallVector<char,256> floatChars;
+ for (unsigned i = 0, n = ThisTokEnd-ThisTokBegin; i != n; ++i)
+ floatChars.push_back(ThisTokBegin[i]);
+
+ floatChars.push_back('\0');
+
+ APFloat V (Format, APFloat::fcZero, false);
+ APFloat::opStatus status;
+
+ status = V.convertFromString(&floatChars[0],APFloat::rmNearestTiesToEven);
+
+ if (isExact)
+ *isExact = status == APFloat::opOK;
+
+ return V;
+}
+
+void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
+ const std::string &M) {
+ PP.Diag(Loc, DiagID, M);
+ hadError = true;
+}
+
+
+CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
+ SourceLocation Loc, Preprocessor &PP) {
+ // At this point we know that the character matches the regex "L?'.*'".
+ HadError = false;
+ Value = 0;
+
+ // Determine if this is a wide character.
+ IsWide = begin[0] == 'L';
+ if (IsWide) ++begin;
+
+  // Skip over the opening quote.
+ assert(begin[0] == '\'' && "Invalid token lexed");
+ ++begin;
+
+ // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
+ // size of "value".
+ assert(PP.getTargetInfo().getIntWidth() == 32 &&
+ "Assumes sizeof(int) == 4 for now");
+ // FIXME: This assumes that wchar_t is 32-bits for now.
+ assert(PP.getTargetInfo().getWCharWidth() == 32 &&
+ "Assumes sizeof(wchar_t) == 4 for now");
+ // FIXME: This extensively assumes that 'char' is 8-bits.
+ assert(PP.getTargetInfo().getCharWidth() == 8 &&
+ "Assumes char is 8 bits");
+
+ bool isFirstChar = true;
+ bool isMultiChar = false;
+ while (begin[0] != '\'') {
+ unsigned ResultChar;
+ if (begin[0] != '\\') // If this is a normal character, consume it.
+ ResultChar = *begin++;
+ else // Otherwise, this is an escape character.
+ ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
+
+ // If this is a multi-character constant (e.g. 'abc'), handle it. These are
+ // implementation defined (C99 6.4.4.4p10).
+ if (!isFirstChar) {
+ // If this is the second character being processed, do special handling.
+ if (!isMultiChar) {
+ isMultiChar = true;
+
+ // Warn about discarding the top bits for multi-char wide-character
+ // constants (L'abcd').
+ if (IsWide)
+ PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
+ }
+
+ if (IsWide) {
+ // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
+ Value = 0;
+ } else {
+ // Narrow character literals act as though their value is concatenated
+ // in this implementation.
+ if (((Value << 8) >> 8) != Value)
+ PP.Diag(Loc, diag::warn_char_constant_too_large);
+ Value <<= 8;
+ }
+ }
+
+ Value += ResultChar;
+ isFirstChar = false;
+ }
+
+ // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
+ // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
+  // character constants are not sign extended in this implementation:
+  // '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
+ if (!IsWide && !isMultiChar && (Value & 128) &&
+ PP.getTargetInfo().isCharSigned())
+ Value = (signed char)Value;
+}
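+// Value computation sketches for the loop above (8-bit char, as asserted):
+//
+//   'a'    -> 97
+//   'ab'   -> ('a' << 8) | 'b' = 24930 (warns only once bytes shift out)
+//   L'ab'  -> 'b', i.e. 98 (GCC-compatible: earlier characters discarded)
+//   '\xFF' -> -1 when char is signed for the target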
+
+
+/// string-literal: [C99 6.4.5]
+/// " [s-char-sequence] "
+/// L" [s-char-sequence] "
+/// s-char-sequence:
+/// s-char
+/// s-char-sequence s-char
+/// s-char:
+/// any source character except the double quote ",
+/// backslash \, or newline character
+/// escape-character
+/// universal-character-name
+/// escape-character: [C99 6.4.4.4]
+/// \ escape-code
+/// universal-character-name
+/// escape-code:
+/// character-escape-code
+/// octal-escape-code
+/// hex-escape-code
+/// character-escape-code: one of
+/// n t b r f v a
+/// \ ' " ?
+/// octal-escape-code:
+/// octal-digit
+/// octal-digit octal-digit
+/// octal-digit octal-digit octal-digit
+/// hex-escape-code:
+/// x hex-digit
+/// hex-escape-code hex-digit
+/// universal-character-name:
+/// \u hex-quad
+/// \U hex-quad hex-quad
+/// hex-quad:
+/// hex-digit hex-digit hex-digit hex-digit
+///
+StringLiteralParser::
+StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
+ Preprocessor &pp, TargetInfo &t)
+ : PP(pp), Target(t) {
+ // Scan all of the string portions, remember the max individual token length,
+ // computing a bound on the concatenated string length, and see whether any
+ // piece is a wide-string. If any of the string portions is a wide-string
+ // literal, the result is a wide-string literal [C99 6.4.5p4].
+ MaxTokenLength = StringToks[0].getLength();
+ SizeBound = StringToks[0].getLength()-2; // -2 for "".
+ AnyWide = StringToks[0].is(tok::wide_string_literal);
+
+ hadError = false;
+
+ // Implement Translation Phase #6: concatenation of string literals
+  // (C99 5.1.1.2p1). The common case is only one string fragment.
+ for (unsigned i = 1; i != NumStringToks; ++i) {
+ // The string could be shorter than this if it needs cleaning, but this is a
+ // reasonable bound, which is all we need.
+ SizeBound += StringToks[i].getLength()-2; // -2 for "".
+
+ // Remember maximum string piece length.
+ if (StringToks[i].getLength() > MaxTokenLength)
+ MaxTokenLength = StringToks[i].getLength();
+
+ // Remember if we see any wide strings.
+ AnyWide |= StringToks[i].is(tok::wide_string_literal);
+ }
+
+
+ // Include space for the null terminator.
+ ++SizeBound;
+
+ // TODO: K&R warning: "traditional C rejects string constant concatenation"
+
+ // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
+ // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
+ wchar_tByteWidth = ~0U;
+ if (AnyWide) {
+ wchar_tByteWidth = Target.getWCharWidth();
+ assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
+ wchar_tByteWidth /= 8;
+ }
+
+ // The output buffer size needs to be large enough to hold wide characters.
+ // This is a worst-case assumption which basically corresponds to L"" "long".
+ if (AnyWide)
+ SizeBound *= wchar_tByteWidth;
+
+ // Size the temporary buffer to hold the result string data.
+ ResultBuf.resize(SizeBound);
+
+ // Likewise, but for each string piece.
+ llvm::SmallString<512> TokenBuf;
+ TokenBuf.resize(MaxTokenLength);
+
+ // Loop over all the strings, getting their spelling, and expanding them to
+ // wide strings as appropriate.
+ ResultPtr = &ResultBuf[0]; // Next byte to fill in.
+
+ Pascal = false;
+
+ for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+ const char *ThisTokBuf = &TokenBuf[0];
+ // Get the spelling of the token, which eliminates trigraphs, etc. We know
+ // that ThisTokBuf points to a buffer that is big enough for the whole token
+ // and 'spelled' tokens can only shrink.
+ unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+ const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
+
+ // TODO: Input character set mapping support.
+
+ // Skip L marker for wide strings.
+ bool ThisIsWide = false;
+ if (ThisTokBuf[0] == 'L') {
+ ++ThisTokBuf;
+ ThisIsWide = true;
+ }
+
+ assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+ ++ThisTokBuf;
+
+    // Check if this is a Pascal string.
+ if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
+ ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
+
+      // If the \p sequence is found in the first token, we have a Pascal
+      // string. Otherwise, if we already have a Pascal string, ignore the
+      // first \p.
+ if (i == 0) {
+ ++ThisTokBuf;
+ Pascal = true;
+ } else if (Pascal)
+ ThisTokBuf += 2;
+ }
+
+ while (ThisTokBuf != ThisTokEnd) {
+ // Is this a span of non-escape characters?
+ if (ThisTokBuf[0] != '\\') {
+ const char *InStart = ThisTokBuf;
+ do {
+ ++ThisTokBuf;
+ } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+
+ // Copy the character span over.
+ unsigned Len = ThisTokBuf-InStart;
+ if (!AnyWide) {
+ memcpy(ResultPtr, InStart, Len);
+ ResultPtr += Len;
+ } else {
+ // Note: our internal rep of wide char tokens is always little-endian.
+ for (; Len; --Len, ++InStart) {
+ *ResultPtr++ = InStart[0];
+ // Add zeros at the end.
+ for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+ *ResultPtr++ = 0;
+ }
+ }
+ continue;
+ }
+
+ // Otherwise, this is an escape character. Process it.
+ unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
+ StringToks[i].getLocation(),
+ ThisIsWide, PP);
+
+ // Note: our internal rep of wide char tokens is always little-endian.
+ *ResultPtr++ = ResultChar & 0xFF;
+
+ if (AnyWide) {
+ for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+ *ResultPtr++ = ResultChar >> i*8;
+ }
+ }
+ }
+
+ // Add zero terminator.
+ *ResultPtr = 0;
+ if (AnyWide) {
+ for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+ *ResultPtr++ = 0;
+ }
+
+ if (Pascal)
+ ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
+}
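+// Concatenation sketches for the constructor above:
+//
+//   "foo" "bar"   -> one narrow literal, "foobar"
+//   "foo" L"bar"  -> a wide result; every byte is widened to wchar_t's size
+//   "\pHi"        -> with PascalStrings on, byte 0 is rewritten to the length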
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
new file mode 100644
index 00000000000..a26e50eb762
--- /dev/null
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -0,0 +1,225 @@
+//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroArgs interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+using namespace clang;
+
+/// MacroArgs ctor function - Create a new MacroArgs object with the specified
+/// macro and argument info.
+MacroArgs *MacroArgs::create(const MacroInfo *MI,
+ const Token *UnexpArgTokens,
+ unsigned NumToks, bool VarargsElided) {
+ assert(MI->isFunctionLike() &&
+ "Can't have args for an object-like macro!");
+
+ // Allocate memory for the MacroArgs object with the lexer tokens at the end.
+ MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) +
+ NumToks*sizeof(Token));
+ // Construct the macroargs object.
+ new (Result) MacroArgs(NumToks, VarargsElided);
+
+ // Copy the actual unexpanded tokens to immediately after the result ptr.
+ if (NumToks)
+ memcpy(const_cast<Token*>(Result->getUnexpArgument(0)),
+ UnexpArgTokens, NumToks*sizeof(Token));
+
+ return Result;
+}
+
+/// destroy - Destroy and deallocate the memory for this object.
+///
+void MacroArgs::destroy() {
+ // Run the dtor to deallocate the vectors.
+ this->~MacroArgs();
+ // Release the memory for the object.
+ free(this);
+}
+
+
+/// getArgLength - Given a pointer to an expanded or unexpanded argument,
+/// return the number of tokens, not counting the EOF, that make up the
+/// argument.
+unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
+ unsigned NumArgTokens = 0;
+ for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
+ ++NumArgTokens;
+ return NumArgTokens;
+}
+
+
+/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
+///
+const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
+ // The unexpanded argument tokens start immediately after the MacroArgs object
+ // in memory.
+ const Token *Start = (const Token *)(this+1);
+ const Token *Result = Start;
+ // Scan to find Arg.
+ for (; Arg; ++Result) {
+ assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
+ if (Result->is(tok::eof))
+ --Arg;
+ }
+ return Result;
+}
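+// Layout sketch: for an invocation F(a, b), the trailing token storage is
+//
+//   [ 'a' eof 'b' eof ]
+//
+// so getUnexpArgument(1) advances past the first eof separator to reach 'b'.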
+
+
+/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
+/// by pre-expansion, return false. Otherwise, conservatively return true.
+bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
+ Preprocessor &PP) const {
+ // If there are no identifiers in the argument list, or if the identifiers are
+ // known to not be macros, pre-expansion won't modify it.
+ for (; ArgTok->isNot(tok::eof); ++ArgTok)
+ if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) {
+ if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled())
+ // Return true even though the macro could be a function-like macro
+ // without a following '(' token.
+ return true;
+ }
+ return false;
+}
+
+/// getPreExpArgument - Return the pre-expanded form of the specified
+/// argument.
+const std::vector<Token> &
+MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) {
+ assert(Arg < NumUnexpArgTokens && "Invalid argument number!");
+
+ // If we have already computed this, return it.
+ if (PreExpArgTokens.empty())
+ PreExpArgTokens.resize(NumUnexpArgTokens);
+
+ std::vector<Token> &Result = PreExpArgTokens[Arg];
+ if (!Result.empty()) return Result;
+
+ const Token *AT = getUnexpArgument(Arg);
+ unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
+
+ // Otherwise, we have to pre-expand this argument, populating Result. To do
+ // this, we set up a fake TokenLexer to lex from the unexpanded argument
+ // list. With this installed, we lex expanded tokens until we hit the EOF
+ // token at the end of the unexp list.
+ PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
+ false /*owns tokens*/);
+
+ // Lex all of the macro-expanded tokens into Result.
+ do {
+ Result.push_back(Token());
+ PP.Lex(Result.back());
+ } while (Result.back().isNot(tok::eof));
+
+ // Pop the token stream off the top of the stack. We know that the internal
+ // pointer inside of it is to the "end" of the token stream, but the stack
+ // will not otherwise be popped until the next token is lexed. The problem is
+ // that the token may be lexed sometime after the vector of tokens itself is
+ // destroyed, which would be badness.
+ PP.RemoveTopOfLexerStack();
+ return Result;
+}
+
+
+/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
+/// tokens into the literal string token that should be produced by the C #
+/// preprocessor operator. If Charify is true, then it should be turned into
+/// a character literal for the Microsoft charize (#@) extension.
+///
+Token MacroArgs::StringifyArgument(const Token *ArgToks,
+ Preprocessor &PP, bool Charify) {
+ Token Tok;
+ Tok.startToken();
+ Tok.setKind(tok::string_literal);
+
+ const Token *ArgTokStart = ArgToks;
+
+ // Stringify all the tokens.
+ std::string Result = "\"";
+ // FIXME: Optimize this loop to not use std::strings.
+ bool isFirst = true;
+ for (; ArgToks->isNot(tok::eof); ++ArgToks) {
+ const Token &Tok = *ArgToks;
+ if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
+ Result += ' ';
+ isFirst = false;
+
+ // If this is a string or character constant, escape the token as specified
+ // by 6.10.3.2p2.
+ if (Tok.is(tok::string_literal) || // "foo"
+ Tok.is(tok::wide_string_literal) || // L"foo"
+ Tok.is(tok::char_constant)) { // 'x' and L'x'.
+ Result += Lexer::Stringify(PP.getSpelling(Tok));
+ } else {
+ // Otherwise, just append the token.
+ Result += PP.getSpelling(Tok);
+ }
+ }
+
+ // If the last character of the string is a \, and if it isn't escaped, this
+ // is an invalid string literal, diagnose it as specified in C99.
+ if (Result[Result.size()-1] == '\\') {
+    // Count the number of consecutive \ characters. If even, then they are
+ // just escaped backslashes, otherwise it's an error.
+ unsigned FirstNonSlash = Result.size()-2;
+ // Guaranteed to find the starting " if nothing else.
+ while (Result[FirstNonSlash] == '\\')
+ --FirstNonSlash;
+ if ((Result.size()-1-FirstNonSlash) & 1) {
+ // Diagnose errors for things like: #define F(X) #X / F(\)
+ PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
+ Result.erase(Result.end()-1); // remove one of the \'s.
+ }
+ }
+ Result += '"';
+
+ // If this is the charify operation and the result is not a legal character
+ // constant, diagnose it.
+ if (Charify) {
+ // First step, turn double quotes into single quotes:
+ Result[0] = '\'';
+ Result[Result.size()-1] = '\'';
+
+ // Check for bogus character.
+ bool isBad = false;
+ if (Result.size() == 3) {
+ isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
+ } else {
+ isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
+ }
+
+ if (isBad) {
+ PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
+ Result = "' '"; // Use something arbitrary, but legal.
+ }
+ }
+
+ Tok.setLength(Result.size());
+ Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
+ return Tok;
+}
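+// Stringification sketches for the routine above:
+//
+//   #define S(x) #x
+//   S(a  +  b) -> "a + b"    (interior whitespace collapses to single spaces)
+//   S("hi")    -> "\"hi\""   (quotes and backslashes are escaped)
+//   S(\)       -> pp_invalid_string_literal; the trailing backslash is dropped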
+
+/// getStringifiedArgument - Compute, cache, and return the specified argument
+/// that has been 'stringified' as required by the # operator.
+const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
+ Preprocessor &PP) {
+ assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
+ if (StringifiedArgs.empty()) {
+ StringifiedArgs.resize(getNumArguments());
+ memset(&StringifiedArgs[0], 0,
+ sizeof(StringifiedArgs[0])*getNumArguments());
+ }
+ if (StringifiedArgs[ArgNo].isNot(tok::string_literal))
+ StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP);
+ return StringifiedArgs[ArgNo];
+}
diff --git a/clang/lib/Lex/MacroArgs.h b/clang/lib/Lex/MacroArgs.h
new file mode 100644
index 00000000000..4b22fa18aa8
--- /dev/null
+++ b/clang/lib/Lex/MacroArgs.h
@@ -0,0 +1,109 @@
+//===--- MacroArgs.h - Formal argument info for Macros ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MacroArgs interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_MACROARGS_H
+#define LLVM_CLANG_MACROARGS_H
+
+#include <vector>
+
+namespace clang {
+ class MacroInfo;
+ class Preprocessor;
+ class Token;
+
+/// MacroArgs - An instance of this class captures information about
+/// the formal arguments specified to a function-like macro invocation.
+class MacroArgs {
+ /// NumUnexpArgTokens - The number of raw, unexpanded tokens for the
+ /// arguments. All of the actual argument tokens are allocated immediately
+ /// after the MacroArgs object in memory. This is all of the arguments
+ /// concatenated together, with 'EOF' markers at the end of each argument.
+ unsigned NumUnexpArgTokens;
+
+ /// PreExpArgTokens - Pre-expanded tokens for arguments that need them. Empty
+ /// if not yet computed. This includes the EOF marker at the end of the
+ /// stream.
+ std::vector<std::vector<Token> > PreExpArgTokens;
+
+ /// StringifiedArgs - This contains arguments in 'stringified' form. If the
+ /// stringified form of an argument has not yet been computed, this is empty.
+ std::vector<Token> StringifiedArgs;
+
+ /// VarargsElided - True if this is a C99 style varargs macro invocation and
+ /// there was no argument specified for the "..." argument. If the argument
+ /// was specified (even empty) or this isn't a C99 style varargs function, or
+ /// if in strict mode and the C99 varargs macro had only a ... argument, this
+ /// is false.
+ bool VarargsElided;
+
+ MacroArgs(unsigned NumToks, bool varargsElided)
+ : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided) {}
+ ~MacroArgs() {}
+public:
+ /// MacroArgs ctor function - Create a new MacroArgs object with the specified
+ /// macro and argument info.
+ static MacroArgs *create(const MacroInfo *MI,
+ const Token *UnexpArgTokens,
+ unsigned NumArgTokens, bool VarargsElided);
+
+ /// destroy - Destroy and deallocate the memory for this object.
+ ///
+ void destroy();
+
+ /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
+ /// by pre-expansion, return false. Otherwise, conservatively return true.
+ bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const;
+
+ /// getUnexpArgument - Return a pointer to the first token of the unexpanded
+ /// token list for the specified formal.
+ ///
+ const Token *getUnexpArgument(unsigned Arg) const;
+
+ /// getArgLength - Given a pointer to an expanded or unexpanded argument,
+ /// return the number of tokens, not counting the EOF, that make up the
+ /// argument.
+ static unsigned getArgLength(const Token *ArgPtr);
+
+ /// getPreExpArgument - Return the pre-expanded form of the specified
+ /// argument.
+ const std::vector<Token> &
+ getPreExpArgument(unsigned Arg, Preprocessor &PP);
+
+ /// getStringifiedArgument - Compute, cache, and return the specified argument
+ /// that has been 'stringified' as required by the # operator.
+ const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP);
+
+ /// getNumArguments - Return the number of arguments passed into this macro
+ /// invocation.
+ unsigned getNumArguments() const { return NumUnexpArgTokens; }
+
+
+ /// isVarargsElidedUse - Return true if this is a C99 style varargs macro
+ /// invocation and there was no argument specified for the "..." argument. If
+ /// the argument was specified (even empty) or this isn't a C99 style varargs
+ /// function, or if in strict mode and the C99 varargs macro had only a ...
+ /// argument, this returns false.
+ bool isVarargsElidedUse() const { return VarargsElided; }
+
+ /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
+ /// tokens into the literal string token that should be produced by the C #
+ /// preprocessor operator. If Charify is true, then it should be turned into
+ /// a character literal for the Microsoft charize (#@) extension.
+ ///
+ static Token StringifyArgument(const Token *ArgToks,
+ Preprocessor &PP, bool Charify = false);
+};
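+
+// Worked example (illustrative note, not in the original commit): given
+//   #define STR(x) #x
+//   STR(a  "b\n")
+// C99 6.10.3.2p2 collapses each interior run of whitespace to one space and
+// escapes embedded quotes and backslashes, so StringifyArgument produces the
+// single string-literal token:
+//   "a \"b\\n\""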
+
+} // end namespace clang
+
+#endif
diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp
new file mode 100644
index 00000000000..de19ff502a6
--- /dev/null
+++ b/clang/lib/Lex/MacroInfo.cpp
@@ -0,0 +1,70 @@
+//===--- MacroInfo.cpp - Information about #defined identifiers -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) {
+ IsFunctionLike = false;
+ IsC99Varargs = false;
+ IsGNUVarargs = false;
+ IsBuiltinMacro = false;
+ IsDisabled = false;
+ IsUsed = true;
+
+ ArgumentList = 0;
+ NumArguments = 0;
+}
+
+/// isIdenticalTo - Return true if the specified macro definition is equal to
+/// this macro in spelling, arguments, and whitespace. This is used to emit
+/// duplicate definition warnings. This implements the rules in C99 6.10.3.
+///
+bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
+ // Check # tokens in replacement, number of args, and various flags all match.
+ if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
+ getNumArgs() != Other.getNumArgs() ||
+ isFunctionLike() != Other.isFunctionLike() ||
+ isC99Varargs() != Other.isC99Varargs() ||
+ isGNUVarargs() != Other.isGNUVarargs())
+ return false;
+
+ // Check arguments.
+ for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end();
+ I != E; ++I, ++OI)
+ if (*I != *OI) return false;
+
+ // Check all the tokens.
+ for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
+ const Token &A = ReplacementTokens[i];
+ const Token &B = Other.ReplacementTokens[i];
+ if (A.getKind() != B.getKind() ||
+ A.isAtStartOfLine() != B.isAtStartOfLine() ||
+ A.hasLeadingSpace() != B.hasLeadingSpace())
+ return false;
+
+ // If this is an identifier, it is easy.
+ if (A.getIdentifierInfo() || B.getIdentifierInfo()) {
+ if (A.getIdentifierInfo() != B.getIdentifierInfo())
+ return false;
+ continue;
+ }
+
+ // Otherwise, check the spelling.
+ if (PP.getSpelling(A) != PP.getSpelling(B))
+ return false;
+ }
+
+ return true;
+}
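+
+// Example (illustrative note, not in the original commit): under the rules
+// above,
+//   #define A(x) x + 1  redefined as  #define A(x) x + 1   is accepted, but
+//   #define A(x) x+1    redefined as  #define A(x) x + 1   is diagnosed,
+// because the second pair differs in whitespace separation, which the
+// hasLeadingSpace() comparison on each replacement token catches.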
diff --git a/clang/lib/Lex/Makefile b/clang/lib/Lex/Makefile
new file mode 100644
index 00000000000..187448c9922
--- /dev/null
+++ b/clang/lib/Lex/Makefile
@@ -0,0 +1,28 @@
+##===- clang/lib/Lex/Makefile ------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# This implements the Lexer library for the C-Language front-end.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+include $(LEVEL)/Makefile.config
+
+LIBRARYNAME := clangLex
+BUILD_ARCHIVE = 1
+CXXFLAGS = -fno-rtti
+
+ifeq ($(ARCH),PowerPC)
+CXXFLAGS += -maltivec
+endif
+
+CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include
+
+include $(LEVEL)/Makefile.common
+
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
new file mode 100644
index 00000000000..b24f5b63760
--- /dev/null
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -0,0 +1,1153 @@
+//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements # directive processing for the Preprocessor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceManager.h"
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Utility Methods for Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
+/// current line until the tok::eom token is found.
+void Preprocessor::DiscardUntilEndOfDirective() {
+ Token Tmp;
+ do {
+ LexUnexpandedToken(Tmp);
+ } while (Tmp.isNot(tok::eom));
+}
+
+/// isCXXNamedOperator - Returns "true" if the token is a named operator in C++.
+static bool isCXXNamedOperator(const std::string &Spelling) {
+ return Spelling == "and" || Spelling == "bitand" || Spelling == "bitor" ||
+ Spelling == "compl" || Spelling == "not" || Spelling == "not_eq" ||
+ Spelling == "or" || Spelling == "xor";
+}
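+
+// Example (illustrative note, not in the original commit): in C++,
+//   #define and 1
+// is rejected by ReadMacroName below with err_pp_operator_used_as_macro_name:
+// 'and' lexes as the alternative token for '&&', so it carries no
+// IdentifierInfo and only its spelling identifies it.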
+
+/// ReadMacroName - Lex and validate a macro name, which occurs after a
+/// #define or #undef. This sets the token kind to eom and discards the rest
+/// of the macro line if the macro name is invalid. isDefineUndef is 1 if
+/// this is due to a #define, 2 if due to a #undef, 0 if it is something
+/// else (e.g. #ifdef).
+void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {
+ // Read the token, don't allow macro expansion on it.
+ LexUnexpandedToken(MacroNameTok);
+
+ // Missing macro name?
+ if (MacroNameTok.is(tok::eom))
+ return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
+
+ IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
+ if (II == 0) {
+ std::string Spelling = getSpelling(MacroNameTok);
+ if (isCXXNamedOperator(Spelling))
+      // C++ 2.5p2: Alternative tokens behave the same as their primary
+      // tokens except for their spellings.
+ Diag(MacroNameTok, diag::err_pp_operator_used_as_macro_name, Spelling);
+ else
+ Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
+ // Fall through on error.
+ } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) {
+ // Error if defining "defined": C99 6.10.8.4.
+ Diag(MacroNameTok, diag::err_defined_macro_name);
+ } else if (isDefineUndef && II->hasMacroDefinition() &&
+ getMacroInfo(II)->isBuiltinMacro()) {
+ // Error if defining "__LINE__" and other builtins: C99 6.10.8.4.
+ if (isDefineUndef == 1)
+ Diag(MacroNameTok, diag::pp_redef_builtin_macro);
+ else
+ Diag(MacroNameTok, diag::pp_undef_builtin_macro);
+ } else {
+ // Okay, we got a good identifier node. Return it.
+ return;
+ }
+
+ // Invalid macro name, read and discard the rest of the line. Then set the
+ // token kind to tok::eom.
+ MacroNameTok.setKind(tok::eom);
+ return DiscardUntilEndOfDirective();
+}
+
+/// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If
+/// not, emit a diagnostic and consume up until the eom.
+void Preprocessor::CheckEndOfDirective(const char *DirType) {
+ Token Tmp;
+ // Lex unexpanded tokens: macros might expand to zero tokens, causing us to
+ // miss diagnosing invalid lines.
+ LexUnexpandedToken(Tmp);
+
+ // There should be no tokens after the directive, but we allow them as an
+ // extension.
+ while (Tmp.is(tok::comment)) // Skip comments in -C mode.
+ LexUnexpandedToken(Tmp);
+
+ if (Tmp.isNot(tok::eom)) {
+ Diag(Tmp, diag::ext_pp_extra_tokens_at_eol, DirType);
+ DiscardUntilEndOfDirective();
+ }
+}
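+
+// Example (illustrative note, not in the original commit):
+//   #endif FOO
+// reaches the Tmp.isNot(tok::eom) case above: the trailing FOO triggers the
+// ext_pp_extra_tokens_at_eol extension warning (not a hard error) and the
+// rest of the line is discarded.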
+
+
+
+/// SkipExcludedConditionalBlock - We just read a #if or related directive and
+/// decided that the subsequent tokens are in the #if'd out portion of the
+/// file. Lex the rest of the file, until we see an #endif. If
+/// FoundNonSkipPortion is true, then we have already emitted code for part of
+/// this #if directive, so #else/#elif blocks should never be entered. If
+/// FoundElse is true, then we have already seen a #else in this conditional,
+/// so any subsequent #else directive is a duplicate. When this returns, the
+/// caller can lex the first valid token.
+void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
+ bool FoundNonSkipPortion,
+ bool FoundElse) {
+ ++NumSkipped;
+ assert(CurTokenLexer == 0 && CurLexer &&
+ "Lexing a macro, not a file?");
+
+ CurLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false,
+ FoundNonSkipPortion, FoundElse);
+
+ // Enter raw mode to disable identifier lookup (and thus macro expansion),
+ // disabling warnings, etc.
+ CurLexer->LexingRawMode = true;
+ Token Tok;
+ while (1) {
+ CurLexer->Lex(Tok);
+
+ // If this is the end of the buffer, we have an error.
+ if (Tok.is(tok::eof)) {
+ // Emit errors for each unterminated conditional on the stack, including
+ // the current one.
+ while (!CurLexer->ConditionalStack.empty()) {
+ Diag(CurLexer->ConditionalStack.back().IfLoc,
+ diag::err_pp_unterminated_conditional);
+ CurLexer->ConditionalStack.pop_back();
+ }
+
+ // Just return and let the caller lex after this #include.
+ break;
+ }
+
+ // If this token is not a preprocessor directive, just skip it.
+ if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
+ continue;
+
+ // We just parsed a # character at the start of a line, so we're in
+ // directive mode. Tell the lexer this so any newlines we see will be
+    // converted into an EOM token (which terminates the directive).
+ CurLexer->ParsingPreprocessorDirective = true;
+ CurLexer->KeepCommentMode = false;
+
+
+ // Read the next token, the directive flavor.
+ LexUnexpandedToken(Tok);
+
+ // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
+ // something bogus), skip it.
+ if (Tok.isNot(tok::identifier)) {
+ CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = KeepComments;
+ continue;
+ }
+
+    // If the first letter isn't i or e, it isn't interesting to us. We know that
+ // this is safe in the face of spelling differences, because there is no way
+ // to spell an i/e in a strange way that is another letter. Skipping this
+ // allows us to avoid looking up the identifier info for #define/#undef and
+ // other common directives.
+ const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation());
+ char FirstChar = RawCharData[0];
+ if (FirstChar >= 'a' && FirstChar <= 'z' &&
+ FirstChar != 'i' && FirstChar != 'e') {
+ CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = KeepComments;
+ continue;
+ }
+
+ // Get the identifier name without trigraphs or embedded newlines. Note
+ // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
+ // when skipping.
+ // TODO: could do this with zero copies in the no-clean case by using
+ // strncmp below.
+ char Directive[20];
+ unsigned IdLen;
+ if (!Tok.needsCleaning() && Tok.getLength() < 20) {
+ IdLen = Tok.getLength();
+ memcpy(Directive, RawCharData, IdLen);
+ Directive[IdLen] = 0;
+ } else {
+ std::string DirectiveStr = getSpelling(Tok);
+ IdLen = DirectiveStr.size();
+ if (IdLen >= 20) {
+ CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = KeepComments;
+ continue;
+ }
+ memcpy(Directive, &DirectiveStr[0], IdLen);
+ Directive[IdLen] = 0;
+ }
+
+ if (FirstChar == 'i' && Directive[1] == 'f') {
+ if ((IdLen == 2) || // "if"
+ (IdLen == 5 && !strcmp(Directive+2, "def")) || // "ifdef"
+ (IdLen == 6 && !strcmp(Directive+2, "ndef"))) { // "ifndef"
+ // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
+ // bother parsing the condition.
+ DiscardUntilEndOfDirective();
+ CurLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
+ /*foundnonskip*/false,
+                                     /*foundelse*/false);
+ }
+ } else if (FirstChar == 'e') {
+ if (IdLen == 5 && !strcmp(Directive+1, "ndif")) { // "endif"
+ CheckEndOfDirective("#endif");
+ PPConditionalInfo CondInfo;
+ CondInfo.WasSkipping = true; // Silence bogus warning.
+ bool InCond = CurLexer->popConditionalLevel(CondInfo);
+ InCond = InCond; // Silence warning in no-asserts mode.
+ assert(!InCond && "Can't be skipping if not in a conditional!");
+
+ // If we popped the outermost skipping block, we're done skipping!
+ if (!CondInfo.WasSkipping)
+ break;
+ } else if (IdLen == 4 && !strcmp(Directive+1, "lse")) { // "else".
+ // #else directive in a skipping conditional. If not in some other
+ // skipping conditional, and if #else hasn't already been seen, enter it
+ // as a non-skipping conditional.
+ CheckEndOfDirective("#else");
+ PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();
+
+ // If this is a #else with a #else before it, report the error.
+ if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else);
+
+ // Note that we've seen a #else in this conditional.
+ CondInfo.FoundElse = true;
+
+ // If the conditional is at the top level, and the #if block wasn't
+ // entered, enter the #else block now.
+ if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
+ CondInfo.FoundNonSkip = true;
+ break;
+ }
+ } else if (IdLen == 4 && !strcmp(Directive+1, "lif")) { // "elif".
+ PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();
+
+ bool ShouldEnter;
+      // If this is in a skipping block or if we've already handled this #if
+ // block, don't bother parsing the condition.
+ if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
+ DiscardUntilEndOfDirective();
+ ShouldEnter = false;
+ } else {
+ // Restore the value of LexingRawMode so that identifiers are
+ // looked up, etc, inside the #elif expression.
+ assert(CurLexer->LexingRawMode && "We have to be skipping here!");
+ CurLexer->LexingRawMode = false;
+ IdentifierInfo *IfNDefMacro = 0;
+ ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro);
+ CurLexer->LexingRawMode = true;
+ }
+
+ // If this is a #elif with a #else before it, report the error.
+ if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else);
+
+ // If this condition is true, enter it!
+ if (ShouldEnter) {
+ CondInfo.FoundNonSkip = true;
+ break;
+ }
+ }
+ }
+
+ CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = KeepComments;
+ }
+
+  // Finally, if we are out of the conditional (we saw an #endif or ran off the
+  // end of the file), stop skipping and return to lexing whatever came after
+  // the #if block.
+ CurLexer->LexingRawMode = false;
+}
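+
+// Example of the skipping logic above (illustrative note, not in the
+// original commit):
+//   #if 0      // caller enters SkipExcludedConditionalBlock
+//   # if 1     // pushed as a skipping level; its condition is never evaluated
+//   # endif    // pops the inner level; CondInfo.WasSkipping, so keep going
+//   #else      // outermost level with no prior #else: FoundNonSkip set, break
+//   ...        // lexing resumes here
+//   #endif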
+
+/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
+/// return null on failure. isAngled indicates whether the file reference is
+/// for system #include's or not (i.e. using <> instead of "").
+const FileEntry *Preprocessor::LookupFile(const char *FilenameStart,
+ const char *FilenameEnd,
+ bool isAngled,
+ const DirectoryLookup *FromDir,
+ const DirectoryLookup *&CurDir) {
+ // If the header lookup mechanism may be relative to the current file, pass in
+ // info about where the current file is.
+ const FileEntry *CurFileEnt = 0;
+ if (!FromDir) {
+ SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
+ CurFileEnt = SourceMgr.getFileEntryForLoc(FileLoc);
+ }
+
+ // Do a standard file entry lookup.
+ CurDir = CurDirLookup;
+ const FileEntry *FE =
+ HeaderInfo.LookupFile(FilenameStart, FilenameEnd,
+ isAngled, FromDir, CurDir, CurFileEnt);
+ if (FE) return FE;
+
+ // Otherwise, see if this is a subframework header. If so, this is relative
+ // to one of the headers on the #include stack. Walk the list of the current
+ // headers on the #include stack and pass them to HeaderInfo.
+ if (CurLexer && !CurLexer->Is_PragmaLexer) {
+ if ((CurFileEnt = SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc())))
+ if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd,
+ CurFileEnt)))
+ return FE;
+ }
+
+ for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
+ IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1];
+ if (ISEntry.TheLexer && !ISEntry.TheLexer->Is_PragmaLexer) {
+ if ((CurFileEnt =
+ SourceMgr.getFileEntryForLoc(ISEntry.TheLexer->getFileLoc())))
+ if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart,
+ FilenameEnd, CurFileEnt)))
+ return FE;
+ }
+ }
+
+ // Otherwise, we really couldn't find the file.
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandleDirective - This callback is invoked when the lexer sees a # token
+/// at the start of a line. This consumes the directive, modifies the
+/// lexer/preprocessor state, and advances the lexer(s) so that the next token
+/// read is the correct one.
+void Preprocessor::HandleDirective(Token &Result) {
+ // FIXME: Traditional: # with whitespace before it not recognized by K&R?
+
+ // We just parsed a # character at the start of a line, so we're in directive
+ // mode. Tell the lexer this so any newlines we see will be converted into an
+ // EOM token (which terminates the directive).
+ CurLexer->ParsingPreprocessorDirective = true;
+
+ ++NumDirectives;
+
+ // We are about to read a token. For the multiple-include optimization FA to
+ // work, we have to remember if we had read any tokens *before* this
+ // pp-directive.
+ bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal();
+
+ // Read the next token, the directive flavor. This isn't expanded due to
+ // C99 6.10.3p8.
+ LexUnexpandedToken(Result);
+
+  // C99 6.10.3p11: Is this preprocessor directive in a macro invocation? e.g.:
+ // #define A(x) #x
+ // A(abc
+ // #warning blah
+ // def)
+ // If so, the user is relying on non-portable behavior, emit a diagnostic.
+ if (InMacroArgs)
+ Diag(Result, diag::ext_embedded_directive);
+
+TryAgain:
+ switch (Result.getKind()) {
+ case tok::eom:
+ return; // null directive.
+ case tok::comment:
+ // Handle stuff like "# /*foo*/ define X" in -E -C mode.
+ LexUnexpandedToken(Result);
+ goto TryAgain;
+
+ case tok::numeric_constant:
+ // FIXME: implement # 7 line numbers!
+ DiscardUntilEndOfDirective();
+ return;
+ default:
+ IdentifierInfo *II = Result.getIdentifierInfo();
+ if (II == 0) break; // Not an identifier.
+
+ // Ask what the preprocessor keyword ID is.
+ switch (II->getPPKeywordID()) {
+ default: break;
+ // C99 6.10.1 - Conditional Inclusion.
+ case tok::pp_if:
+ return HandleIfDirective(Result, ReadAnyTokensBeforeDirective);
+ case tok::pp_ifdef:
+ return HandleIfdefDirective(Result, false, true/*not valid for miopt*/);
+ case tok::pp_ifndef:
+ return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective);
+ case tok::pp_elif:
+ return HandleElifDirective(Result);
+ case tok::pp_else:
+ return HandleElseDirective(Result);
+ case tok::pp_endif:
+ return HandleEndifDirective(Result);
+
+ // C99 6.10.2 - Source File Inclusion.
+ case tok::pp_include:
+ return HandleIncludeDirective(Result); // Handle #include.
+
+ // C99 6.10.3 - Macro Replacement.
+ case tok::pp_define:
+ return HandleDefineDirective(Result);
+ case tok::pp_undef:
+ return HandleUndefDirective(Result);
+
+ // C99 6.10.4 - Line Control.
+ case tok::pp_line:
+ // FIXME: implement #line
+ DiscardUntilEndOfDirective();
+ return;
+
+ // C99 6.10.5 - Error Directive.
+ case tok::pp_error:
+ return HandleUserDiagnosticDirective(Result, false);
+
+ // C99 6.10.6 - Pragma Directive.
+ case tok::pp_pragma:
+ return HandlePragmaDirective();
+
+ // GNU Extensions.
+ case tok::pp_import:
+ return HandleImportDirective(Result);
+ case tok::pp_include_next:
+ return HandleIncludeNextDirective(Result);
+
+ case tok::pp_warning:
+ Diag(Result, diag::ext_pp_warning_directive);
+ return HandleUserDiagnosticDirective(Result, true);
+ case tok::pp_ident:
+ return HandleIdentSCCSDirective(Result);
+ case tok::pp_sccs:
+ return HandleIdentSCCSDirective(Result);
+ case tok::pp_assert:
+ //isExtension = true; // FIXME: implement #assert
+ break;
+ case tok::pp_unassert:
+ //isExtension = true; // FIXME: implement #unassert
+ break;
+ }
+ break;
+ }
+
+ // If we reached here, the preprocessing token is not valid!
+ Diag(Result, diag::err_pp_invalid_directive);
+
+ // Read the rest of the PP line.
+ DiscardUntilEndOfDirective();
+
+ // Okay, we're done parsing the directive.
+}
+
+void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
+ bool isWarning) {
+ // Read the rest of the line raw. We do this because we don't want macros
+ // to be expanded and we don't require that the tokens be valid preprocessing
+ // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
+  // collapse consecutive white space between tokens, but this isn't
+ // specified by the standard.
+ std::string Message = CurLexer->ReadToEndOfLine();
+
+ unsigned DiagID = isWarning ? diag::pp_hash_warning : diag::err_pp_hash_error;
+ return Diag(Tok, DiagID, Message);
+}
+
+/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
+///
+void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
+ // Yes, this directive is an extension.
+ Diag(Tok, diag::ext_pp_ident_directive);
+
+ // Read the string argument.
+ Token StrTok;
+ Lex(StrTok);
+
+ // If the token kind isn't a string, it's a malformed directive.
+ if (StrTok.isNot(tok::string_literal) &&
+ StrTok.isNot(tok::wide_string_literal))
+ return Diag(StrTok, diag::err_pp_malformed_ident);
+
+ // Verify that there is nothing after the string, other than EOM.
+ CheckEndOfDirective("#ident");
+
+ if (Callbacks)
+ Callbacks->Ident(Tok.getLocation(), getSpelling(StrTok));
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Include Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
+/// checked and spelled filename, e.g. as an operand of #include. This returns
+/// true if the input filename was in <>'s or false if it was in ""'s. The
+/// caller is expected to provide a buffer that is large enough to hold the
+/// spelling of the filename, but is also expected to handle the case when
+/// this method decides to use a different buffer.
+bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
+ const char *&BufStart,
+ const char *&BufEnd) {
+ // Get the text form of the filename.
+ assert(BufStart != BufEnd && "Can't have tokens with empty spellings!");
+
+ // Make sure the filename is <x> or "x".
+ bool isAngled;
+ if (BufStart[0] == '<') {
+ if (BufEnd[-1] != '>') {
+ Diag(Loc, diag::err_pp_expects_filename);
+ BufStart = 0;
+ return true;
+ }
+ isAngled = true;
+ } else if (BufStart[0] == '"') {
+ if (BufEnd[-1] != '"') {
+ Diag(Loc, diag::err_pp_expects_filename);
+ BufStart = 0;
+ return true;
+ }
+ isAngled = false;
+ } else {
+ Diag(Loc, diag::err_pp_expects_filename);
+ BufStart = 0;
+ return true;
+ }
+
+ // Diagnose #include "" as invalid.
+ if (BufEnd-BufStart <= 2) {
+ Diag(Loc, diag::err_pp_empty_filename);
+ BufStart = 0;
+ return "";
+ }
+
+ // Skip the brackets.
+ ++BufStart;
+ --BufEnd;
+ return isAngled;
+}
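+
+// Example (illustrative note, not in the original commit): for the spelling
+// <foo/bar.h> this returns true (angled) with BufStart/BufEnd narrowed to
+// foo/bar.h; for "bar.h" it returns false; for "" or a name missing its
+// closing delimiter, it diagnoses and nulls out BufStart.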
+
+/// ConcatenateIncludeName - Handle cases where the #include name is expanded
+/// from a macro as multiple tokens, which need to be glued together. This
+/// occurs for code like:
+/// #define FOO <a/b.h>
+/// #include FOO
+/// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
+///
+/// This code concatenates and consumes tokens up to the '>' token. It returns
+/// false if the '>' was found; otherwise, it returns true after finding and
+/// consuming the EOM marker.
+static bool ConcatenateIncludeName(llvm::SmallVector<char, 128> &FilenameBuffer,
+ Preprocessor &PP) {
+ Token CurTok;
+
+ PP.Lex(CurTok);
+ while (CurTok.isNot(tok::eom)) {
+ // Append the spelling of this token to the buffer. If there was a space
+ // before it, add it now.
+ if (CurTok.hasLeadingSpace())
+ FilenameBuffer.push_back(' ');
+
+ // Get the spelling of the token, directly into FilenameBuffer if possible.
+ unsigned PreAppendSize = FilenameBuffer.size();
+ FilenameBuffer.resize(PreAppendSize+CurTok.getLength());
+
+ const char *BufPtr = &FilenameBuffer[PreAppendSize];
+ unsigned ActualLen = PP.getSpelling(CurTok, BufPtr);
+
+ // If the token was spelled somewhere else, copy it into FilenameBuffer.
+ if (BufPtr != &FilenameBuffer[PreAppendSize])
+ memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
+
+ // Resize FilenameBuffer to the correct size.
+ if (CurTok.getLength() != ActualLen)
+ FilenameBuffer.resize(PreAppendSize+ActualLen);
+
+ // If we found the '>' marker, return success.
+ if (CurTok.is(tok::greater))
+ return false;
+
+ PP.Lex(CurTok);
+ }
+
+ // If we hit the eom marker, emit an error and return true so that the caller
+ // knows the EOM has been read.
+ PP.Diag(CurTok.getLocation(), diag::err_pp_expects_filename);
+ return true;
+}
+
+/// HandleIncludeDirective - The "#include" tokens have just been read, read the
+/// file to be included from the lexer, then include it! This is a common
+/// routine with functionality shared between #include, #include_next and
+/// #import.
+void Preprocessor::HandleIncludeDirective(Token &IncludeTok,
+ const DirectoryLookup *LookupFrom,
+ bool isImport) {
+
+ Token FilenameTok;
+ CurLexer->LexIncludeFilename(FilenameTok);
+
+ // Reserve a buffer to get the spelling.
+ llvm::SmallVector<char, 128> FilenameBuffer;
+ const char *FilenameStart, *FilenameEnd;
+
+ switch (FilenameTok.getKind()) {
+ case tok::eom:
+ // If the token kind is EOM, the error has already been diagnosed.
+ return;
+
+ case tok::angle_string_literal:
+ case tok::string_literal: {
+ FilenameBuffer.resize(FilenameTok.getLength());
+ FilenameStart = &FilenameBuffer[0];
+ unsigned Len = getSpelling(FilenameTok, FilenameStart);
+ FilenameEnd = FilenameStart+Len;
+ break;
+ }
+
+ case tok::less:
+ // This could be a <foo/bar.h> file coming from a macro expansion. In this
+ // case, glue the tokens together into FilenameBuffer and interpret those.
+ FilenameBuffer.push_back('<');
+ if (ConcatenateIncludeName(FilenameBuffer, *this))
+ return; // Found <eom> but no ">"? Diagnostic already emitted.
+ FilenameStart = &FilenameBuffer[0];
+ FilenameEnd = &FilenameBuffer[FilenameBuffer.size()];
+ break;
+ default:
+ Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+ DiscardUntilEndOfDirective();
+ return;
+ }
+
+ bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(),
+ FilenameStart, FilenameEnd);
+ // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+ // error.
+ if (FilenameStart == 0) {
+ DiscardUntilEndOfDirective();
+ return;
+ }
+
+ // Verify that there is nothing after the filename, other than EOM. Use the
+ // preprocessor to lex this in case lexing the filename entered a macro.
+ CheckEndOfDirective("#include");
+
+ // Check that we don't have infinite #include recursion.
+ if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1)
+ return Diag(FilenameTok, diag::err_pp_include_too_deep);
+
+ // Search include directories.
+ const DirectoryLookup *CurDir;
+ const FileEntry *File = LookupFile(FilenameStart, FilenameEnd,
+ isAngled, LookupFrom, CurDir);
+ if (File == 0)
+ return Diag(FilenameTok, diag::err_pp_file_not_found,
+ std::string(FilenameStart, FilenameEnd));
+
+ // Ask HeaderInfo if we should enter this #include file.
+ if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport)) {
+    // If it returns false, #including this file will have no effect.
+ return;
+ }
+
+ // Look up the file, create a File ID for it.
+ unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation());
+ if (FileID == 0)
+ return Diag(FilenameTok, diag::err_pp_file_not_found,
+ std::string(FilenameStart, FilenameEnd));
+
+ // Finally, if all is good, enter the new file!
+ EnterSourceFile(FileID, CurDir);
+}
+
+/// HandleIncludeNextDirective - Implements #include_next.
+///
+void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) {
+ Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
+
+ // #include_next is like #include, except that we start searching after
+ // the current found directory. If we can't do this, issue a
+ // diagnostic.
+ const DirectoryLookup *Lookup = CurDirLookup;
+ if (isInPrimaryFile()) {
+ Lookup = 0;
+ Diag(IncludeNextTok, diag::pp_include_next_in_primary);
+ } else if (Lookup == 0) {
+ Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
+ } else {
+ // Start looking up in the next directory.
+ ++Lookup;
+ }
+
+ return HandleIncludeDirective(IncludeNextTok, Lookup);
+}
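+
+// Example (illustrative note, not in the original commit): if the current
+// header was found in dir2 of -Idir1 -Idir2 -Idir3, then #include_next <a.h>
+// inside it resumes the search at dir3, so a same-named header later on the
+// search path can be layered on top of the current one.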
+
+/// HandleImportDirective - Implements #import.
+///
+void Preprocessor::HandleImportDirective(Token &ImportTok) {
+ Diag(ImportTok, diag::ext_pp_import_directive);
+
+ return HandleIncludeDirective(ImportTok, 0, true);
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Macro Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
+/// definition has just been read. Lex the rest of the arguments and the
+/// closing ), updating MI with what we learn. Return true if an error occurs
+/// parsing the arg list.
+bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) {
+ llvm::SmallVector<IdentifierInfo*, 32> Arguments;
+
+ Token Tok;
+ while (1) {
+ LexUnexpandedToken(Tok);
+ switch (Tok.getKind()) {
+ case tok::r_paren:
+ // Found the end of the argument list.
+ if (Arguments.empty()) { // #define FOO()
+ MI->setArgumentList(Arguments.begin(), Arguments.end());
+ return false;
+ }
+ // Otherwise we have #define FOO(A,)
+ Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
+ return true;
+ case tok::ellipsis: // #define X(... -> C99 varargs
+ // Warn if use of C99 feature in non-C99 mode.
+ if (!Features.C99) Diag(Tok, diag::ext_variadic_macro);
+
+      // Lex the token after the ellipsis.
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+ return true;
+ }
+ // Add the __VA_ARGS__ identifier as an argument.
+ Arguments.push_back(Ident__VA_ARGS__);
+ MI->setIsC99Varargs();
+ MI->setArgumentList(Arguments.begin(), Arguments.end());
+ return false;
+ case tok::eom: // #define X(
+ Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+ return true;
+ default:
+ // Handle keywords and identifiers here to accept things like
+ // #define Foo(for) for.
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ if (II == 0) {
+ // #define X(1
+ Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
+ return true;
+ }
+
+ // If this is already used as an argument, it is used multiple times (e.g.
+      // #define X(A,A).
+ if (std::find(Arguments.begin(), Arguments.end(), II) !=
+ Arguments.end()) { // C99 6.10.3p6
+ Diag(Tok, diag::err_pp_duplicate_name_in_arg_list, II->getName());
+ return true;
+ }
+
+ // Add the argument to the macro info.
+ Arguments.push_back(II);
+
+ // Lex the token after the identifier.
+ LexUnexpandedToken(Tok);
+
+ switch (Tok.getKind()) {
+ default: // #define X(A B
+ Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
+ return true;
+ case tok::r_paren: // #define X(A)
+ MI->setArgumentList(Arguments.begin(), Arguments.end());
+ return false;
+ case tok::comma: // #define X(A,
+ break;
+ case tok::ellipsis: // #define X(A... -> GCC extension
+ // Diagnose extension.
+ Diag(Tok, diag::ext_named_variadic_macro);
+
+        // Lex the token after the ellipsis.
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+ return true;
+ }
+
+ MI->setIsGNUVarargs();
+ MI->setArgumentList(Arguments.begin(), Arguments.end());
+ return false;
+ }
+ }
+ }
+}
+
+/// HandleDefineDirective - Implements #define. This consumes the entire macro
+/// line then lets the caller lex the next real token.
+void Preprocessor::HandleDefineDirective(Token &DefineTok) {
+ ++NumDefined;
+
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok, 1);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eom))
+ return;
+
+ // If we are supposed to keep comments in #defines, reenable comment saving
+ // mode.
+ CurLexer->KeepCommentMode = KeepMacroComments;
+
+ // Create the new macro.
+ MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation());
+
+ Token Tok;
+ LexUnexpandedToken(Tok);
+
+ // If this is a function-like macro definition, parse the argument list,
+ // marking each of the identifiers as being used as macro arguments. Also,
+ // check other constraints on the first token of the macro body.
+ if (Tok.is(tok::eom)) {
+ // If there is no body to this macro, we have no special handling here.
+ } else if (Tok.is(tok::l_paren) && !Tok.hasLeadingSpace()) {
+ // This is a function-like macro definition. Read the argument list.
+ MI->setIsFunctionLike();
+ if (ReadMacroDefinitionArgList(MI)) {
+ // Forget about MI.
+ delete MI;
+ // Throw away the rest of the line.
+ if (CurLexer->ParsingPreprocessorDirective)
+ DiscardUntilEndOfDirective();
+ return;
+ }
+
+ // Read the first token after the arg list for down below.
+ LexUnexpandedToken(Tok);
+ } else if (!Tok.hasLeadingSpace()) {
+    // C99 requires whitespace between the macro name and the body. Emit
+ // a diagnostic for something like "#define X+".
+ if (Features.C99) {
+ Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
+ } else {
+ // FIXME: C90/C++ do not get this diagnostic, but it does get a similar
+ // one in some cases!
+ }
+ } else {
+ // This is a normal token with leading space. Clear the leading space
+ // marker on the first token to get proper expansion.
+ Tok.clearFlag(Token::LeadingSpace);
+ }
+
+ // If this is a definition of a variadic C99 function-like macro, not using
+  // the GNU named varargs extension, enable __VA_ARGS__.
+
+ // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
+ // This gets unpoisoned where it is allowed.
+ assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!");
+ if (MI->isC99Varargs())
+ Ident__VA_ARGS__->setIsPoisoned(false);
+
+ // Read the rest of the macro body.
+ if (MI->isObjectLike()) {
+ // Object-like macros are very simple, just read their body.
+ while (Tok.isNot(tok::eom)) {
+ MI->AddTokenToBody(Tok);
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ }
+
+ } else {
+ // Otherwise, read the body of a function-like macro. This has to validate
+ // the # (stringize) operator.
+ while (Tok.isNot(tok::eom)) {
+ MI->AddTokenToBody(Tok);
+
+ // Check C99 6.10.3.2p1: ensure that # operators are followed by macro
+ // parameters in function-like macro expansions.
+ if (Tok.isNot(tok::hash)) {
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ continue;
+ }
+
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+
+ // Not a macro arg identifier?
+ if (!Tok.getIdentifierInfo() ||
+ MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) {
+ Diag(Tok, diag::err_pp_stringize_not_parameter);
+ delete MI;
+
+ // Disable __VA_ARGS__ again.
+ Ident__VA_ARGS__->setIsPoisoned(true);
+ return;
+ }
+
+ // Things look ok, add the param name token to the macro.
+ MI->AddTokenToBody(Tok);
+
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ }
+ }
+
+
+ // Disable __VA_ARGS__ again.
+ Ident__VA_ARGS__->setIsPoisoned(true);
+
+  // Check that there is no paste (##) operator at the beginning or end of the
+ // replacement list.
+ unsigned NumTokens = MI->getNumTokens();
+ if (NumTokens != 0) {
+ if (MI->getReplacementToken(0).is(tok::hashhash)) {
+ Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
+ delete MI;
+ return;
+ }
+ if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
+ Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
+ delete MI;
+ return;
+ }
+ }
+
+ // If this is the primary source file, remember that this macro hasn't been
+ // used yet.
+ if (isInPrimaryFile())
+ MI->setIsUsed(false);
+
+ // Finally, if this identifier already had a macro defined for it, verify that
+ // the macro bodies are identical and free the old definition.
+ if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) {
+ if (!OtherMI->isUsed())
+ Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
+
+    // Macros must be identical. This means all tokens and whitespace separation
+ // must be the same. C99 6.10.3.2.
+ if (!MI->isIdenticalTo(*OtherMI, *this)) {
+ Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef,
+ MacroNameTok.getIdentifierInfo()->getName());
+ Diag(OtherMI->getDefinitionLoc(), diag::ext_pp_macro_redef2);
+ }
+ delete OtherMI;
+ }
+
+ setMacroInfo(MacroNameTok.getIdentifierInfo(), MI);
+}
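+
+// Example (illustrative note, not in the original commit):
+//   #define MAX 10
+//   #define MAX 10   // accepted: identical per C99 6.10.3.2, no diagnostic
+//   #define MAX 11   // ext_pp_macro_redef: the bodies differ
+// In both cases the old MacroInfo is freed and the new one is installed.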
+
+/// HandleUndefDirective - Implements #undef.
+///
+void Preprocessor::HandleUndefDirective(Token &UndefTok) {
+ ++NumUndefined;
+
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok, 2);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eom))
+ return;
+
+ // Check to see if this is the last token on the #undef line.
+ CheckEndOfDirective("#undef");
+
+ // Okay, we finally have a valid identifier to undef.
+ MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+
+ // If the macro is not defined, this is a noop undef, just return.
+ if (MI == 0) return;
+
+ if (!MI->isUsed())
+ Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
+
+ // Free macro definition.
+ delete MI;
+ setMacroInfo(MacroNameTok.getIdentifierInfo(), 0);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Conditional Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is
+/// true when this is a #ifndef directive. ReadAnyTokensBeforeDirective is true
+/// if any tokens have been returned or pp-directives activated before this
+/// #ifndef has been lexed.
+///
+void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
+ bool ReadAnyTokensBeforeDirective) {
+ ++NumIf;
+ Token DirectiveTok = Result;
+
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eom)) {
+ // Skip code until we get to #endif. This helps with recovery by not
+ // emitting an error when the #endif is reached.
+ SkipExcludedConditionalBlock(DirectiveTok.getLocation(),
+ /*Foundnonskip*/false, /*FoundElse*/false);
+ return;
+ }
+
+ // Check to see if this is the last token on the #if[n]def line.
+ CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef");
+
+ if (CurLexer->getConditionalStackDepth() == 0) {
+ // If the start of a top-level #ifdef, inform MIOpt.
+ if (!ReadAnyTokensBeforeDirective) {
+ assert(isIfndef && "#ifdef shouldn't reach here");
+ CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo());
+ } else
+ CurLexer->MIOpt.EnterTopLevelConditional();
+ }
+
+ IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
+ MacroInfo *MI = getMacroInfo(MII);
+
+ // If there is a macro, process it.
+ if (MI) // Mark it used.
+ MI->setIsUsed(true);
+
+ // Should we include the stuff contained by this directive?
+ if (!MI == isIfndef) {
+ // Yes, remember that we are inside a conditional, then lex the next token.
+ CurLexer->pushConditionalLevel(DirectiveTok.getLocation(), /*wasskip*/false,
+ /*foundnonskip*/true, /*foundelse*/false);
+ } else {
+ // No, skip the contents of this block and return the first token after it.
+ SkipExcludedConditionalBlock(DirectiveTok.getLocation(),
+ /*Foundnonskip*/false,
+ /*FoundElse*/false);
+ }
+}
+
+/// HandleIfDirective - Implements the #if directive.
+///
+void Preprocessor::HandleIfDirective(Token &IfToken,
+ bool ReadAnyTokensBeforeDirective) {
+ ++NumIf;
+
+  // Parse and evaluate the conditional expression.
+ IdentifierInfo *IfNDefMacro = 0;
+ bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro);
+
+ // Should we include the stuff contained by this directive?
+ if (ConditionalTrue) {
+ // If this condition is equivalent to #ifndef X, and if this is the first
+ // directive seen, handle it for the multiple-include optimization.
+ if (CurLexer->getConditionalStackDepth() == 0) {
+ if (!ReadAnyTokensBeforeDirective && IfNDefMacro)
+ CurLexer->MIOpt.EnterTopLevelIFNDEF(IfNDefMacro);
+ else
+ CurLexer->MIOpt.EnterTopLevelConditional();
+ }
+
+ // Yes, remember that we are inside a conditional, then lex the next token.
+ CurLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
+ /*foundnonskip*/true, /*foundelse*/false);
+ } else {
+ // No, skip the contents of this block and return the first token after it.
+ SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false,
+ /*FoundElse*/false);
+ }
+}
+
+/// HandleEndifDirective - Implements the #endif directive.
+///
+void Preprocessor::HandleEndifDirective(Token &EndifToken) {
+ ++NumEndif;
+
+ // Check that this is the whole directive.
+ CheckEndOfDirective("#endif");
+
+ PPConditionalInfo CondInfo;
+ if (CurLexer->popConditionalLevel(CondInfo)) {
+ // No conditionals on the stack: this is an #endif without an #if.
+ return Diag(EndifToken, diag::err_pp_endif_without_if);
+ }
+
+  // If this is the end of a top-level conditional, inform MIOpt.
+ if (CurLexer->getConditionalStackDepth() == 0)
+ CurLexer->MIOpt.ExitTopLevelConditional();
+
+ assert(!CondInfo.WasSkipping && !CurLexer->LexingRawMode &&
+ "This code should only be reachable in the non-skipping case!");
+}
+
+
+void Preprocessor::HandleElseDirective(Token &Result) {
+ ++NumElse;
+
+ // #else directive in a non-skipping conditional... start skipping.
+ CheckEndOfDirective("#else");
+
+ PPConditionalInfo CI;
+ if (CurLexer->popConditionalLevel(CI))
+ return Diag(Result, diag::pp_err_else_without_if);
+
+ // If this is a top-level #else, inform the MIOpt.
+ if (CurLexer->getConditionalStackDepth() == 0)
+ CurLexer->MIOpt.EnterTopLevelConditional();
+
+ // If this is a #else with a #else before it, report the error.
+ if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
+
+ // Finally, skip the rest of the contents of this block and return the first
+ // token after it.
+ return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
+ /*FoundElse*/true);
+}
+
+void Preprocessor::HandleElifDirective(Token &ElifToken) {
+ ++NumElse;
+
+ // #elif directive in a non-skipping conditional... start skipping.
+ // We don't care what the condition is, because we will always skip it (since
+ // the block immediately before it was included).
+ DiscardUntilEndOfDirective();
+
+ PPConditionalInfo CI;
+ if (CurLexer->popConditionalLevel(CI))
+ return Diag(ElifToken, diag::pp_err_elif_without_if);
+
+ // If this is a top-level #elif, inform the MIOpt.
+ if (CurLexer->getConditionalStackDepth() == 0)
+ CurLexer->MIOpt.EnterTopLevelConditional();
+
+ // If this is a #elif with a #else before it, report the error.
+ if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);
+
+ // Finally, skip the rest of the contents of this block and return the first
+ // token after it.
+ return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
+ /*FoundElse*/CI.FoundElse);
+}
+
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
new file mode 100644
index 00000000000..cca76289176
--- /dev/null
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -0,0 +1,639 @@
+//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Preprocessor::EvaluateDirectiveExpression method,
+// which parses and evaluates integer constant expressions for #if directives.
+//
+//===----------------------------------------------------------------------===//
+//
+// FIXME: implement testing for #assert's.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/SmallString.h"
+using namespace clang;
+
+static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec,
+ Token &PeekTok, bool ValueLive,
+ Preprocessor &PP);
+
+/// DefinedTracker - This struct is used while parsing expressions to keep track
+/// of whether !defined(X) has been seen.
+///
+/// With this simple scheme, we handle the basic forms:
+/// !defined(X) and !defined X
+/// but we also trivially handle (silly) stuff like:
+/// !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)).
+struct DefinedTracker {
+ /// Each time a Value is evaluated, it returns information about whether the
+ /// parsed value is of the form defined(X), !defined(X) or is something else.
+ enum TrackerState {
+ DefinedMacro, // defined(X)
+ NotDefinedMacro, // !defined(X)
+ Unknown // Something else.
+ } State;
+ /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this
+ /// indicates the macro that was checked.
+ IdentifierInfo *TheMacro;
+};
+
+
+
+/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
+/// return the computed value in Result. Return true if there was an error
+/// parsing. This function also returns information about the form of the
+/// expression in DT. See above for information on what DT means.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used. As such, avoid diagnostics that relate to
+/// evaluation.
+static bool EvaluateValue(llvm::APSInt &Result, Token &PeekTok,
+ DefinedTracker &DT, bool ValueLive,
+ Preprocessor &PP) {
+ Result = 0;
+ DT.State = DefinedTracker::Unknown;
+
+ // If this token's spelling is a pp-identifier, check to see if it is
+ // 'defined' or if it is a macro. Note that we check here because many
+ // keywords are pp-identifiers, so we can't check the kind.
+ if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
+ // If this identifier isn't 'defined' and it wasn't macro expanded, it turns
+ // into a simple 0, unless it is the C++ keyword "true", in which case it
+ // turns into "1".
+ if (II->getPPKeywordID() != tok::pp_defined) {
+ PP.Diag(PeekTok, diag::warn_pp_undef_identifier, II->getName());
+ Result = II->getTokenID() == tok::kw_true;
+ Result.setIsUnsigned(false); // "0" is signed intmax_t 0.
+ PP.LexNonComment(PeekTok);
+ return false;
+ }
+
+ // Handle "defined X" and "defined(X)".
+
+ // Get the next token, don't expand it.
+ PP.LexUnexpandedToken(PeekTok);
+
+    // Two options: it can either be a pp-identifier or a '('.
+ bool InParens = false;
+ if (PeekTok.is(tok::l_paren)) {
+ // Found a paren, remember we saw it and skip it.
+ InParens = true;
+ PP.LexUnexpandedToken(PeekTok);
+ }
+
+ // If we don't have a pp-identifier now, this is an error.
+ if ((II = PeekTok.getIdentifierInfo()) == 0) {
+ PP.Diag(PeekTok, diag::err_pp_defined_requires_identifier);
+ return true;
+ }
+
+ // Otherwise, we got an identifier, is it defined to something?
+ Result = II->hasMacroDefinition();
+ Result.setIsUnsigned(false); // Result is signed intmax_t.
+
+ // If there is a macro, mark it used.
+ if (Result != 0 && ValueLive) {
+ MacroInfo *Macro = PP.getMacroInfo(II);
+ Macro->setIsUsed(true);
+ }
+
+ // Consume identifier.
+ PP.LexNonComment(PeekTok);
+
+ // If we are in parens, ensure we have a trailing ).
+ if (InParens) {
+ if (PeekTok.isNot(tok::r_paren)) {
+ PP.Diag(PeekTok, diag::err_pp_missing_rparen);
+ return true;
+ }
+ // Consume the ).
+ PP.LexNonComment(PeekTok);
+ }
+
+ // Success, remember that we saw defined(X).
+ DT.State = DefinedTracker::DefinedMacro;
+ DT.TheMacro = II;
+ return false;
+ }
+
+ switch (PeekTok.getKind()) {
+ default: // Non-value token.
+ PP.Diag(PeekTok, diag::err_pp_expr_bad_token);
+ return true;
+ case tok::eom:
+ case tok::r_paren:
+ // If there is no expression, report and exit.
+ PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr);
+ return true;
+ case tok::numeric_constant: {
+ llvm::SmallString<64> IntegerBuffer;
+ IntegerBuffer.resize(PeekTok.getLength());
+ const char *ThisTokBegin = &IntegerBuffer[0];
+ unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin);
+ NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength,
+ PeekTok.getLocation(), PP);
+ if (Literal.hadError)
+ return true; // a diagnostic was already reported.
+
+ if (Literal.isFloatingLiteral() || Literal.isImaginary) {
+ PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal);
+ return true;
+ }
+ assert(Literal.isIntegerLiteral() && "Unknown ppnumber");
+
+ // long long is a C99 feature.
+ if (!PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus0x
+ && Literal.isLongLong)
+ PP.Diag(PeekTok, diag::ext_longlong);
+
+ // Parse the integer literal into Result.
+ if (Literal.GetIntegerValue(Result)) {
+ // Overflow parsing integer literal.
+ if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large);
+ Result.setIsUnsigned(true);
+ } else {
+ // Set the signedness of the result to match whether there was a U suffix
+ // or not.
+ Result.setIsUnsigned(Literal.isUnsigned);
+
+ // Detect overflow based on whether the value is signed. If signed
+ // and if the value is too large, emit a warning "integer constant is so
+ // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t
+ // is 64-bits.
+ if (!Literal.isUnsigned && Result.isNegative()) {
+        if (ValueLive)
+          PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed);
+ Result.setIsUnsigned(true);
+ }
+ }
+
+ // Consume the token.
+ PP.LexNonComment(PeekTok);
+ return false;
+ }
+ case tok::char_constant: { // 'x'
+ llvm::SmallString<32> CharBuffer;
+ CharBuffer.resize(PeekTok.getLength());
+ const char *ThisTokBegin = &CharBuffer[0];
+ unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin);
+ CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength,
+ PeekTok.getLocation(), PP);
+ if (Literal.hadError())
+ return true; // A diagnostic was already emitted.
+
+ // Character literals are always int or wchar_t, expand to intmax_t.
+ TargetInfo &TI = PP.getTargetInfo();
+ unsigned NumBits = TI.getCharWidth(Literal.isWide());
+
+ // Set the width.
+ llvm::APSInt Val(NumBits);
+ // Set the value.
+ Val = Literal.getValue();
+ // Set the signedness.
+ Val.setIsUnsigned(!TI.isCharSigned());
+
+ if (Result.getBitWidth() > Val.getBitWidth()) {
+ Result = Val.extend(Result.getBitWidth());
+ } else {
+ assert(Result.getBitWidth() == Val.getBitWidth() &&
+ "intmax_t smaller than char/wchar_t?");
+ Result = Val;
+ }
+
+ // Consume the token.
+ PP.LexNonComment(PeekTok);
+ return false;
+ }
+ case tok::l_paren:
+ PP.LexNonComment(PeekTok); // Eat the (.
+ // Parse the value and if there are any binary operators involved, parse
+ // them.
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+
+ // If this is a silly value like (X), which doesn't need parens, check for
+ // !(defined X).
+ if (PeekTok.is(tok::r_paren)) {
+ // Just use DT unmodified as our result.
+ } else {
+ if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP))
+ return true;
+
+ if (PeekTok.isNot(tok::r_paren)) {
+ PP.Diag(PeekTok, diag::err_pp_expected_rparen);
+ return true;
+ }
+ DT.State = DefinedTracker::Unknown;
+ }
+ PP.LexNonComment(PeekTok); // Eat the ).
+ return false;
+
+ case tok::plus:
+ // Unary plus doesn't modify the value.
+ PP.LexNonComment(PeekTok);
+ return EvaluateValue(Result, PeekTok, DT, ValueLive, PP);
+ case tok::minus: {
+ SourceLocation Loc = PeekTok.getLocation();
+ PP.LexNonComment(PeekTok);
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+ // C99 6.5.3.3p3: The sign of the result matches the sign of the operand.
+ Result = -Result;
+
+ bool Overflow = false;
+ if (Result.isUnsigned())
+ Overflow = Result.isNegative();
+ else if (Result.isMinSignedValue())
+ Overflow = true; // -MININT is the only thing that overflows.
+
+ // If this operator is live and overflowed, report the issue.
+ if (Overflow && ValueLive)
+ PP.Diag(Loc, diag::warn_pp_expr_overflow);
+
+ DT.State = DefinedTracker::Unknown;
+ return false;
+ }
+
+ case tok::tilde:
+ PP.LexNonComment(PeekTok);
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+ // C99 6.5.3.3p4: The sign of the result matches the sign of the operand.
+ Result = ~Result;
+ DT.State = DefinedTracker::Unknown;
+ return false;
+
+ case tok::exclaim:
+ PP.LexNonComment(PeekTok);
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+ Result = !Result;
+ // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed.
+ Result.setIsUnsigned(false);
+
+ if (DT.State == DefinedTracker::DefinedMacro)
+ DT.State = DefinedTracker::NotDefinedMacro;
+ else if (DT.State == DefinedTracker::NotDefinedMacro)
+ DT.State = DefinedTracker::DefinedMacro;
+ return false;
+
+ // FIXME: Handle #assert
+ }
+}
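+
+// Example (illustrative note, not in the original commit): for
+//   #if !defined(FOO)
+// the defined(X) handling above sets DT to DefinedMacro/FOO, and the
+// tok::exclaim case flips it to NotDefinedMacro, which lets the caller treat
+// the directive like #ifndef FOO for the multiple-include optimization.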
+
+
+
+/// getPrecedence - Return the precedence of the specified binary operator
+/// token. This returns:
+/// ~0 - Invalid token.
+/// 14 - *,/,%
+/// 13 - -,+
+/// 12 - <<,>>
+/// 11 - >=, <=, >, <
+/// 10 - ==, !=
+/// 9 - &
+/// 8 - ^
+/// 7 - |
+/// 6 - &&
+/// 5 - ||
+/// 4 - ?
+///    3 - :
+///    2 - ,
+///    0 - eom, )
+static unsigned getPrecedence(tok::TokenKind Kind) {
+ switch (Kind) {
+ default: return ~0U;
+ case tok::percent:
+ case tok::slash:
+ case tok::star: return 14;
+ case tok::plus:
+ case tok::minus: return 13;
+ case tok::lessless:
+ case tok::greatergreater: return 12;
+ case tok::lessequal:
+ case tok::less:
+ case tok::greaterequal:
+ case tok::greater: return 11;
+ case tok::exclaimequal:
+ case tok::equalequal: return 10;
+ case tok::amp: return 9;
+ case tok::caret: return 8;
+ case tok::pipe: return 7;
+ case tok::ampamp: return 6;
+ case tok::pipepipe: return 5;
+ case tok::question: return 4;
+ case tok::colon: return 3;
+ case tok::comma: return 2;
+ case tok::r_paren: return 0; // Lowest priority, end of expr.
+ case tok::eom: return 0; // Lowest priority, end of macro.
+ }
+}
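+
+// Worked example (illustrative note, not in the original commit): with the
+// table above, "1 + 2 * 3" evaluates to 7. After parsing the RHS "2" of '+',
+// the next operator '*' has precedence 14 > 13, so EvaluateDirectiveSubExpr
+// recurses to fold "2 * 3" before applying '+'.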
+
+
+/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is
+/// PeekTok, consuming operators with precedence of at least MinPrec.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used. As such, avoid diagnostics that relate to
+/// evaluation.
+static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec,
+ Token &PeekTok, bool ValueLive,
+ Preprocessor &PP) {
+ unsigned PeekPrec = getPrecedence(PeekTok.getKind());
+ // If this token isn't valid, report the error.
+ if (PeekPrec == ~0U) {
+ PP.Diag(PeekTok, diag::err_pp_expr_bad_token);
+ return true;
+ }
+
+ while (1) {
+ // If this token has a lower precedence than we are allowed to parse, return
+ // it so that higher levels of the recursion can parse it.
+ if (PeekPrec < MinPrec)
+ return false;
+
+ tok::TokenKind Operator = PeekTok.getKind();
+
+ // If this is a short-circuiting operator, see if the RHS of the operator is
+ // dead. Note that this cannot just clobber ValueLive. Consider
+ // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)". In
+ // this example, the RHS of the && being dead does not make the rest of the
+ // expr dead.
+ bool RHSIsLive;
+ if (Operator == tok::ampamp && LHS == 0)
+ RHSIsLive = false; // RHS of "0 && x" is dead.
+ else if (Operator == tok::pipepipe && LHS != 0)
+ RHSIsLive = false; // RHS of "1 || x" is dead.
+ else if (Operator == tok::question && LHS == 0)
+ RHSIsLive = false; // RHS (x) of "0 ? x : y" is dead.
+ else
+ RHSIsLive = ValueLive;
+
+ // Consume the operator, saving the operator token for error reporting.
+ Token OpToken = PeekTok;
+ PP.LexNonComment(PeekTok);
+
+ llvm::APSInt RHS(LHS.getBitWidth());
+ // Parse the RHS of the operator.
+ DefinedTracker DT;
+ if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true;
+
+ // Remember the precedence of this operator and get the precedence of the
+ // operator immediately to the right of the RHS.
+ unsigned ThisPrec = PeekPrec;
+ PeekPrec = getPrecedence(PeekTok.getKind());
+
+ // If this token isn't valid, report the error.
+ if (PeekPrec == ~0U) {
+ PP.Diag(PeekTok, diag::err_pp_expr_bad_token);
+ return true;
+ }
+
+ bool isRightAssoc = Operator == tok::question;
+
+ // Get the precedence of the operator to the right of the RHS. If it binds
+ // more tightly with RHS than we do, evaluate it completely first.
+ if (ThisPrec < PeekPrec ||
+ (ThisPrec == PeekPrec && isRightAssoc)) {
+ if (EvaluateDirectiveSubExpr(RHS, ThisPrec+1, PeekTok, RHSIsLive, PP))
+ return true;
+ PeekPrec = getPrecedence(PeekTok.getKind());
+ }
+ assert(PeekPrec <= ThisPrec && "Recursion didn't work!");
+
+ // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
+ // either operand is unsigned. Don't do this for x and y in "x ? y : z".
+ llvm::APSInt Res(LHS.getBitWidth());
+ if (Operator != tok::question) {
+ Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned());
+ // If this just promoted something from signed to unsigned, and if the
+ // value was negative, warn about it.
+ if (ValueLive && Res.isUnsigned()) {
+ if (!LHS.isUnsigned() && LHS.isNegative())
+ PP.Diag(OpToken, diag::warn_pp_convert_lhs_to_positive,
+ LHS.toStringSigned() + " to " + LHS.toStringUnsigned());
+ if (!RHS.isUnsigned() && RHS.isNegative())
+ PP.Diag(OpToken, diag::warn_pp_convert_rhs_to_positive,
+ RHS.toStringSigned() + " to " + RHS.toStringUnsigned());
+ }
+ LHS.setIsUnsigned(Res.isUnsigned());
+ RHS.setIsUnsigned(Res.isUnsigned());
+ }
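+    // For example, in "#if -1 > 0U" the signed LHS is converted to unsigned
+    // and becomes the maximum value, so the comparison yields true; the
+    // conversion warning above flags exactly this case.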
+
+ // FIXME: All of these should detect and report overflow??
+ bool Overflow = false;
+ switch (Operator) {
+ default: assert(0 && "Unknown operator token!");
+ case tok::percent:
+ if (RHS == 0) {
+ if (ValueLive) PP.Diag(OpToken, diag::err_pp_remainder_by_zero);
+ return true;
+ }
+ Res = LHS % RHS;
+ break;
+ case tok::slash:
+ if (RHS == 0) {
+ if (ValueLive) PP.Diag(OpToken, diag::err_pp_division_by_zero);
+ return true;
+ }
+ Res = LHS / RHS;
+ if (LHS.isSigned())
+ Overflow = LHS.isMinSignedValue() && RHS.isAllOnesValue(); // MININT/-1
+ break;
+ case tok::star:
+ Res = LHS * RHS;
+ if (LHS != 0 && RHS != 0)
+ Overflow = Res/RHS != LHS || Res/LHS != RHS;
+ break;
+ case tok::lessless: {
+ // Determine whether overflow is about to happen.
+ unsigned ShAmt = static_cast<unsigned>(RHS.getLimitedValue());
+ if (ShAmt >= LHS.getBitWidth())
+ Overflow = true, ShAmt = LHS.getBitWidth()-1;
+ else if (LHS.isUnsigned())
+ Overflow = ShAmt > LHS.countLeadingZeros();
+ else if (LHS.isNonNegative())
+ Overflow = ShAmt >= LHS.countLeadingZeros(); // Don't allow sign change.
+ else
+ Overflow = ShAmt >= LHS.countLeadingOnes();
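+      // e.g. with a 64-bit intmax_t, "1 << 63" would change the sign bit of
+      // the signed LHS: countLeadingZeros(1) == 63, so the check above fires.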
+
+ Res = LHS << ShAmt;
+ break;
+ }
+ case tok::greatergreater: {
+ // Determine whether overflow is about to happen.
+ unsigned ShAmt = static_cast<unsigned>(RHS.getLimitedValue());
+ if (ShAmt >= LHS.getBitWidth())
+ Overflow = true, ShAmt = LHS.getBitWidth()-1;
+ Res = LHS >> ShAmt;
+ break;
+ }
+ case tok::plus:
+ Res = LHS + RHS;
+ if (LHS.isUnsigned())
+ Overflow = Res.ult(LHS);
+ else if (LHS.isNonNegative() == RHS.isNonNegative() &&
+ Res.isNonNegative() != LHS.isNonNegative())
+ Overflow = true; // Overflow for signed addition.
+ break;
+ case tok::minus:
+ Res = LHS - RHS;
+ if (LHS.isUnsigned())
+ Overflow = Res.ugt(LHS);
+ else if (LHS.isNonNegative() != RHS.isNonNegative() &&
+ Res.isNonNegative() != LHS.isNonNegative())
+ Overflow = true; // Overflow for signed subtraction.
+ break;
+ case tok::lessequal:
+ Res = LHS <= RHS;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::less:
+ Res = LHS < RHS;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::greaterequal:
+ Res = LHS >= RHS;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::greater:
+ Res = LHS > RHS;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::exclaimequal:
+ Res = LHS != RHS;
+ Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed)
+ break;
+ case tok::equalequal:
+ Res = LHS == RHS;
+ Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed)
+ break;
+ case tok::amp:
+ Res = LHS & RHS;
+ break;
+ case tok::caret:
+ Res = LHS ^ RHS;
+ break;
+ case tok::pipe:
+ Res = LHS | RHS;
+ break;
+ case tok::ampamp:
+ Res = (LHS != 0 && RHS != 0);
+ Res.setIsUnsigned(false); // C99 6.5.13p3, result is always int (signed)
+ break;
+ case tok::pipepipe:
+ Res = (LHS != 0 || RHS != 0);
+ Res.setIsUnsigned(false); // C99 6.5.14p3, result is always int (signed)
+ break;
+ case tok::comma:
+ PP.Diag(OpToken, diag::ext_pp_comma_expr);
+ Res = RHS; // LHS = LHS,RHS -> RHS.
+ break;
+ case tok::question: {
+ // Parse the : part of the expression.
+ if (PeekTok.isNot(tok::colon)) {
+ PP.Diag(OpToken, diag::err_pp_question_without_colon);
+ return true;
+ }
+ // Consume the :.
+ PP.LexNonComment(PeekTok);
+
+ // Evaluate the value after the :.
+ bool AfterColonLive = ValueLive && LHS == 0;
+ llvm::APSInt AfterColonVal(LHS.getBitWidth());
+ DefinedTracker DT;
+ if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP))
+ return true;
+
+ // Parse anything after the : RHS that has a higher precedence than ?.
+ if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec+1,
+ PeekTok, AfterColonLive, PP))
+ return true;
+
+ // Now that we have the condition, the LHS and the RHS of the :, evaluate.
+ Res = LHS != 0 ? RHS : AfterColonVal;
+
+ // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
+ // either operand is unsigned.
+ Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned());
+
+ // Figure out the precedence of the token after the : part.
+ PeekPrec = getPrecedence(PeekTok.getKind());
+ break;
+ }
+ case tok::colon:
+ // Don't allow :'s to float around without being part of ?: exprs.
+ PP.Diag(OpToken, diag::err_pp_colon_without_question);
+ return true;
+ }
+
+ // If this operator is live and overflowed, report the issue.
+ if (Overflow && ValueLive)
+ PP.Diag(OpToken, diag::warn_pp_expr_overflow);
+
+ // Put the result back into 'LHS' for our next iteration.
+ LHS = Res;
+ }
+
+ return false;
+}
+
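+// A minimal standalone sketch of the precedence-climbing loop above, reduced
+// to '+' and '*' over plain long values. This is illustration only: the names
+// below are hypothetical, and the real code threads APSInt values, liveness,
+// and diagnostics through every step.
+namespace {
+inline unsigned SketchPrec(char Op) {
+  return Op == '*' ? 14 : Op == '+' ? 13 : 0;  // 0 terminates the expression.
+}
+inline long SketchEval(long LHS, unsigned MinPrec, const char *&P) {
+  while (unsigned Prec = SketchPrec(*P)) {
+    if (Prec < MinPrec) break;              // The caller binds more tightly.
+    char Op = *P++;                         // Consume the operator.
+    long RHS = *P++ - '0';                  // Single-digit "primary" value.
+    if (SketchPrec(*P) > Prec)              // RHS's operator binds tighter,
+      RHS = SketchEval(RHS, Prec + 1, P);   // so fold its subexpression first.
+    LHS = (Op == '*') ? LHS * RHS : LHS + RHS;  // Fold one operator.
+  }
+  return LHS;
+}
+} // end anonymous namespace
+// For "1+2*3", the caller reads the leading 1 and then SketchEval(1, 1, P)
+// folds 2*3 before the '+', returning 7, just as EvaluateDirectiveSubExpr
+// does for "#if 1 + 2 * 3".
+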
+/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+/// may occur after a #if or #elif directive. If the expression is equivalent
+/// to "!defined(X)" return X in IfNDefMacro.
+bool Preprocessor::
+EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+ // Peek ahead one token.
+ Token Tok;
+ Lex(Tok);
+
+ // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
+ unsigned BitWidth = getTargetInfo().getIntMaxTWidth();
+
+ llvm::APSInt ResVal(BitWidth);
+ DefinedTracker DT;
+ if (EvaluateValue(ResVal, Tok, DT, true, *this)) {
+ // Parse error, skip the rest of the macro line.
+ if (Tok.isNot(tok::eom))
+ DiscardUntilEndOfDirective();
+ return false;
+ }
+
+ // If we are at the end of the expression after just parsing a value, there
+ // must be no (unparenthesized) binary operators involved, so we can exit
+ // directly.
+ if (Tok.is(tok::eom)) {
+ // If the expression we parsed was of the form !defined(macro), return the
+ // macro in IfNDefMacro.
+ if (DT.State == DefinedTracker::NotDefinedMacro)
+ IfNDefMacro = DT.TheMacro;
+
+ return ResVal != 0;
+ }
+
+ // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
+ // operator and the stuff after it.
+ if (EvaluateDirectiveSubExpr(ResVal, 1, Tok, true, *this)) {
+ // Parse error, skip the rest of the macro line.
+ if (Tok.isNot(tok::eom))
+ DiscardUntilEndOfDirective();
+ return false;
+ }
+
+ // If we aren't at the tok::eom token, something bad happened, like an extra
+ // ')' token.
+ if (Tok.isNot(tok::eom)) {
+ Diag(Tok, diag::err_pp_expected_eol);
+ DiscardUntilEndOfDirective();
+ }
+
+ return ResVal != 0;
+}
+
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
new file mode 100644
index 00000000000..bd0ff7f94a1
--- /dev/null
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -0,0 +1,401 @@
+//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// current lexer stack.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceManager.h"
+using namespace clang;
+
+PPCallbacks::~PPCallbacks() {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Methods.
+//===----------------------------------------------------------------------===//
+
+/// isInPrimaryFile - Return true if we're in the top-level file, not in a
+/// #include. This looks through macro expansions and active _Pragma lexers.
+bool Preprocessor::isInPrimaryFile() const {
+ if (CurLexer && !CurLexer->Is_PragmaLexer)
+ return IncludeMacroStack.empty();
+
+ // If there are any stacked lexers, we're in a #include.
+ assert(IncludeMacroStack[0].TheLexer &&
+ !IncludeMacroStack[0].TheLexer->Is_PragmaLexer &&
+ "Top level include stack isn't our primary lexer?");
+ for (unsigned i = 1, e = IncludeMacroStack.size(); i != e; ++i)
+ if (IncludeMacroStack[i].TheLexer &&
+ !IncludeMacroStack[i].TheLexer->Is_PragmaLexer)
+ return false;
+ return true;
+}
+
+/// getCurrentLexer - Return the current file lexer being lexed from. Note
+/// that this ignores any potentially active macro expansions and _Pragma
+/// expansions going on at the time.
+Lexer *Preprocessor::getCurrentFileLexer() const {
+ if (CurLexer && !CurLexer->Is_PragmaLexer) return CurLexer;
+
+ // Look for a stacked lexer.
+ for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
+ Lexer *L = IncludeMacroStack[i-1].TheLexer;
+ if (L && !L->Is_PragmaLexer) // Ignore macro & _Pragma expansions.
+ return L;
+ }
+ return 0;
+}
+
+/// LookAhead - This peeks ahead N tokens and returns that token without
+/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns
+/// the token after Tok, etc.
+///
+/// NOTE: this is a relatively expensive method, so it should not be used in
+/// common code paths if possible!
+///
+Token Preprocessor::LookAhead(unsigned N) {
+ // FIXME: Optimize the case where multiple lookahead calls are used back to
+  // back. Consider if the parser contained (dynamically):
+ // Lookahead(1); Lookahead(1); Lookahead(1)
+ // This would return the same token 3 times, but would end up making lots of
+ // token stream lexers to do it. To handle this common case, see if the top
+ // of the lexer stack is a TokenStreamLexer with macro expansion disabled. If
+ // so, see if it has 'N' tokens available in it. If so, just return the
+ // token.
+
+ // FIXME: Optimize the case when the parser does multiple nearby lookahead
+ // calls. For example, consider:
+ // Lookahead(0); Lookahead(1); Lookahead(2);
+ // The previous optimization won't apply, and there won't be any space left in
+ // the array that was previously new'd. To handle this, always round up the
+ // size we new to a multiple of 16 tokens. If the previous buffer has space
+ // left, we can just grow it. This means we only have to do the new 1/16th as
+ // often.
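+  // (A sketch of that rounding, with a hypothetical 'Wanted' count:
+  //    unsigned NewSize = (Wanted + 15) & ~15U;
+  //  rounds the allocation up to the next multiple of 16 tokens.)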
+
+  // Note that the loop below reads and stores N+1 tokens (indices 0..N), so
+  // allocate N+1 slots.
+  Token *LookaheadTokens = new Token[N+1];
+
+ // Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token
+ // to return.
+ Token Tok;
+ unsigned NumTokens = 0;
+ for (; N != ~0U; --N, ++NumTokens) {
+ Lex(Tok);
+ LookaheadTokens[NumTokens] = Tok;
+
+ // If we got to EOF, don't lex past it. This will cause LookAhead to return
+ // the EOF token.
+ if (Tok.is(tok::eof))
+ break;
+ }
+
+ // Okay, at this point, we have the token we want to return in Tok. However,
+ // we read it and a bunch of other stuff (in LookaheadTokens) that we must
+ // allow subsequent calls to 'Lex' to return. To do this, we push a new token
+ // lexer onto the lexer stack with the tokens we read here. This passes
+ // ownership of LookaheadTokens to EnterTokenStream.
+ //
+ // Note that we disable macro expansion of the tokens from this buffer, since
+ // any macros have already been expanded, and the internal preprocessor state
+ // may already read past new macros. Consider something like LookAhead(1) on
+ // X
+ // #define X 14
+ // Y
+ // The lookahead call should return 'Y', and the next Lex call should return
+ // 'X' even though X -> 14 has already been entered as a macro.
+ //
+ EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
+ true /*OwnsTokens*/);
+ return Tok;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for Entering and Callbacks for leaving various contexts
+//===----------------------------------------------------------------------===//
+
+/// EnterSourceFile - Add a source file to the top of the include stack and
+/// start lexing tokens from it instead of the current buffer.
+void Preprocessor::EnterSourceFile(unsigned FileID,
+ const DirectoryLookup *CurDir) {
+ assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!");
+ ++NumEnteredSourceFiles;
+
+ if (MaxIncludeStackDepth < IncludeMacroStack.size())
+ MaxIncludeStackDepth = IncludeMacroStack.size();
+
+ Lexer *TheLexer = new Lexer(SourceLocation::getFileLoc(FileID, 0), *this);
+ EnterSourceFileWithLexer(TheLexer, CurDir);
+}
+
+/// EnterSourceFile - Add a source file to the top of the include stack and
+/// start lexing tokens from it instead of the current buffer.
+void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
+ const DirectoryLookup *CurDir) {
+
+ // Add the current lexer to the include stack.
+ if (CurLexer || CurTokenLexer)
+ IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+ CurTokenLexer));
+
+ CurLexer = TheLexer;
+ CurDirLookup = CurDir;
+ CurTokenLexer = 0;
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks && !CurLexer->Is_PragmaLexer) {
+ DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir;
+
+ // Get the file entry for the current file.
+ if (const FileEntry *FE =
+ SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
+ FileType = HeaderInfo.getFileDirFlavor(FE);
+
+ Callbacks->FileChanged(CurLexer->getFileLoc(),
+ PPCallbacks::EnterFile, FileType);
+ }
+}
+
+
+
+/// EnterMacro - Add a Macro to the top of the include stack and start lexing
+/// tokens from it instead of the current buffer.
+void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) {
+ IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+ CurTokenLexer));
+ CurLexer = 0;
+ CurDirLookup = 0;
+
+ if (NumCachedTokenLexers == 0) {
+ CurTokenLexer = new TokenLexer(Tok, Args, *this);
+ } else {
+ CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers];
+ CurTokenLexer->Init(Tok, Args);
+ }
+}
+
+/// EnterTokenStream - Add a "macro" context to the top of the include stack,
+/// which will cause the lexer to start returning the specified tokens.
+///
+/// If DisableMacroExpansion is true, tokens lexed from the token stream will
+/// not be subject to further macro expansion. Otherwise, these tokens will
+/// be re-macro-expanded when/if expansion is enabled.
+///
+/// If OwnsTokens is false, this method assumes that the specified stream of
+/// tokens has a permanent owner somewhere, so they do not need to be copied.
+/// If it is true, it assumes the array of tokens is allocated with new[] and
+/// must be freed.
+///
+void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
+ bool DisableMacroExpansion,
+ bool OwnsTokens) {
+ // Save our current state.
+ IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+ CurTokenLexer));
+ CurLexer = 0;
+ CurDirLookup = 0;
+
+ // Create a macro expander to expand from the specified token stream.
+ if (NumCachedTokenLexers == 0) {
+ CurTokenLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion,
+ OwnsTokens, *this);
+ } else {
+ CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers];
+ CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
+ }
+}
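+
+// For example, LookAhead (above) hands back the tokens it consumed with
+//   EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
+//                    true /*OwnsTokens*/);
+// transferring ownership of the new[]'d array to the token lexer.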
+
+/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
+/// the current file. This either returns the EOF token or pops a level off
+/// the include stack and keeps going.
+bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
+ assert(!CurTokenLexer &&
+ "Ending a file when currently in a macro!");
+
+ // See if this file had a controlling macro.
+ if (CurLexer) { // Not ending a macro, ignore it.
+ if (const IdentifierInfo *ControllingMacro =
+ CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
+ // Okay, this has a controlling macro, remember in PerFileInfo.
+ if (const FileEntry *FE =
+ SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
+ HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
+ }
+ }
+
+ // If this is a #include'd file, pop it off the include stack and continue
+ // lexing the #includer file.
+ if (!IncludeMacroStack.empty()) {
+ // We're done with the #included file.
+ RemoveTopOfLexerStack();
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks && !isEndOfMacro && CurLexer) {
+ DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir;
+
+ // Get the file entry for the current file.
+ if (const FileEntry *FE =
+ SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
+ FileType = HeaderInfo.getFileDirFlavor(FE);
+
+ Callbacks->FileChanged(CurLexer->getSourceLocation(CurLexer->BufferPtr),
+ PPCallbacks::ExitFile, FileType);
+ }
+
+ // Client should lex another token.
+ return false;
+ }
+
+ // If the file ends with a newline, form the EOF token on the newline itself,
+ // rather than "on the line following it", which doesn't exist. This makes
+  // diagnostics relating to the end of file include the last line that the
+ // actually typed, which is goodness.
+ const char *EndPos = CurLexer->BufferEnd;
+ if (EndPos != CurLexer->BufferStart &&
+ (EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
+ --EndPos;
+
+ // Handle \n\r and \r\n:
+ if (EndPos != CurLexer->BufferStart &&
+ (EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
+ EndPos[-1] != EndPos[0])
+ --EndPos;
+ }
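+  // e.g. for a buffer ending in "...\r\n", EndPos backs up over both
+  // characters so the EOF token is formed on the user's last real line.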
+
+ Result.startToken();
+ CurLexer->BufferPtr = EndPos;
+ CurLexer->FormTokenWithChars(Result, EndPos);
+ Result.setKind(tok::eof);
+
+ // We're done with the #included file.
+ delete CurLexer;
+ CurLexer = 0;
+
+ // This is the end of the top-level file. If the diag::pp_macro_not_used
+ // diagnostic is enabled, look for macros that have not been used.
+ if (Diags.getDiagnosticLevel(diag::pp_macro_not_used) != Diagnostic::Ignored){
+ for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I =
+ Macros.begin(), E = Macros.end(); I != E; ++I) {
+ if (!I->second->isUsed())
+ Diag(I->second->getDefinitionLoc(), diag::pp_macro_not_used);
+ }
+ }
+ return true;
+}
+
+/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
+/// hits the end of its token stream.
+bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
+ assert(CurTokenLexer && !CurLexer &&
+ "Ending a macro when currently in a #include file!");
+
+ // Delete or cache the now-dead macro expander.
+ if (NumCachedTokenLexers == TokenLexerCacheSize)
+ delete CurTokenLexer;
+ else
+ TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer;
+
+ // Handle this like a #include file being popped off the stack.
+ CurTokenLexer = 0;
+ return HandleEndOfFile(Result, true);
+}
+
+/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
+/// lexer stack. This should only be used in situations where the current
+/// state of the top-of-stack lexer is unknown.
+void Preprocessor::RemoveTopOfLexerStack() {
+ assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
+
+ if (CurTokenLexer) {
+ // Delete or cache the now-dead macro expander.
+ if (NumCachedTokenLexers == TokenLexerCacheSize)
+ delete CurTokenLexer;
+ else
+ TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer;
+ } else {
+ delete CurLexer;
+ }
+ CurLexer = IncludeMacroStack.back().TheLexer;
+ CurDirLookup = IncludeMacroStack.back().TheDirLookup;
+ CurTokenLexer = IncludeMacroStack.back().TheTokenLexer;
+ IncludeMacroStack.pop_back();
+}
+
+/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
+/// comment (/##/) in Microsoft mode, this method handles updating the current
+/// state, returning the token on the next source line.
+void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
+ assert(CurTokenLexer && !CurLexer &&
+ "Pasted comment can only be formed from macro");
+
+ // We handle this by scanning for the closest real lexer, switching it to
+ // raw mode and preprocessor mode. This will cause it to return \n as an
+ // explicit EOM token.
+ Lexer *FoundLexer = 0;
+ bool LexerWasInPPMode = false;
+ for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
+ IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1);
+ if (ISI.TheLexer == 0) continue; // Scan for a real lexer.
+
+ // Once we find a real lexer, mark it as raw mode (disabling macro
+ // expansions) and preprocessor mode (return EOM). We know that the lexer
+ // was *not* in raw mode before, because the macro that the comment came
+ // from was expanded. However, it could have already been in preprocessor
+ // mode (#if COMMENT) in which case we have to return it to that mode and
+ // return EOM.
+ FoundLexer = ISI.TheLexer;
+ FoundLexer->LexingRawMode = true;
+ LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
+ FoundLexer->ParsingPreprocessorDirective = true;
+ break;
+ }
+
+ // Okay, we either found and switched over the lexer, or we didn't find a
+ // lexer. In either case, finish off the macro the comment came from, getting
+ // the next token.
+ if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
+
+  // Discard tokens until we hit EOF or EOM. This 'comments out' the rest of
+  // the line, including any tokens that came from other macros that were
+  // active, as in:
+ // #define submacro a COMMENT b
+ // submacro c
+ // which should lex to 'a' only: 'b' and 'c' should be removed.
+ while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof))
+ Lex(Tok);
+
+ // If we got an eom token, then we successfully found the end of the line.
+ if (Tok.is(tok::eom)) {
+ assert(FoundLexer && "Can't get end of line without an active lexer");
+ // Restore the lexer back to normal mode instead of raw mode.
+ FoundLexer->LexingRawMode = false;
+
+ // If the lexer was already in preprocessor mode, just return the EOM token
+ // to finish the preprocessor line.
+ if (LexerWasInPPMode) return;
+
+ // Otherwise, switch out of PP mode and return the next lexed token.
+ FoundLexer->ParsingPreprocessorDirective = false;
+ return Lex(Tok);
+ }
+
+ // If we got an EOF token, then we reached the end of the token stream but
+ // didn't find an explicit \n. This can only happen if there was no lexer
+ // active (an active lexer would return EOM at EOF if there was no \n in
+ // preprocessor directive mode), so just return EOF as our token.
+ assert(!FoundLexer && "Lexer should return EOM before EOF in PP mode");
+}
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
new file mode 100644
index 00000000000..8218d0ac06e
--- /dev/null
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -0,0 +1,523 @@
+//===--- PPMacroExpansion.cpp - Top level Macro Expansion ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top level handling of macro expansion for the
+// preprocessor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/Diagnostic.h"
+using namespace clang;
+
+/// setMacroInfo - Specify a macro for this identifier.
+///
+void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
+ if (MI == 0) {
+ if (II->hasMacroDefinition()) {
+ Macros.erase(II);
+ II->setHasMacroDefinition(false);
+ }
+ } else {
+ Macros[II] = MI;
+ II->setHasMacroDefinition(true);
+ }
+}
+
+/// RegisterBuiltinMacro - Register the specified identifier in the identifier
+/// table and mark it as a builtin macro to be expanded.
+IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) {
+ // Get the identifier.
+ IdentifierInfo *Id = getIdentifierInfo(Name);
+
+ // Mark it as being a macro that is builtin.
+ MacroInfo *MI = new MacroInfo(SourceLocation());
+ MI->setIsBuiltinMacro();
+ setMacroInfo(Id, MI);
+ return Id;
+}
+
+
+/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
+/// identifier table.
+void Preprocessor::RegisterBuiltinMacros() {
+ Ident__LINE__ = RegisterBuiltinMacro("__LINE__");
+ Ident__FILE__ = RegisterBuiltinMacro("__FILE__");
+ Ident__DATE__ = RegisterBuiltinMacro("__DATE__");
+ Ident__TIME__ = RegisterBuiltinMacro("__TIME__");
+ Ident_Pragma = RegisterBuiltinMacro("_Pragma");
+
+ // GCC Extensions.
+ Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__");
+ Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__");
+ Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__");
+}
+
+/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
+/// in its expansion, currently expands to that token literally.
+static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
+ const IdentifierInfo *MacroIdent,
+ Preprocessor &PP) {
+ IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
+
+ // If the token isn't an identifier, it's always literally expanded.
+ if (II == 0) return true;
+
+ // If the identifier is a macro, and if that macro is enabled, it may be
+ // expanded so it's not a trivial expansion.
+ if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() &&
+ // Fast expanding "#define X X" is ok, because X would be disabled.
+ II != MacroIdent)
+ return false;
+
+ // If this is an object-like macro invocation, it is safe to trivially expand
+ // it.
+ if (MI->isObjectLike()) return true;
+
+ // If this is a function-like macro invocation, it's safe to trivially expand
+ // as long as the identifier is not a macro argument.
+ for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
+ I != E; ++I)
+ if (*I == II)
+ return false; // Identifier is a macro argument.
+
+ return true;
+}
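+
+// For example, "#define ONE 1" is trivially expandable (the replacement token
+// is not an identifier), and "#define X X" is fine because X is disabled
+// during its own expansion; but "#define F(x) x" is not trivial, since its
+// single replacement token is a macro argument.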
+
+
+/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
+/// lexed is a '('. If so, consume the token and return true, if not, this
+/// method should have no observable side-effect on the lexed tokens.
+bool Preprocessor::isNextPPTokenLParen() {
+ // Do some quick tests for rejection cases.
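+  // Val encoding: 0 -> next token is not '(', 1 -> it is '(', 2 -> this lexer
+  // ran out of tokens before it could tell.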
+ unsigned Val;
+ if (CurLexer)
+ Val = CurLexer->isNextPPTokenLParen();
+ else
+ Val = CurTokenLexer->isNextTokenLParen();
+
+ if (Val == 2) {
+ // We have run off the end. If it's a source file we don't
+ // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
+ // macro stack.
+ if (CurLexer)
+ return false;
+ for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
+ IncludeStackInfo &Entry = IncludeMacroStack[i-1];
+ if (Entry.TheLexer)
+ Val = Entry.TheLexer->isNextPPTokenLParen();
+ else
+ Val = Entry.TheTokenLexer->isNextTokenLParen();
+
+ if (Val != 2)
+ break;
+
+ // Ran off the end of a source file?
+ if (Entry.TheLexer)
+ return false;
+ }
+ }
+
+ // Okay, if we know that the token is a '(', lex it and return. Otherwise we
+ // have found something that isn't a '(' or we found the end of the
+ // translation unit. In either case, return false.
+ if (Val != 1)
+ return false;
+
+ Token Tok;
+ LexUnexpandedToken(Tok);
+ assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
+ return true;
+}
+
+/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
+/// expanded as a macro, handle it and return the next token as 'Identifier'.
+bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
+ MacroInfo *MI) {
+  // If this is a macro expansion in the "#if !defined(x)" line for the file,
+  // the macro could expand to different things in other contexts, so we need
+  // to disable the multiple-include optimization in this case.
+ if (CurLexer) CurLexer->MIOpt.ExpandedMacro();
+
+ // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
+ if (MI->isBuiltinMacro()) {
+ ExpandBuiltinMacro(Identifier);
+ return false;
+ }
+
+ /// Args - If this is a function-like macro expansion, this contains,
+ /// for each macro argument, the list of tokens that were provided to the
+ /// invocation.
+ MacroArgs *Args = 0;
+
+ // If this is a function-like macro, read the arguments.
+ if (MI->isFunctionLike()) {
+    // C99 6.10.3p10: If the preprocessing token immediately after the macro
+ // name isn't a '(', this macro should not be expanded. Otherwise, consume
+ // it.
+ if (!isNextPPTokenLParen())
+ return true;
+
+ // Remember that we are now parsing the arguments to a macro invocation.
+ // Preprocessor directives used inside macro arguments are not portable, and
+ // this enables the warning.
+ InMacroArgs = true;
+ Args = ReadFunctionLikeMacroArgs(Identifier, MI);
+
+ // Finished parsing args.
+ InMacroArgs = false;
+
+ // If there was an error parsing the arguments, bail out.
+ if (Args == 0) return false;
+
+ ++NumFnMacroExpanded;
+ } else {
+ ++NumMacroExpanded;
+ }
+
+ // Notice that this macro has been used.
+ MI->setIsUsed(true);
+
+ // If we started lexing a macro, enter the macro expansion body.
+
+ // If this macro expands to no tokens, don't bother to push it onto the
+ // expansion stack, only to take it right back off.
+ if (MI->getNumTokens() == 0) {
+ // No need for arg info.
+ if (Args) Args->destroy();
+
+ // Ignore this macro use, just return the next token in the current
+ // buffer.
+ bool HadLeadingSpace = Identifier.hasLeadingSpace();
+ bool IsAtStartOfLine = Identifier.isAtStartOfLine();
+
+ Lex(Identifier);
+
+ // If the identifier isn't on some OTHER line, inherit the leading
+ // whitespace/first-on-a-line property of this token. This handles
+ // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is
+ // empty.
+ if (!Identifier.isAtStartOfLine()) {
+ if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
+ if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
+ }
+ ++NumFastMacroExpanded;
+ return false;
+
+ } else if (MI->getNumTokens() == 1 &&
+ isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
+ *this)){
+ // Otherwise, if this macro expands into a single trivially-expanded
+ // token: expand it now. This handles common cases like
+ // "#define VAL 42".
+
+ // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
+ // identifier to the expanded token.
+ bool isAtStartOfLine = Identifier.isAtStartOfLine();
+ bool hasLeadingSpace = Identifier.hasLeadingSpace();
+
+ // Remember where the token is instantiated.
+ SourceLocation InstantiateLoc = Identifier.getLocation();
+
+ // Replace the result token.
+ Identifier = MI->getReplacementToken(0);
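+    // e.g. for "!defined(FOO)" the operand sets the state to DefinedMacro and
+    // the '!' flips it to NotDefinedMacro, which lets
+    // EvaluateDirectiveExpression treat "#if !defined(FOO)" like "#ifndef FOO".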
+
+ // Restore the StartOfLine/LeadingSpace markers.
+ Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
+ Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
+
+    // Update the token's location to include both its logical and physical
+ // locations.
+ SourceLocation Loc =
+ SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc);
+ Identifier.setLocation(Loc);
+
+ // If this is #define X X, we must mark the result as unexpandible.
+ if (IdentifierInfo *NewII = Identifier.getIdentifierInfo())
+ if (getMacroInfo(NewII) == MI)
+ Identifier.setFlag(Token::DisableExpand);
+
+ // Since this is not an identifier token, it can't be macro expanded, so
+ // we're done.
+ ++NumFastMacroExpanded;
+ return false;
+ }
+
+ // Start expanding the macro.
+ EnterMacro(Identifier, Args);
+
+ // Now that the macro is at the top of the include stack, ask the
+ // preprocessor to read the next token from it.
+ Lex(Identifier);
+ return false;
+}
+
+/// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
+/// invoked to read all of the actual arguments specified for the macro
+/// invocation. This returns null on error.
+MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
+ MacroInfo *MI) {
+ // The number of fixed arguments to parse.
+ unsigned NumFixedArgsLeft = MI->getNumArgs();
+ bool isVariadic = MI->isVariadic();
+
+  // Outer loop: while there are more arguments, keep reading them.
+ Token Tok;
+ Tok.setKind(tok::comma);
+ --NumFixedArgsLeft; // Start reading the first arg.
+
+ // ArgTokens - Build up a list of tokens that make up each argument. Each
+ // argument is separated by an EOF token. Use a SmallVector so we can avoid
+ // heap allocations in the common case.
+ llvm::SmallVector<Token, 64> ArgTokens;
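+  // e.g. for "F(a, (b,c))" the flat list ends up as:
+  //   a <eof> ( b , c ) <eof>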
+
+ unsigned NumActuals = 0;
+ while (Tok.is(tok::comma)) {
+ // C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note
+ // that we already consumed the first one.
+ unsigned NumParens = 0;
+
+ while (1) {
+ // Read arguments as unexpanded tokens. This avoids issues, e.g., where
+ // an argument value in a macro could expand to ',' or '(' or ')'.
+ LexUnexpandedToken(Tok);
+
+ if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n"
+ Diag(MacroName, diag::err_unterm_macro_invoc);
+ // Do not lose the EOF/EOM. Return it to the client.
+ MacroName = Tok;
+ return 0;
+ } else if (Tok.is(tok::r_paren)) {
+ // If we found the ) token, the macro arg list is done.
+ if (NumParens-- == 0)
+ break;
+ } else if (Tok.is(tok::l_paren)) {
+ ++NumParens;
+ } else if (Tok.is(tok::comma) && NumParens == 0) {
+ // Comma ends this argument if there are more fixed arguments expected.
+ if (NumFixedArgsLeft)
+ break;
+
+ // If this is not a variadic macro, too many args were specified.
+ if (!isVariadic) {
+ // Emit the diagnostic at the macro name in case there is a missing ).
+ // Emitting it at the , could be far away from the macro name.
+ Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
+ return 0;
+ }
+ // Otherwise, continue to add the tokens to this variable argument.
+ } else if (Tok.is(tok::comment) && !KeepMacroComments) {
+ // If this is a comment token in the argument list and we're just in
+ // -C mode (not -CC mode), discard the comment.
+ continue;
+ } else if (Tok.is(tok::identifier)) {
+ // Reading macro arguments can cause macros that we are currently
+ // expanding from to be popped off the expansion stack. Doing so causes
+ // them to be reenabled for expansion. Here we record whether any
+ // identifiers we lex as macro arguments correspond to disabled macros.
+ // If so, we mark the token as noexpand. This is a subtle aspect of
+ // C99 6.10.3.4p2.
+ if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
+ if (!MI->isEnabled())
+ Tok.setFlag(Token::DisableExpand);
+ }
+
+ ArgTokens.push_back(Tok);
+ }
+
+ // Empty arguments are standard in C99 and supported as an extension in
+ // other modes.
+ if (ArgTokens.empty() && !Features.C99)
+ Diag(Tok, diag::ext_empty_fnmacro_arg);
+
+ // Add a marker EOF token to the end of the token list for this argument.
+ Token EOFTok;
+ EOFTok.startToken();
+ EOFTok.setKind(tok::eof);
+ EOFTok.setLocation(Tok.getLocation());
+ EOFTok.setLength(0);
+ ArgTokens.push_back(EOFTok);
+ ++NumActuals;
+ --NumFixedArgsLeft;
+  }
+
+  // Okay, we found the r_paren. Check to see if we parsed too few arguments.
+ unsigned MinArgsExpected = MI->getNumArgs();
+
+ // See MacroArgs instance var for description of this.
+ bool isVarargsElided = false;
+
+ if (NumActuals < MinArgsExpected) {
+ // There are several cases where too few arguments is ok, handle them now.
+ if (NumActuals+1 == MinArgsExpected && MI->isVariadic()) {
+ // Varargs where the named vararg parameter is missing: ok as extension.
+ // #define A(x, ...)
+ // A("blah")
+ Diag(Tok, diag::ext_missing_varargs_arg);
+
+ // Remember this occurred if this is a C99 macro invocation with at least
+ // one actual argument.
+ isVarargsElided = MI->isC99Varargs() && MI->getNumArgs() > 1;
+ } else if (MI->getNumArgs() == 1) {
+ // #define A(x)
+ // A()
+ // is ok because it is an empty argument.
+
+ // Empty arguments are standard in C99 and supported as an extension in
+ // other modes.
+ if (ArgTokens.empty() && !Features.C99)
+ Diag(Tok, diag::ext_empty_fnmacro_arg);
+ } else {
+ // Otherwise, emit the error.
+ Diag(Tok, diag::err_too_few_args_in_macro_invoc);
+ return 0;
+ }
+
+ // Add a marker EOF token to the end of the token list for this argument.
+ SourceLocation EndLoc = Tok.getLocation();
+ Tok.startToken();
+ Tok.setKind(tok::eof);
+ Tok.setLocation(EndLoc);
+ Tok.setLength(0);
+ ArgTokens.push_back(Tok);
+ }
+
+ return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided);
+}
+
+/// ComputeDATE_TIME - Compute the current time, enter it into the specified
+/// scratch buffer, then return DATELoc/TIMELoc locations with the position of
+/// the identifier tokens inserted.
+static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
+ Preprocessor &PP) {
+ time_t TT = time(0);
+ struct tm *TM = localtime(&TT);
+
+ static const char * const Months[] = {
+ "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
+ };
+
+ char TmpBuffer[100];
+ sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday,
+ TM->tm_year+1900);
+ DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+
+ sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
+ TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+}
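+
+// For example, this produces strings like "\"Jun  4 2007\"" and
+// "\"17:03:09\"", matching the quoting and day padding expected of
+// __DATE__ and __TIME__.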
+
+/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
+/// as a builtin macro, handle it and return the next token as 'Tok'.
+void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
+ // Figure out which token this is.
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ assert(II && "Can't be a macro without id info!");
+
+ // If this is an _Pragma directive, expand it, invoke the pragma handler, then
+ // lex the token after it.
+ if (II == Ident_Pragma)
+ return Handle_Pragma(Tok);
+
+ ++NumBuiltinMacroExpanded;
+
+ char TmpBuffer[100];
+
+ // Set up the return result.
+ Tok.setIdentifierInfo(0);
+ Tok.clearFlag(Token::NeedsCleaning);
+
+ if (II == Ident__LINE__) {
+ // __LINE__ expands to a simple numeric value.
+ sprintf(TmpBuffer, "%u", SourceMgr.getLogicalLineNumber(Tok.getLocation()));
+ unsigned Length = strlen(TmpBuffer);
+ Tok.setKind(tok::numeric_constant);
+ Tok.setLength(Length);
+ Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
+ } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
+ SourceLocation Loc = Tok.getLocation();
+ if (II == Ident__BASE_FILE__) {
+ Diag(Tok, diag::ext_pp_base_file);
+ SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc);
+ while (NextLoc.isValid()) {
+ Loc = NextLoc;
+ NextLoc = SourceMgr.getIncludeLoc(Loc);
+ }
+ }
+
+  // Escape this filename. Turn '\' -> '\\' and '"' -> '\"'.
+ std::string FN = SourceMgr.getSourceName(SourceMgr.getLogicalLoc(Loc));
+ FN = '"' + Lexer::Stringify(FN) + '"';
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(FN.size());
+ Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
+ } else if (II == Ident__DATE__) {
+ if (!DATELoc.isValid())
+ ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(strlen("\"Mmm dd yyyy\""));
+ Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation()));
+ } else if (II == Ident__TIME__) {
+ if (!TIMELoc.isValid())
+ ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(strlen("\"hh:mm:ss\""));
+ Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation()));
+ } else if (II == Ident__INCLUDE_LEVEL__) {
+ Diag(Tok, diag::ext_pp_include_level);
+
+ // Compute the include depth of this token.
+ unsigned Depth = 0;
+ SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation());
+ for (; Loc.isValid(); ++Depth)
+ Loc = SourceMgr.getIncludeLoc(Loc);
+
+ // __INCLUDE_LEVEL__ expands to a simple numeric value.
+ sprintf(TmpBuffer, "%u", Depth);
+ unsigned Length = strlen(TmpBuffer);
+ Tok.setKind(tok::numeric_constant);
+ Tok.setLength(Length);
+ Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
+ } else if (II == Ident__TIMESTAMP__) {
+ // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
+  // of the form "Ddd Mmm dd hh:mm:ss yyyy", which is returned by asctime.
+ Diag(Tok, diag::ext_pp_timestamp);
+
+ // Get the file that we are lexing out of. If we're currently lexing from
+ // a macro, dig into the include stack.
+ const FileEntry *CurFile = 0;
+ Lexer *TheLexer = getCurrentFileLexer();
+
+ if (TheLexer)
+ CurFile = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
+
+  // Format the current file's modification time like asctime does; if there
+  // is no current file, fall back to a string of '?'s.
+ const char *Result;
+ if (CurFile) {
+ time_t TT = CurFile->getModificationTime();
+ struct tm *TM = localtime(&TT);
+ Result = asctime(TM);
+ } else {
+ Result = "??? ??? ?? ??:??:?? ????\n";
+ }
+ TmpBuffer[0] = '"';
+ strcpy(TmpBuffer+1, Result);
+ unsigned Len = strlen(TmpBuffer);
+ TmpBuffer[Len-1] = '"'; // Replace the newline with a quote.
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(Len);
+ Tok.setLocation(CreateString(TmpBuffer, Len, Tok.getLocation()));
+ } else {
+ assert(0 && "Unknown identifier!");
+ }
+}
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
new file mode 100644
index 00000000000..08ad1cf1d2f
--- /dev/null
+++ b/clang/lib/Lex/Pragma.cpp
@@ -0,0 +1,386 @@
+//===--- Pragma.cpp - Pragma registration and handling --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PragmaHandler/PragmaTable interfaces and implements
+// pragma related methods of the Preprocessor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace clang;
+
+// Out-of-line destructor to provide a home for the class.
+PragmaHandler::~PragmaHandler() {
+}
+
+//===----------------------------------------------------------------------===//
+// PragmaNamespace Implementation.
+//===----------------------------------------------------------------------===//
+
+
+PragmaNamespace::~PragmaNamespace() {
+ for (unsigned i = 0, e = Handlers.size(); i != e; ++i)
+ delete Handlers[i];
+}
+
+/// FindHandler - Check to see if there is already a handler for the
+/// specified name. If not, return the handler for the null identifier if it
+/// exists, otherwise return null. If IgnoreNull is true (the default) then
+/// the null handler isn't returned on failure to match.
+PragmaHandler *PragmaNamespace::FindHandler(const IdentifierInfo *Name,
+ bool IgnoreNull) const {
+ PragmaHandler *NullHandler = 0;
+ for (unsigned i = 0, e = Handlers.size(); i != e; ++i) {
+ if (Handlers[i]->getName() == Name)
+ return Handlers[i];
+
+ if (Handlers[i]->getName() == 0)
+ NullHandler = Handlers[i];
+ }
+ return IgnoreNull ? 0 : NullHandler;
+}
+
+void PragmaNamespace::HandlePragma(Preprocessor &PP, Token &Tok) {
+  // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro
+  // expand it: the user could have a #define for STDC that should not affect
+  // this lookup.
+ PP.LexUnexpandedToken(Tok);
+
+ // Get the handler for this token. If there is no handler, ignore the pragma.
+ PragmaHandler *Handler = FindHandler(Tok.getIdentifierInfo(), false);
+ if (Handler == 0) return;
+
+ // Otherwise, pass it down.
+ Handler->HandlePragma(PP, Tok);
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Pragma Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandlePragmaDirective - The "#pragma" directive has been parsed. Lex the
+/// rest of the pragma, passing it to the registered pragma handlers.
+void Preprocessor::HandlePragmaDirective() {
+ ++NumPragma;
+
+ // Invoke the first level of pragma handlers which reads the namespace id.
+ Token Tok;
+ PragmaHandlers->HandlePragma(*this, Tok);
+
+ // If the pragma handler didn't read the rest of the line, consume it now.
+ if (CurLexer->ParsingPreprocessorDirective)
+ DiscardUntilEndOfDirective();
+}
+
+/// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
+/// return the first token after the directive. The _Pragma token has just
+/// been read into 'Tok'.
+void Preprocessor::Handle_Pragma(Token &Tok) {
+ // Remember the pragma token location.
+ SourceLocation PragmaLoc = Tok.getLocation();
+
+ // Read the '('.
+ Lex(Tok);
+ if (Tok.isNot(tok::l_paren))
+ return Diag(PragmaLoc, diag::err__Pragma_malformed);
+
+ // Read the '"..."'.
+ Lex(Tok);
+ if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal))
+ return Diag(PragmaLoc, diag::err__Pragma_malformed);
+
+ // Remember the string.
+ std::string StrVal = getSpelling(Tok);
+ SourceLocation StrLoc = Tok.getLocation();
+
+ // Read the ')'.
+ Lex(Tok);
+ if (Tok.isNot(tok::r_paren))
+ return Diag(PragmaLoc, diag::err__Pragma_malformed);
+
+ // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1.
+ if (StrVal[0] == 'L') // Remove L prefix.
+ StrVal.erase(StrVal.begin());
+ assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+ "Invalid string token!");
+
+ // Remove the front quote, replacing it with a space, so that the pragma
+ // contents appear to have a space before them.
+ StrVal[0] = ' ';
+
+ // Replace the terminating quote with a \n\0.
+ StrVal[StrVal.size()-1] = '\n';
+ StrVal += '\0';
+
+ // Remove escaped quotes and escapes.
+ for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) {
+ if (StrVal[i] == '\\' &&
+ (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
+ // \\ -> '\' and \" -> '"'.
+ StrVal.erase(StrVal.begin()+i);
+ --e;
+ }
+ }
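+  // e.g. _Pragma("GCC dependency \"file.h\"") has now been destringized to
+  // the text ' GCC dependency "file.h"\n', ready to re-lex as a #pragma line.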
+
+ // Plop the string (including the newline and trailing null) into a buffer
+ // where we can lex it.
+ SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size(), StrLoc);
+ const char *StrData = SourceMgr.getCharacterData(TokLoc);
+
+ // Make and enter a lexer object so that we lex and expand the tokens just
+ // like any others.
+ Lexer *TL = new Lexer(TokLoc, *this,
+ StrData, StrData+StrVal.size()-1 /* no null */);
+
+ // Ensure that the lexer thinks it is inside a directive, so that end \n will
+ // return an EOM token.
+ TL->ParsingPreprocessorDirective = true;
+
+ // This lexer really is for _Pragma.
+ TL->Is_PragmaLexer = true;
+
+ EnterSourceFileWithLexer(TL, 0);
+
+ // With everything set up, lex this as a #pragma directive.
+ HandlePragmaDirective();
+
+ // Finally, return whatever came after the pragma directive.
+ return Lex(Tok);
+}
+
+
+
+/// HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'.
+///
+void Preprocessor::HandlePragmaOnce(Token &OnceTok) {
+ if (isInPrimaryFile()) {
+ Diag(OnceTok, diag::pp_pragma_once_in_main_file);
+ return;
+ }
+
+ // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc.
+ SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
+
+ // Mark the file as a once-only file now.
+ HeaderInfo.MarkFileIncludeOnce(SourceMgr.getFileEntryForLoc(FileLoc));
+}
+
+void Preprocessor::HandlePragmaMark() {
+ assert(CurLexer && "No current lexer?");
+ CurLexer->ReadToEndOfLine();
+}
+
+
+/// HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'.
+///
+void Preprocessor::HandlePragmaPoison(Token &PoisonTok) {
+ Token Tok;
+
+ while (1) {
+ // Read the next token to poison. While doing this, pretend that we are
+ // skipping while reading the identifier to poison.
+ // This avoids errors on code like:
+ // #pragma GCC poison X
+ // #pragma GCC poison X
+ if (CurLexer) CurLexer->LexingRawMode = true;
+ LexUnexpandedToken(Tok);
+ if (CurLexer) CurLexer->LexingRawMode = false;
+
+ // If we reached the end of line, we're done.
+ if (Tok.is(tok::eom)) return;
+
+ // Can only poison identifiers.
+ if (Tok.isNot(tok::identifier)) {
+ Diag(Tok, diag::err_pp_invalid_poison);
+ return;
+ }
+
+ // Look up the identifier info for the token. We disabled identifier lookup
+ // by saying we're skipping contents, so we need to do this manually.
+ IdentifierInfo *II = LookUpIdentifierInfo(Tok);
+
+ // Already poisoned.
+ if (II->isPoisoned()) continue;
+
+ // If this is a macro identifier, emit a warning.
+ if (II->hasMacroDefinition())
+ Diag(Tok, diag::pp_poisoning_existing_macro);
+
+ // Finally, poison it!
+ II->setIsPoisoned();
+ }
+}
+
+/// HandlePragmaSystemHeader - Implement #pragma GCC system_header. We know
+/// that the whole directive has been parsed.
+void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
+ if (isInPrimaryFile()) {
+ Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file);
+ return;
+ }
+
+ // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc.
+ Lexer *TheLexer = getCurrentFileLexer();
+
+ // Mark the file as a system header.
+ const FileEntry *File = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
+ HeaderInfo.MarkFileSystemHeader(File);
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks)
+ Callbacks->FileChanged(TheLexer->getSourceLocation(TheLexer->BufferPtr),
+ PPCallbacks::SystemHeaderPragma,
+ DirectoryLookup::SystemHeaderDir);
+}
+
+/// HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah.
+///
+void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
+ Token FilenameTok;
+ CurLexer->LexIncludeFilename(FilenameTok);
+
+ // If the token kind is EOM, the error has already been diagnosed.
+ if (FilenameTok.is(tok::eom))
+ return;
+
+ // Reserve a buffer to get the spelling.
+ llvm::SmallVector<char, 128> FilenameBuffer;
+ FilenameBuffer.resize(FilenameTok.getLength());
+
+ const char *FilenameStart = &FilenameBuffer[0];
+ unsigned Len = getSpelling(FilenameTok, FilenameStart);
+ const char *FilenameEnd = FilenameStart+Len;
+ bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(),
+ FilenameStart, FilenameEnd);
+ // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+ // error.
+ if (FilenameStart == 0)
+ return;
+
+ // Search include directories for this file.
+ const DirectoryLookup *CurDir;
+ const FileEntry *File = LookupFile(FilenameStart, FilenameEnd,
+ isAngled, 0, CurDir);
+ if (File == 0)
+ return Diag(FilenameTok, diag::err_pp_file_not_found,
+ std::string(FilenameStart, FilenameEnd));
+
+ SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
+ const FileEntry *CurFile = SourceMgr.getFileEntryForLoc(FileLoc);
+
+ // If this file is older than the file it depends on, emit a diagnostic.
+ if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) {
+    // Lex the tokens at the end of the directive and include them in the
+    // message.
+ std::string Message;
+ Lex(DependencyTok);
+ while (DependencyTok.isNot(tok::eom)) {
+ Message += getSpelling(DependencyTok) + " ";
+ Lex(DependencyTok);
+ }
+
+ Message.erase(Message.end()-1);
+ Diag(FilenameTok, diag::pp_out_of_date_dependency, Message);
+ }
+}
+
+
+/// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
+/// If 'Namespace' is non-null, then it is a token required to exist on the
+/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
+void Preprocessor::AddPragmaHandler(const char *Namespace,
+ PragmaHandler *Handler) {
+ PragmaNamespace *InsertNS = PragmaHandlers;
+
+ // If this is specified to be in a namespace, step down into it.
+ if (Namespace) {
+ IdentifierInfo *NSID = getIdentifierInfo(Namespace);
+
+ // If there is already a pragma handler with the name of this namespace,
+ // we either have an error (directive with the same name as a namespace) or
+ // we already have the namespace to insert into.
+ if (PragmaHandler *Existing = PragmaHandlers->FindHandler(NSID)) {
+ InsertNS = Existing->getIfNamespace();
+ assert(InsertNS != 0 && "Cannot have a pragma namespace and pragma"
+ " handler with the same name!");
+ } else {
+ // Otherwise, this namespace doesn't exist yet, create and insert the
+ // handler for it.
+ InsertNS = new PragmaNamespace(NSID);
+ PragmaHandlers->AddPragma(InsertNS);
+ }
+ }
+
+ // Check to make sure we don't already have a pragma for this identifier.
+ assert(!InsertNS->FindHandler(Handler->getName()) &&
+ "Pragma handler already exists for this identifier!");
+ InsertNS->AddPragma(Handler);
+}
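+
+// RegisterBuiltinPragmas (below) shows typical use: a null namespace for
+// "#pragma once" and "#pragma mark", and the "GCC" namespace for the
+// GCC-compatible pragmas.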
+
+namespace {
+/// PragmaOnceHandler - "#pragma once" marks the file as atomically included.
+struct PragmaOnceHandler : public PragmaHandler {
+ PragmaOnceHandler(const IdentifierInfo *OnceID) : PragmaHandler(OnceID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &OnceTok) {
+ PP.CheckEndOfDirective("#pragma once");
+ PP.HandlePragmaOnce(OnceTok);
+ }
+};
+
+/// PragmaMarkHandler - "#pragma mark ..." is ignored by the compiler, and the
+/// rest of the line is not lexed.
+struct PragmaMarkHandler : public PragmaHandler {
+ PragmaMarkHandler(const IdentifierInfo *MarkID) : PragmaHandler(MarkID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &MarkTok) {
+ PP.HandlePragmaMark();
+ }
+};
+
+/// PragmaPoisonHandler - "#pragma poison x" marks x as not usable.
+struct PragmaPoisonHandler : public PragmaHandler {
+ PragmaPoisonHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &PoisonTok) {
+ PP.HandlePragmaPoison(PoisonTok);
+ }
+};
+
+/// PragmaSystemHeaderHandler - "#pragma system_header" marks the current file
+/// as a system header, which silences warnings in it.
+struct PragmaSystemHeaderHandler : public PragmaHandler {
+ PragmaSystemHeaderHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &SHToken) {
+ PP.HandlePragmaSystemHeader(SHToken);
+ PP.CheckEndOfDirective("#pragma");
+ }
+};
+struct PragmaDependencyHandler : public PragmaHandler {
+ PragmaDependencyHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &DepToken) {
+ PP.HandlePragmaDependency(DepToken);
+ }
+};
+} // end anonymous namespace
+
+
+/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
+/// #pragma GCC poison/system_header/dependency and #pragma once.
+void Preprocessor::RegisterBuiltinPragmas() {
+ AddPragmaHandler(0, new PragmaOnceHandler(getIdentifierInfo("once")));
+ AddPragmaHandler(0, new PragmaMarkHandler(getIdentifierInfo("mark")));
+ AddPragmaHandler("GCC", new PragmaPoisonHandler(getIdentifierInfo("poison")));
+ AddPragmaHandler("GCC", new PragmaSystemHeaderHandler(
+ getIdentifierInfo("system_header")));
+ AddPragmaHandler("GCC", new PragmaDependencyHandler(
+ getIdentifierInfo("dependency")));
+}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
new file mode 100644
index 00000000000..86156a07728
--- /dev/null
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -0,0 +1,560 @@
+//===--- Preprocessor.cpp - C Language Family Preprocessor Implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Preprocessor interface.
+//
+//===----------------------------------------------------------------------===//
+//
+// Options to support:
+// -H - Print the name of each header file used.
+// -d[MDNI] - Dump various things.
+// -fworking-directory - #line's with preprocessor's working dir.
+// -fpreprocessed
+// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
+// -W*
+// -w
+//
+// Messages to emit:
+// "Multiple include guards may be useful for:\n"
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Streams.h"
+#include <ctime>
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+
+Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
+ TargetInfo &target, SourceManager &SM,
+ HeaderSearch &Headers)
+ : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()),
+ SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts),
+ CurLexer(0), CurDirLookup(0), CurTokenLexer(0), Callbacks(0) {
+ ScratchBuf = new ScratchBuffer(SourceMgr);
+
+ // Clear stats.
+ NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
+ NumIf = NumElse = NumEndif = 0;
+ NumEnteredSourceFiles = 0;
+ NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
+ NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
+ MaxIncludeStackDepth = 0;
+ NumSkipped = 0;
+
+ // Default to discarding comments.
+ KeepComments = false;
+ KeepMacroComments = false;
+
+ // Macro expansion is enabled.
+ DisableMacroExpansion = false;
+ InMacroArgs = false;
+ NumCachedTokenLexers = 0;
+
+ // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
+ // This gets unpoisoned where it is allowed.
+ (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
+
+ Predefines = 0;
+
+ // Initialize the pragma handlers.
+ PragmaHandlers = new PragmaNamespace(0);
+ RegisterBuiltinPragmas();
+
+ // Initialize builtin macros like __LINE__ and friends.
+ RegisterBuiltinMacros();
+}
+
+Preprocessor::~Preprocessor() {
+ // Free any active lexers.
+ delete CurLexer;
+
+ while (!IncludeMacroStack.empty()) {
+ delete IncludeMacroStack.back().TheLexer;
+ delete IncludeMacroStack.back().TheTokenLexer;
+ IncludeMacroStack.pop_back();
+ }
+
+ // Free any macro definitions.
+ for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I =
+ Macros.begin(), E = Macros.end(); I != E; ++I) {
+ // Free the macro definition.
+ delete I->second;
+ I->second = 0;
+ I->first->setHasMacroDefinition(false);
+ }
+
+ // Free any cached macro expanders.
+ for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)
+ delete TokenLexerCache[i];
+
+ // Release pragma information.
+ delete PragmaHandlers;
+
+ // Delete the scratch buffer info.
+ delete ScratchBuf;
+
+ delete Callbacks;
+}
+
+/// Diag - Forwarding function for diagnostics. This emits a diagnostic at
+/// the specified Token's location, translating the token's start
+/// position in the current buffer into a SourcePosition object for rendering.
+void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID) {
+ Diags.Report(getFullLoc(Loc), DiagID);
+}
+
+void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID,
+ const std::string &Msg) {
+ Diags.Report(getFullLoc(Loc), DiagID, &Msg, 1);
+}
+
+void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
+ llvm::cerr << tok::getTokenName(Tok.getKind()) << " '"
+ << getSpelling(Tok) << "'";
+
+ if (!DumpFlags) return;
+
+ llvm::cerr << "\t";
+ if (Tok.isAtStartOfLine())
+ llvm::cerr << " [StartOfLine]";
+ if (Tok.hasLeadingSpace())
+ llvm::cerr << " [LeadingSpace]";
+ if (Tok.isExpandDisabled())
+ llvm::cerr << " [ExpandDisabled]";
+ if (Tok.needsCleaning()) {
+ const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
+ llvm::cerr << " [UnClean='" << std::string(Start, Start+Tok.getLength())
+ << "']";
+ }
+
+ llvm::cerr << "\tLoc=<";
+ DumpLocation(Tok.getLocation());
+ llvm::cerr << ">";
+}
+
+void Preprocessor::DumpLocation(SourceLocation Loc) const {
+ SourceLocation LogLoc = SourceMgr.getLogicalLoc(Loc);
+  llvm::cerr << SourceMgr.getSourceName(LogLoc) << ':'
+             << SourceMgr.getLineNumber(LogLoc) << ':'
+             << SourceMgr.getColumnNumber(LogLoc);
+
+ SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(Loc);
+ if (PhysLoc != LogLoc) {
+ llvm::cerr << " <PhysLoc=";
+ DumpLocation(PhysLoc);
+ llvm::cerr << ">";
+ }
+}
+
+void Preprocessor::DumpMacro(const MacroInfo &MI) const {
+ llvm::cerr << "MACRO: ";
+ for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
+ DumpToken(MI.getReplacementToken(i));
+ llvm::cerr << " ";
+ }
+ llvm::cerr << "\n";
+}
+
+void Preprocessor::PrintStats() {
+ llvm::cerr << "\n*** Preprocessor Stats:\n";
+ llvm::cerr << NumDirectives << " directives found:\n";
+ llvm::cerr << " " << NumDefined << " #define.\n";
+ llvm::cerr << " " << NumUndefined << " #undef.\n";
+ llvm::cerr << " #include/#include_next/#import:\n";
+ llvm::cerr << " " << NumEnteredSourceFiles << " source files entered.\n";
+ llvm::cerr << " " << MaxIncludeStackDepth << " max include stack depth\n";
+ llvm::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n";
+ llvm::cerr << " " << NumElse << " #else/#elif.\n";
+ llvm::cerr << " " << NumEndif << " #endif.\n";
+ llvm::cerr << " " << NumPragma << " #pragma.\n";
+  llvm::cerr << NumSkipped << " #if/#ifndef/#ifdef regions skipped\n";
+
+ llvm::cerr << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
+ << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
+ << NumFastMacroExpanded << " on the fast path.\n";
+ llvm::cerr << (NumFastTokenPaste+NumTokenPaste)
+ << " token paste (##) operations performed, "
+ << NumFastTokenPaste << " on the fast path.\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+
+/// getSpelling() - Return the 'spelling' of this token. The spelling of a
+/// token is the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding. In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs,
+/// UCNs, etc.
+std::string Preprocessor::getSpelling(const Token &Tok) const {
+ assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+ // If this token contains nothing interesting, return it directly.
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+ if (!Tok.needsCleaning())
+ return std::string(TokStart, TokStart+Tok.getLength());
+
+ std::string Result;
+ Result.reserve(Tok.getLength());
+
+ // Otherwise, hard case, relex the characters into the string.
+ for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+ Ptr != End; ) {
+ unsigned CharSize;
+ Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+ Ptr += CharSize;
+ }
+ assert(Result.size() != unsigned(Tok.getLength()) &&
+ "NeedsCleaning flag set on something that didn't need cleaning!");
+ return Result;
+}
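+
+// For example, an identifier written "fo\<newline>o" in the source (an
+// escaped newline inside the token) needs cleaning: its spelling is the
+// three characters "foo", shorter than the token's physical length of five.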
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// preallocated buffer, instead of as an std::string. The caller is required
+/// to allocate enough space for the token; Tok.getLength() bytes is always
+/// sufficient. The actual length of the token is returned.
+///
+/// Note that this method may do two possible things: it may either fill in
+/// the buffer specified with characters, or it may *change the input pointer*
+/// to point to a constant buffer with the data already in it (avoiding a
+/// copy). The caller is not allowed to modify the returned buffer pointer
+/// if an internal buffer is returned.
+unsigned Preprocessor::getSpelling(const Token &Tok,
+ const char *&Buffer) const {
+ assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+ // If this token is an identifier, just return the string from the identifier
+ // table, which is very quick.
+ if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+ Buffer = II->getName();
+
+ // Return the length of the token. If the token needed cleaning, don't
+ // include the size of the newlines or trigraphs in it.
+ if (!Tok.needsCleaning())
+ return Tok.getLength();
+ else
+ return strlen(Buffer);
+ }
+
+ // Otherwise, compute the start of the token in the input lexer buffer.
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+
+ // If this token contains nothing interesting, return it directly.
+ if (!Tok.needsCleaning()) {
+ Buffer = TokStart;
+ return Tok.getLength();
+ }
+ // Otherwise, hard case, relex the characters into the string.
+ char *OutBuf = const_cast<char*>(Buffer);
+ for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+ Ptr != End; ) {
+ unsigned CharSize;
+ *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+ Ptr += CharSize;
+ }
+ assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+ "NeedsCleaning flag set on something that didn't need cleaning!");
+
+ return OutBuf-Buffer;
+}
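+
+// Illustrative caller pattern (a sketch; LookUpIdentifierInfo below uses
+// exactly this approach when a token needs cleaning):
+//
+//   llvm::SmallVector<char, 64> SpellingBuf;
+//   SpellingBuf.resize(Tok.getLength());
+//   const char *Ptr = &SpellingBuf[0];
+//   unsigned Len = PP.getSpelling(Tok, Ptr);  // Ptr may be redirected.
+//   // Use [Ptr, Ptr+Len); never write through Ptr if it was redirected.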
+
+
+/// CreateString - Plop the specified string into a scratch buffer and return a
+/// location for it. If specified, the source location provides a source
+/// location for the token.
+SourceLocation Preprocessor::
+CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) {
+ if (SLoc.isValid())
+ return ScratchBuf->getToken(Buf, Len, SLoc);
+ return ScratchBuf->getToken(Buf, Len);
+}
+
+
+/// AdvanceToTokenCharacter - Given a location that specifies the start of a
+/// token, return a new location that specifies a character within the token.
+SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart,
+ unsigned CharNo) {
+ // If they request the first char of the token, we're trivially done. If this
+ // is a macro expansion, it doesn't make sense to point to a character within
+ // the instantiation point (the name). We could point to the source
+ // character, but without also pointing to instantiation info, this is
+ // confusing.
+ if (CharNo == 0 || TokStart.isMacroID()) return TokStart;
+
+ // Figure out how many physical characters away the specified logical
+ // character is. This needs to take into consideration newlines and
+ // trigraphs.
+ const char *TokPtr = SourceMgr.getCharacterData(TokStart);
+ unsigned PhysOffset = 0;
+
+ // The usual case is that tokens don't contain anything interesting. Skip
+ // over the uninteresting characters. If a token only consists of simple
+ // chars, this method is extremely fast.
+ while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr))
+ ++TokPtr, --CharNo, ++PhysOffset;
+
+ // If we have a character that may be a trigraph or escaped newline, create a
+ // lexer to parse it correctly.
+ if (CharNo != 0) {
+ // Create a lexer starting at this token position.
+ Lexer TheLexer(TokStart, *this, TokPtr);
+ Token Tok;
+    // Skip over the remaining characters.
+ const char *TokStartPtr = TokPtr;
+ for (; CharNo; --CharNo)
+ TheLexer.getAndAdvanceChar(TokPtr, Tok);
+
+ PhysOffset += TokPtr-TokStartPtr;
+ }
+
+ return TokStart.getFileLocWithOffset(PhysOffset);
+}
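+
+// For example, in an identifier written "ab\<newline>cd", logical character
+// #2 ('c') is at physical offset 4, because the escaped newline occupies two
+// physical characters that contribute nothing to the spelling.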
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Initialization Methods
+//===----------------------------------------------------------------------===//
+
+// Append a #define line to Buf for Macro. Macro should be of the form "XXX",
+// in which case we emit "#define XXX 1", or "XXX=Y z W", in which case we
+// emit "#define XXX Y z W". To get a #define with no value, use "XXX=".
+static void DefineBuiltinMacro(std::vector<char> &Buf, const char *Macro,
+ const char *Command = "#define ") {
+ Buf.insert(Buf.end(), Command, Command+strlen(Command));
+ if (const char *Equal = strchr(Macro, '=')) {
+ // Turn the = into ' '.
+ Buf.insert(Buf.end(), Macro, Equal);
+ Buf.push_back(' ');
+ Buf.insert(Buf.end(), Equal+1, Equal+strlen(Equal));
+ } else {
+ // Push "macroname 1".
+ Buf.insert(Buf.end(), Macro, Macro+strlen(Macro));
+ Buf.push_back(' ');
+ Buf.push_back('1');
+ }
+ Buf.push_back('\n');
+}
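+
+// For example (FOO and BAR are hypothetical):
+//   DefineBuiltinMacro(Buf, "__GNUC__=4") appends "#define __GNUC__ 4\n",
+//   DefineBuiltinMacro(Buf, "FOO")        appends "#define FOO 1\n", and
+//   DefineBuiltinMacro(Buf, "BAR=")       appends "#define BAR \n".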
+
+
+static void InitializePredefinedMacros(Preprocessor &PP,
+ std::vector<char> &Buf) {
+ // FIXME: Implement magic like cpp_init_builtins for things like __STDC__
+ // and __DATE__ etc.
+#if 0
+ /* __STDC__ has the value 1 under normal circumstances.
+ However, if (a) we are in a system header, (b) the option
+ stdc_0_in_system_headers is true (set by target config), and
+ (c) we are not in strictly conforming mode, then it has the
+ value 0. (b) and (c) are already checked in cpp_init_builtins. */
+ //case BT_STDC:
+ if (cpp_in_system_header (pfile))
+ number = 0;
+ else
+ number = 1;
+ break;
+#endif
+ // These should all be defined in the preprocessor according to the
+ // current language configuration.
+ DefineBuiltinMacro(Buf, "__STDC__=1");
+ //DefineBuiltinMacro(Buf, "__ASSEMBLER__=1");
+ if (PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus)
+ DefineBuiltinMacro(Buf, "__STDC_VERSION__=199901L");
+ else if (0) // STDC94 ?
+ DefineBuiltinMacro(Buf, "__STDC_VERSION__=199409L");
+
+ DefineBuiltinMacro(Buf, "__STDC_HOSTED__=1");
+ if (PP.getLangOptions().ObjC1)
+ DefineBuiltinMacro(Buf, "__OBJC__=1");
+ if (PP.getLangOptions().ObjC2)
+ DefineBuiltinMacro(Buf, "__OBJC2__=1");
+
+ // Add __builtin_va_list typedef.
+ {
+ const char *VAList = PP.getTargetInfo().getVAListDeclaration();
+ Buf.insert(Buf.end(), VAList, VAList+strlen(VAList));
+ Buf.push_back('\n');
+ }
+
+ // Get the target #defines.
+ PP.getTargetInfo().getTargetDefines(Buf);
+
+ // Compiler set macros.
+ DefineBuiltinMacro(Buf, "__APPLE_CC__=5250");
+ DefineBuiltinMacro(Buf, "__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__=1050");
+ DefineBuiltinMacro(Buf, "__GNUC_MINOR__=0");
+ DefineBuiltinMacro(Buf, "__GNUC_PATCHLEVEL__=1");
+ DefineBuiltinMacro(Buf, "__GNUC__=4");
+ DefineBuiltinMacro(Buf, "__GXX_ABI_VERSION=1002");
+ DefineBuiltinMacro(Buf, "__VERSION__=\"4.0.1 (Apple Computer, Inc. "
+ "build 5250)\"");
+
+ // Build configuration options.
+ DefineBuiltinMacro(Buf, "__DYNAMIC__=1");
+ DefineBuiltinMacro(Buf, "__FINITE_MATH_ONLY__=0");
+ DefineBuiltinMacro(Buf, "__NO_INLINE__=1");
+ DefineBuiltinMacro(Buf, "__PIC__=1");
+
+ if (PP.getLangOptions().CPlusPlus) {
+ DefineBuiltinMacro(Buf, "__DEPRECATED=1");
+ DefineBuiltinMacro(Buf, "__EXCEPTIONS=1");
+ DefineBuiltinMacro(Buf, "__GNUG__=4");
+ DefineBuiltinMacro(Buf, "__GXX_WEAK__=1");
+ DefineBuiltinMacro(Buf, "__cplusplus=1");
+ DefineBuiltinMacro(Buf, "__private_extern__=extern");
+ }
+ if (PP.getLangOptions().Microsoft) {
+ DefineBuiltinMacro(Buf, "__stdcall=");
+ DefineBuiltinMacro(Buf, "__cdecl=");
+ DefineBuiltinMacro(Buf, "_cdecl=");
+ DefineBuiltinMacro(Buf, "__ptr64=");
+ DefineBuiltinMacro(Buf, "__w64=");
+ DefineBuiltinMacro(Buf, "__forceinline=");
+ DefineBuiltinMacro(Buf, "__int8=char");
+ DefineBuiltinMacro(Buf, "__int16=short");
+ DefineBuiltinMacro(Buf, "__int32=int");
+ DefineBuiltinMacro(Buf, "__int64=long long");
+ DefineBuiltinMacro(Buf, "__declspec(X)=");
+ }
+ // FIXME: Should emit a #line directive here.
+}
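+
+// Schematically, the buffer built above contains lines like the following
+// (the exact set depends on the language options and target):
+//
+//   #define __STDC__ 1
+//   #define __STDC_HOSTED__ 1
+//   typedef char *__builtin_va_list;   // target's getVAListDeclaration()
+//   #define __GNUC__ 4
+//
+// EnterMainSourceFile appends the driver's predefines to this and lexes the
+// result as the "<predefines>" buffer before the real main file.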
+
+
+/// EnterMainSourceFile - Enter the specified FileID as the main source file,
+/// which implicitly adds the builtin defines etc.
+void Preprocessor::EnterMainSourceFile() {
+ unsigned MainFileID = SourceMgr.getMainFileID();
+
+ // Enter the main file source buffer.
+ EnterSourceFile(MainFileID, 0);
+
+ // Tell the header info that the main file was entered. If the file is later
+ // #imported, it won't be re-entered.
+ if (const FileEntry *FE =
+ SourceMgr.getFileEntryForLoc(SourceLocation::getFileLoc(MainFileID, 0)))
+ HeaderInfo.IncrementIncludeCount(FE);
+
+ std::vector<char> PrologFile;
+ PrologFile.reserve(4080);
+
+ // Install things like __POWERPC__, __GNUC__, etc into the macro table.
+ InitializePredefinedMacros(*this, PrologFile);
+
+ // Add on the predefines from the driver.
+  PrologFile.insert(PrologFile.end(), Predefines,
+                    Predefines+strlen(Predefines));
+
+ // Memory buffer must end with a null byte!
+ PrologFile.push_back(0);
+
+ // Now that we have emitted the predefined macros, #includes, etc into
+ // PrologFile, preprocess it to populate the initial preprocessor state.
+  llvm::MemoryBuffer *SB =
+    llvm::MemoryBuffer::getMemBufferCopy(&PrologFile.front(),
+                                         &PrologFile.back(), "<predefines>");
+ assert(SB && "Cannot fail to create predefined source buffer");
+ unsigned FileID = SourceMgr.createFileIDForMemBuffer(SB);
+ assert(FileID && "Could not create FileID for predefines?");
+
+ // Start parsing the predefines.
+ EnterSourceFile(FileID, 0);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Lexer Event Handling.
+//===----------------------------------------------------------------------===//
+
+/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
+/// identifier information for the token and install it into the token.
+IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
+ const char *BufPtr) {
+ assert(Identifier.is(tok::identifier) && "Not an identifier!");
+ assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!");
+
+ // Look up this token, see if it is a macro, or if it is a language keyword.
+ IdentifierInfo *II;
+ if (BufPtr && !Identifier.needsCleaning()) {
+ // No cleaning needed, just use the characters from the lexed buffer.
+ II = getIdentifierInfo(BufPtr, BufPtr+Identifier.getLength());
+ } else {
+ // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
+ llvm::SmallVector<char, 64> IdentifierBuffer;
+ IdentifierBuffer.resize(Identifier.getLength());
+ const char *TmpBuf = &IdentifierBuffer[0];
+ unsigned Size = getSpelling(Identifier, TmpBuf);
+ II = getIdentifierInfo(TmpBuf, TmpBuf+Size);
+ }
+ Identifier.setIdentifierInfo(II);
+ return II;
+}
+
+
+/// HandleIdentifier - This callback is invoked when the lexer reads an
+/// identifier. This callback looks up the identifier in the map and/or
+/// potentially macro expands it or turns it into a named token (like 'for').
+void Preprocessor::HandleIdentifier(Token &Identifier) {
+ assert(Identifier.getIdentifierInfo() &&
+ "Can't handle identifiers without identifier info!");
+
+ IdentifierInfo &II = *Identifier.getIdentifierInfo();
+
+ // If this identifier was poisoned, and if it was not produced from a macro
+ // expansion, emit an error.
+ if (II.isPoisoned() && CurLexer) {
+ if (&II != Ident__VA_ARGS__) // We warn about __VA_ARGS__ with poisoning.
+ Diag(Identifier, diag::err_pp_used_poisoned_id);
+ else
+ Diag(Identifier, diag::ext_pp_bad_vaargs_use);
+ }
+
+ // If this is a macro to be expanded, do it.
+ if (MacroInfo *MI = getMacroInfo(&II)) {
+ if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) {
+ if (MI->isEnabled()) {
+ if (!HandleMacroExpandedIdentifier(Identifier, MI))
+ return;
+ } else {
+ // C99 6.10.3.4p2 says that a disabled macro may never again be
+ // expanded, even if it's in a context where it could be expanded in the
+ // future.
+ Identifier.setFlag(Token::DisableExpand);
+ }
+ }
+ }
+
+ // C++ 2.11p2: If this is an alternative representation of a C++ operator,
+ // then we act as if it is the actual operator and not the textual
+ // representation of it.
+ if (II.isCPlusPlusOperatorKeyword())
+ Identifier.setIdentifierInfo(0);
+
+ // Change the kind of this identifier to the appropriate token kind, e.g.
+ // turning "for" into a keyword.
+ Identifier.setKind(II.getTokenID());
+
+ // If this is an extension token, diagnose its use.
+  // FIXME: tried (unsuccessfully) to shut this up when compiling with gnu99
+ // For now, I'm just commenting it out (while I work on attributes).
+ if (II.isExtensionToken() && Features.C99)
+ Diag(Identifier, diag::ext_token_used);
+}
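+
+// For example, after "#pragma GCC poison exit", any later use of the
+// identifier 'exit' that was not produced by a macro expansion reaches the
+// poison check above and is diagnosed with err_pp_used_poisoned_id.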
+
diff --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp
new file mode 100644
index 00000000000..99fbdf75654
--- /dev/null
+++ b/clang/lib/Lex/ScratchBuffer.cpp
@@ -0,0 +1,72 @@
+//===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScratchBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstring>
+using namespace clang;
+
+// ScratchBufSize - The size of each chunk of scratch memory. Slightly less
+// than a page, almost certainly enough for anything. :)
+static const unsigned ScratchBufSize = 4060;
+
+ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) {
+ // Set BytesUsed so that the first call to getToken will require an alloc.
+ BytesUsed = ScratchBufSize;
+ FileID = 0;
+}
+
+/// getToken - Splat the specified text into a temporary MemoryBuffer and
+/// return a SourceLocation that refers to the token. This is just like the
+/// method below, but returns a location that indicates the physloc of the
+/// token.
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
+ if (BytesUsed+Len > ScratchBufSize)
+ AllocScratchBuffer(Len);
+
+ // Copy the token data into the buffer.
+ memcpy(CurBuffer+BytesUsed, Buf, Len);
+
+ // Remember that we used these bytes.
+ BytesUsed += Len;
+
+ assert(BytesUsed-Len < (1 << SourceLocation::FilePosBits) &&
+ "Out of range file position!");
+
+ return SourceLocation::getFileLoc(FileID, BytesUsed-Len);
+}
+
+
+/// getToken - Splat the specified text into a temporary MemoryBuffer and
+/// return a SourceLocation that refers to the token. The SourceLoc value
+/// gives a virtual location that the token will appear to be from.
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+ SourceLocation SourceLoc) {
+ // Map the physloc to the specified sourceloc.
+ return SourceMgr.getInstantiationLoc(getToken(Buf, Len), SourceLoc);
+}
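+
+// Illustrative use (this mirrors Preprocessor::CreateString): splat a pasted
+// token spelling into scratch space, attributing it to the paste location:
+//
+//   SourceLocation Loc = ScratchBuf->getToken(Spelling, Len, PasteOpLoc);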
+
+void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
+ // Only pay attention to the requested length if it is larger than our default
+ // page size. If it is, we allocate an entire chunk for it. This is to
+ // support gigantic tokens, which almost certainly won't happen. :)
+ if (RequestLen < ScratchBufSize)
+ RequestLen = ScratchBufSize;
+
+ llvm::MemoryBuffer *Buf =
+ llvm::MemoryBuffer::getNewMemBuffer(RequestLen, "<scratch space>");
+ FileID = SourceMgr.createFileIDForMemBuffer(Buf);
+ CurBuffer = const_cast<char*>(Buf->getBufferStart());
+ BytesUsed = 0;
+}
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
new file mode 100644
index 00000000000..fc8cfd715c4
--- /dev/null
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -0,0 +1,488 @@
+//===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenLexer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/TokenLexer.h"
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace clang;
+
+
+/// Create a TokenLexer for the specified macro with the specified actual
+/// arguments. Note that this routine takes ownership of the ActualArgs
+/// pointer.
+void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) {
+ // If the client is reusing a TokenLexer, make sure to free any memory
+ // associated with it.
+ destroy();
+
+ Macro = PP.getMacroInfo(Tok.getIdentifierInfo());
+ ActualArgs = Actuals;
+ CurToken = 0;
+ InstantiateLoc = Tok.getLocation();
+ AtStartOfLine = Tok.isAtStartOfLine();
+ HasLeadingSpace = Tok.hasLeadingSpace();
+ Tokens = &*Macro->tokens_begin();
+ OwnsTokens = false;
+ DisableMacroExpansion = false;
+ NumTokens = Macro->tokens_end()-Macro->tokens_begin();
+
+ // If this is a function-like macro, expand the arguments and change
+ // Tokens to point to the expanded tokens.
+ if (Macro->isFunctionLike() && Macro->getNumArgs())
+ ExpandFunctionArguments();
+
+ // Mark the macro as currently disabled, so that it is not recursively
+ // expanded. The macro must be disabled only after argument pre-expansion of
+ // function-like macro arguments occurs.
+ Macro->DisableMacro();
+}
+
+
+
+/// Create a TokenLexer for the specified token stream. This takes ownership
+/// of the tokens only if 'ownsTokens' is true.
+void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
+ bool disableMacroExpansion, bool ownsTokens) {
+ // If the client is reusing a TokenLexer, make sure to free any memory
+ // associated with it.
+ destroy();
+
+ Macro = 0;
+ ActualArgs = 0;
+ Tokens = TokArray;
+ OwnsTokens = ownsTokens;
+ DisableMacroExpansion = disableMacroExpansion;
+ NumTokens = NumToks;
+ CurToken = 0;
+ InstantiateLoc = SourceLocation();
+ AtStartOfLine = false;
+ HasLeadingSpace = false;
+
+ // Set HasLeadingSpace/AtStartOfLine so that the first token will be
+ // returned unmodified.
+ if (NumToks != 0) {
+ AtStartOfLine = TokArray[0].isAtStartOfLine();
+ HasLeadingSpace = TokArray[0].hasLeadingSpace();
+ }
+}
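+
+// Illustrative use (a sketch with hypothetical locals): replaying a saved
+// token sequence verbatim, with macro expansion suppressed and without
+// transferring ownership of the array:
+//
+//   TheTokenLexer.Init(SavedToks, NumSavedToks,
+//                      /*disableMacroExpansion=*/true, /*ownsTokens=*/false);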
+
+
+void TokenLexer::destroy() {
+ // If this was a function-like macro that actually uses its arguments, delete
+ // the expanded tokens.
+ if (OwnsTokens) {
+ delete [] Tokens;
+ Tokens = 0;
+ }
+
+  // TokenLexer owns its actual arguments.
+ if (ActualArgs) ActualArgs->destroy();
+}
+
+/// Expand the arguments of a function-like macro so that we can quickly
+/// return preexpanded tokens from Tokens.
+void TokenLexer::ExpandFunctionArguments() {
+ llvm::SmallVector<Token, 128> ResultToks;
+
+ // Loop through 'Tokens', expanding them into ResultToks. Keep
+ // track of whether we change anything. If not, no need to keep them. If so,
+ // we install the newly expanded sequence as the new 'Tokens' list.
+ bool MadeChange = false;
+
+ // NextTokGetsSpace - When this is true, the next token appended to the
+ // output list will get a leading space, regardless of whether it had one to
+ // begin with or not. This is used for placemarker support.
+ bool NextTokGetsSpace = false;
+
+ for (unsigned i = 0, e = NumTokens; i != e; ++i) {
+    // If we found the stringify operator, get the argument stringified. The
+    // preprocessor already verified that the following token is a macro
+    // argument name when the #define was parsed.
+ const Token &CurTok = Tokens[i];
+ if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) {
+ int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
+ assert(ArgNo != -1 && "Token following # is not an argument?");
+
+ Token Res;
+ if (CurTok.is(tok::hash)) // Stringify
+ Res = ActualArgs->getStringifiedArgument(ArgNo, PP);
+ else {
+ // 'charify': don't bother caching these.
+ Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
+ PP, true);
+ }
+
+ // The stringified/charified string leading space flag gets set to match
+ // the #/#@ operator.
+ if (CurTok.hasLeadingSpace() || NextTokGetsSpace)
+ Res.setFlag(Token::LeadingSpace);
+
+ ResultToks.push_back(Res);
+ MadeChange = true;
+ ++i; // Skip arg name.
+ NextTokGetsSpace = false;
+ continue;
+ }
+
+ // Otherwise, if this is not an argument token, just add the token to the
+ // output buffer.
+ IdentifierInfo *II = CurTok.getIdentifierInfo();
+ int ArgNo = II ? Macro->getArgumentNum(II) : -1;
+ if (ArgNo == -1) {
+ // This isn't an argument, just add it.
+ ResultToks.push_back(CurTok);
+
+ if (NextTokGetsSpace) {
+ ResultToks.back().setFlag(Token::LeadingSpace);
+ NextTokGetsSpace = false;
+ }
+ continue;
+ }
+
+    // The argument is expanded in some way; the result will differ from the
+    // input.
+ MadeChange = true;
+
+ // Otherwise, this is a use of the argument. Find out if there is a paste
+ // (##) operator before or after the argument.
+ bool PasteBefore =
+ !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
+ bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);
+
+ // If it is not the LHS/RHS of a ## operator, we must pre-expand the
+ // argument and substitute the expanded tokens into the result. This is
+ // C99 6.10.3.1p1.
+ if (!PasteBefore && !PasteAfter) {
+ const Token *ResultArgToks;
+
+ // Only preexpand the argument if it could possibly need it. This
+ // avoids some work in common cases.
+ const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
+ if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
+ ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
+ else
+ ResultArgToks = ArgTok; // Use non-preexpanded tokens.
+
+ // If the arg token expanded into anything, append it.
+ if (ResultArgToks->isNot(tok::eof)) {
+ unsigned FirstResult = ResultToks.size();
+ unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
+ ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
+
+ // If any tokens were substituted from the argument, the whitespace
+ // before the first token should match the whitespace of the arg
+ // identifier.
+ ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
+ CurTok.hasLeadingSpace() ||
+ NextTokGetsSpace);
+ NextTokGetsSpace = false;
+ } else {
+ // If this is an empty argument, and if there was whitespace before the
+ // formal token, make sure the next token gets whitespace before it.
+ NextTokGetsSpace = CurTok.hasLeadingSpace();
+ }
+ continue;
+ }
+
+ // Okay, we have a token that is either the LHS or RHS of a paste (##)
+ // argument. It gets substituted as its non-pre-expanded tokens.
+ const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
+ unsigned NumToks = MacroArgs::getArgLength(ArgToks);
+ if (NumToks) { // Not an empty argument?
+      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
+      // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error
+      // when the expander tries to paste ',' with the first token of the
+      // __VA_ARGS__ expansion.
+ if (PasteBefore && ResultToks.size() >= 2 &&
+ ResultToks[ResultToks.size()-2].is(tok::comma) &&
+ (unsigned)ArgNo == Macro->getNumArgs()-1 &&
+ Macro->isVariadic()) {
+ // Remove the paste operator, report use of the extension.
+ PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
+ ResultToks.pop_back();
+ }
+
+ ResultToks.append(ArgToks, ArgToks+NumToks);
+
+ // If the next token was supposed to get leading whitespace, ensure it has
+ // it now.
+ if (NextTokGetsSpace) {
+ ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace);
+ NextTokGetsSpace = false;
+ }
+ continue;
+ }
+
+ // If an empty argument is on the LHS or RHS of a paste, the standard (C99
+ // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
+ // implement this by eating ## operators when a LHS or RHS expands to
+ // empty.
+ NextTokGetsSpace |= CurTok.hasLeadingSpace();
+ if (PasteAfter) {
+ // Discard the argument token and skip (don't copy to the expansion
+ // buffer) the paste operator after it.
+ NextTokGetsSpace |= Tokens[i+1].hasLeadingSpace();
+ ++i;
+ continue;
+ }
+
+ // If this is on the RHS of a paste operator, we've already copied the
+ // paste operator to the ResultToks list. Remove it.
+ assert(PasteBefore && ResultToks.back().is(tok::hashhash));
+ NextTokGetsSpace |= ResultToks.back().hasLeadingSpace();
+ ResultToks.pop_back();
+
+ // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
+ // and if the macro had at least one real argument, and if the token before
+ // the ## was a comma, remove the comma.
+ if ((unsigned)ArgNo == Macro->getNumArgs()-1 && // is __VA_ARGS__
+ ActualArgs->isVarargsElidedUse() && // Argument elided.
+ !ResultToks.empty() && ResultToks.back().is(tok::comma)) {
+ // Never add a space, even if the comma, ##, or arg had a space.
+ NextTokGetsSpace = false;
+ // Remove the paste operator, report use of the extension.
+ PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
+ ResultToks.pop_back();
+ }
+ continue;
+ }
+
+ // If anything changed, install this as the new Tokens list.
+ if (MadeChange) {
+ // This is deleted in the dtor.
+ NumTokens = ResultToks.size();
+ Token *Res = new Token[ResultToks.size()];
+ if (NumTokens)
+ memcpy(Res, &ResultToks[0], NumTokens*sizeof(Token));
+ Tokens = Res;
+ OwnsTokens = true;
+ }
+}
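+
+// Worked example: given "#define F(a, b) a ## b + #a", the use F(x, y)
+// pre-expands to the token stream: x ## y + "x". Operands of ## are
+// substituted unexpanded (the actual paste happens later, in PasteTokens),
+// while the #a use is stringified here.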
+
+/// Lex - Lex and return a token from this macro stream.
+///
+void TokenLexer::Lex(Token &Tok) {
+ // Lexing off the end of the macro, pop this macro off the expansion stack.
+ if (isAtEnd()) {
+ // If this is a macro (not a token stream), mark the macro enabled now
+ // that it is no longer being expanded.
+ if (Macro) Macro->EnableMacro();
+
+    // Pop this context off the preprocessor's lexer stack and get the next
+ // token. This will delete "this" so remember the PP instance var.
+ Preprocessor &PPCache = PP;
+ if (PP.HandleEndOfTokenLexer(Tok))
+ return;
+
+ // HandleEndOfTokenLexer may not return a token. If it doesn't, lex
+ // whatever is next.
+ return PPCache.Lex(Tok);
+ }
+
+ // If this is the first token of the expanded result, we inherit spacing
+ // properties later.
+ bool isFirstToken = CurToken == 0;
+
+ // Get the next token to return.
+ Tok = Tokens[CurToken++];
+
+ // If this token is followed by a token paste (##) operator, paste the tokens!
+ if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash))
+ if (PasteTokens(Tok)) {
+ // When handling the microsoft /##/ extension, the final token is
+ // returned by PasteTokens, not the pasted token.
+ return;
+ }
+
+  // The token's current location indicates where the token was lexed from. We
+  // need this information to compute the spelling of the token, but any
+  // diagnostics for the expanded token should appear as if they came from
+  // InstantiateLoc. Pull this information together into a new SourceLocation
+  // that captures all of this.
+ if (InstantiateLoc.isValid()) { // Don't do this for token streams.
+ SourceManager &SrcMgr = PP.getSourceManager();
+ Tok.setLocation(SrcMgr.getInstantiationLoc(Tok.getLocation(),
+ InstantiateLoc));
+ }
+
+ // If this is the first token, set the lexical properties of the token to
+ // match the lexical properties of the macro identifier.
+ if (isFirstToken) {
+ Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+ Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+ }
+
+ // Handle recursive expansion!
+ if (Tok.getIdentifierInfo() && !DisableMacroExpansion)
+ return PP.HandleIdentifier(Tok);
+
+ // Otherwise, return a normal token.
+}
+
+/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
+/// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
+/// is another ## after it, chomp it iteratively. Return the result as Tok.
+/// If this returns true, the caller should immediately return the token.
+bool TokenLexer::PasteTokens(Token &Tok) {
+ llvm::SmallVector<char, 128> Buffer;
+ do {
+ // Consume the ## operator.
+ SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
+ ++CurToken;
+ assert(!isAtEnd() && "No token on the RHS of a paste operator!");
+
+ // Get the RHS token.
+ const Token &RHS = Tokens[CurToken];
+
+ bool isInvalid = false;
+
+ // Allocate space for the result token. This is guaranteed to be enough for
+ // the two tokens and a null terminator.
+ Buffer.resize(Tok.getLength() + RHS.getLength() + 1);
+
+ // Get the spelling of the LHS token in Buffer.
+ const char *BufPtr = &Buffer[0];
+ unsigned LHSLen = PP.getSpelling(Tok, BufPtr);
+ if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
+ memcpy(&Buffer[0], BufPtr, LHSLen);
+
+ BufPtr = &Buffer[LHSLen];
+ unsigned RHSLen = PP.getSpelling(RHS, BufPtr);
+ if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer!
+ memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
+
+ // Add null terminator.
+ Buffer[LHSLen+RHSLen] = '\0';
+
+ // Trim excess space.
+ Buffer.resize(LHSLen+RHSLen+1);
+
+    // Plop the pasted result (including the trailing null) into a scratch
+    // buffer where we can lex it.
+ SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());
+
+ // Lex the resultant pasted token into Result.
+ Token Result;
+
+ // Avoid testing /*, as the lexer would think it is the start of a comment
+ // and emit an error that it is unterminated.
+ if (Tok.is(tok::slash) && RHS.is(tok::star)) {
+ isInvalid = true;
+ } else if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) {
+ // Common paste case: identifier+identifier = identifier. Avoid creating
+ // a lexer and other overhead.
+ PP.IncrementPasteCounter(true);
+ Result.startToken();
+ Result.setKind(tok::identifier);
+ Result.setLocation(ResultTokLoc);
+ Result.setLength(LHSLen+RHSLen);
+ } else {
+ PP.IncrementPasteCounter(false);
+
+ // Make a lexer to lex this string from.
+ SourceManager &SourceMgr = PP.getSourceManager();
+ const char *ResultStrData = SourceMgr.getCharacterData(ResultTokLoc);
+
+ // Make a lexer object so that we lex and expand the paste result.
+ Lexer *TL = new Lexer(ResultTokLoc, PP, ResultStrData,
+ ResultStrData+LHSLen+RHSLen /*don't include null*/);
+
+ // Lex a token in raw mode. This way it won't look up identifiers
+ // automatically, lexing off the end will return an eof token, and
+ // warnings are disabled. This returns true if the result token is the
+ // entire buffer.
+ bool IsComplete = TL->LexRawToken(Result);
+
+ // If we got an EOF token, we didn't form even ONE token. For example, we
+ // did "/ ## /" to get "//".
+ IsComplete &= Result.isNot(tok::eof);
+ isInvalid = !IsComplete;
+
+ // We're now done with the temporary lexer.
+ delete TL;
+ }
+
+ // If pasting the two tokens didn't form a full new token, this is an error.
+ // This occurs with "x ## +" and other stuff. Return with Tok unmodified
+ // and with RHS as the next token to lex.
+ if (isInvalid) {
+ // Test for the Microsoft extension of /##/ turning into // here on the
+ // error path.
+ if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) &&
+ RHS.is(tok::slash)) {
+ HandleMicrosoftCommentPaste(Tok);
+ return true;
+ } else {
+ // TODO: If not in assembler language mode.
+ PP.Diag(PasteOpLoc, diag::err_pp_bad_paste,
+ std::string(Buffer.begin(), Buffer.end()-1));
+ return false;
+ }
+ }
+
+ // Turn ## into 'unknown' to avoid # ## # from looking like a paste
+ // operator.
+ if (Result.is(tok::hashhash))
+ Result.setKind(tok::unknown);
+ // FIXME: Turn __VA_ARGS__ into "not a token"?
+
+    // Transfer properties of the LHS over to the Result.
+ Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
+ Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());
+
+ // Finally, replace LHS with the result, consume the RHS, and iterate.
+ ++CurToken;
+ Tok = Result;
+ } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
+
+ // Now that we got the result token, it will be subject to expansion. Since
+ // token pasting re-lexes the result token in raw mode, identifier information
+ // isn't looked up. As such, if the result is an identifier, look up id info.
+ if (Tok.is(tok::identifier)) {
+ // Look up the identifier info for the token. We disabled identifier lookup
+ // by saying we're skipping contents, so we need to do this manually.
+ Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+ }
+ return false;
+}
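+
+// For example, with "#define CAT(a, b) a ## b", CAT(foo, 12) relexes the
+// pasted spelling "foo12" into a single identifier token, while CAT(x, +)
+// does not form a single token ("x+" lexes as two tokens) and is diagnosed
+// with err_pp_bad_paste.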
+
+/// isNextTokenLParen - If the next token lexed will pop this macro off the
+/// expansion stack, return 2. If the next unexpanded token is a '(', return
+/// 1, otherwise return 0.
+unsigned TokenLexer::isNextTokenLParen() const {
+ // Out of tokens?
+ if (isAtEnd())
+ return 2;
+ return Tokens[CurToken].is(tok::l_paren);
+}
+
+
+/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
+/// together to form a comment that comments out everything in the current
+/// macro, other active macros, and anything left on the current physical
+/// source line of the instantiated buffer. Handle this by returning the
+/// first token on the next line.
+void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {
+ // We 'comment out' the rest of this macro by just ignoring the rest of the
+ // tokens that have not been lexed yet, if any.
+
+ // Since this must be a macro, mark the macro enabled now that it is no longer
+ // being expanded.
+ assert(Macro && "Token streams can't paste comments");
+ Macro->EnableMacro();
+
+ PP.HandleMicrosoftCommentPaste(Tok);
+}