Diffstat (limited to 'clang/lib/Lex')
 -rw-r--r--  clang/lib/Lex/HeaderMap.cpp          242
 -rw-r--r--  clang/lib/Lex/HeaderSearch.cpp       425
 -rw-r--r--  clang/lib/Lex/Lexer.cpp             1661
 -rw-r--r--  clang/lib/Lex/LiteralSupport.cpp     691
 -rw-r--r--  clang/lib/Lex/MacroArgs.cpp          225
 -rw-r--r--  clang/lib/Lex/MacroArgs.h            109
 -rw-r--r--  clang/lib/Lex/MacroInfo.cpp           70
 -rw-r--r--  clang/lib/Lex/Makefile                28
 -rw-r--r--  clang/lib/Lex/PPDirectives.cpp      1153
 -rw-r--r--  clang/lib/Lex/PPExpressions.cpp      639
 -rw-r--r--  clang/lib/Lex/PPLexerChange.cpp      401
 -rw-r--r--  clang/lib/Lex/PPMacroExpansion.cpp   523
 -rw-r--r--  clang/lib/Lex/Pragma.cpp             386
 -rw-r--r--  clang/lib/Lex/Preprocessor.cpp       560
 -rw-r--r--  clang/lib/Lex/ScratchBuffer.cpp       72
 -rw-r--r--  clang/lib/Lex/TokenLexer.cpp         488
16 files changed, 7673 insertions(+), 0 deletions(-)
diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp
new file mode 100644
index 00000000000..282e742b4c8
--- /dev/null
+++ b/clang/lib/Lex/HeaderMap.cpp
@@ -0,0 +1,242 @@
+//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HeaderMap interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Data Structures and Manifest Constants
+//===----------------------------------------------------------------------===//
+
+enum {
+  HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p',
+  HMAP_HeaderVersion = 1,
+
+  HMAP_EmptyBucketKey = 0
+};
+
+namespace clang {
+struct HMapBucket {
+  uint32_t Key;      // Offset (into strings) of key.
+
+  uint32_t Prefix;   // Offset (into strings) of value prefix.
+  uint32_t Suffix;   // Offset (into strings) of value suffix.
+};
+
+struct HMapHeader {
+  uint32_t Magic;           // Magic word, also indicates byte order.
+  uint16_t Version;         // Version number -- currently 1.
+  uint16_t Reserved;        // Reserved for future use - zero for now.
+  uint32_t StringsOffset;   // Offset to start of string pool.
+  uint32_t NumEntries;      // Number of entries in the string table.
+  uint32_t NumBuckets;      // Number of buckets (always a power of 2).
+  uint32_t MaxValueLength;  // Length of longest result path (excluding nul).
+  // An array of 'NumBuckets' HMapBucket objects follows this header.
+  // Strings follow the buckets, at StringsOffset.
+};
+} // end namespace clang.
+
+/// HashHMapKey - This is the 'well known' hash function required by the file
+/// format, used to look up keys in the hash table. The hash table uses simple
+/// linear probing based on this function.
+static inline unsigned HashHMapKey(const char *S, const char *End) {
+  unsigned Result = 0;
+
+  for (; S != End; S++)
+    Result += tolower(*S) * 13;
+  return Result;
+}
+
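A quick illustration of the arithmetic this format implies (a standalone sketch, not part of the commit): because NumBuckets is a power of two, the starting bucket for a key is the case-insensitive hash masked by NumBuckets-1, and probing then advances linearly from there.

    #include <cctype>

    // Minimal model of the starting-bucket computation used by LookupFile.
    static unsigned StartBucket(const char *S, const char *End,
                                unsigned NumBuckets) {
      unsigned Hash = 0;
      for (; S != End; ++S)
        Hash += std::tolower(*S) * 13;  // same 'well known' hash as HashHMapKey
      return Hash & (NumBuckets - 1);   // mask instead of modulo; probe with ++
    }
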
+//===----------------------------------------------------------------------===//
+// Verification and Construction
+//===----------------------------------------------------------------------===//
+
+/// HeaderMap::Create - This attempts to load the specified file as a header
+/// map. If it doesn't look like a HeaderMap, it gives up and returns null.
+/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason
+/// into the string error argument and returns null.
+const HeaderMap *HeaderMap::Create(const FileEntry *FE) {
+  // If the file is too small to be a header map, ignore it.
+  unsigned FileSize = FE->getSize();
+  if (FileSize <= sizeof(HMapHeader)) return 0;
+
+  llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer(
+    llvm::MemoryBuffer::getFile(FE->getName(), strlen(FE->getName()), 0,
+                                FE->getSize()));
+  if (FileBuffer == 0) return 0;  // Unreadable file?
+  const char *FileStart = FileBuffer->getBufferStart();
+
+  // We know the file is at least as big as the header, check it now.
+  const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
+
+  // Sniff it to see if it's a headermap by checking the magic number and
+  // version.
+  bool NeedsByteSwap;
+  if (Header->Magic == HMAP_HeaderMagicNumber &&
+      Header->Version == HMAP_HeaderVersion)
+    NeedsByteSwap = false;
+  else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) &&
+           Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
+    NeedsByteSwap = true;  // Mixed endianness headermap.
+  else
+    return 0;  // Not a header map.
+
+  if (Header->Reserved != 0) return 0;
+
+  // Okay, everything looks good, create the header map.
+  return new HeaderMap(FileBuffer.take(), NeedsByteSwap);
+}
+
+HeaderMap::~HeaderMap() {
+  delete FileBuffer;
+}
+
+//===----------------------------------------------------------------------===//
+//  Utility Methods
+//===----------------------------------------------------------------------===//
+
+
+/// getFileName - Return the filename of the headermap.
+const char *HeaderMap::getFileName() const {
+  return FileBuffer->getBufferIdentifier();
+}
+
+unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const {
+  if (!NeedsBSwap) return X;
+  return llvm::ByteSwap_32(X);
+}
+
+/// getHeader - Return a reference to the file header, in unbyte-swapped form.
+/// This method cannot fail.
+const HMapHeader &HeaderMap::getHeader() const {
+  // We know the file is at least as big as the header. Return it.
+  return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
+}
+
+/// getBucket - Return the specified hash table bucket from the header map,
+/// bswap'ing its fields as appropriate. If the bucket number is not valid,
+/// this returns a bucket with an empty key (0).
+HMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
+  HMapBucket Result;
+  Result.Key = HMAP_EmptyBucketKey;
+
+  const HMapBucket *BucketArray =
+    reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
+                                        sizeof(HMapHeader));
+
+  const HMapBucket *BucketPtr = BucketArray+BucketNo;
+  if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd())
+    return Result;  // Invalid buffer, corrupt hmap.
+
+  // Otherwise, the bucket is valid. Load the values, bswapping as needed.
+  Result.Key    = getEndianAdjustedWord(BucketPtr->Key);
+  Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
+  Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
+  return Result;
+}
+
+/// getString - Look up the specified string in the string table. If the
+/// string index is not valid, it returns null.
+const char *HeaderMap::getString(unsigned StrTabIdx) const {
+  // Add the start of the string table to the idx.
+  StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
+
+  // Check for invalid index.
+  if (StrTabIdx >= FileBuffer->getBufferSize())
+    return 0;
+
+  // Otherwise, we have a valid pointer into the file. Just return it. We know
+  // that the "string" can not overrun the end of the file, because the buffer
+  // is nul terminated by virtue of being a MemoryBuffer.
+  return FileBuffer->getBufferStart()+StrTabIdx;
+}
+/// StringsEqualWithoutCase - Compare the specified two strings for case-
+/// insensitive equality, returning true if they are equal. Both strings are
+/// known to have the same length.
+static bool StringsEqualWithoutCase(const char *S1, const char *S2,
+                                    unsigned Len) {
+  for (; Len; ++S1, ++S2, --Len)
+    if (tolower(*S1) != tolower(*S2))
+      return false;
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// The Main Drivers
+//===----------------------------------------------------------------------===//
+
+/// dump - Print the contents of this headermap to stderr.
+void HeaderMap::dump() const {
+  const HMapHeader &Hdr = getHeader();
+  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+  fprintf(stderr, "Header Map %s:\n  %d buckets, %d entries\n",
+          getFileName(), NumBuckets,
+          getEndianAdjustedWord(Hdr.NumEntries));
+
+  for (unsigned i = 0; i != NumBuckets; ++i) {
+    HMapBucket B = getBucket(i);
+    if (B.Key == HMAP_EmptyBucketKey) continue;
+
+    const char *Key    = getString(B.Key);
+    const char *Prefix = getString(B.Prefix);
+    const char *Suffix = getString(B.Suffix);
+    fprintf(stderr, "  %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix);
+  }
+}
+
+/// LookupFile - Check to see if the specified relative filename is located in
+/// this HeaderMap. If so, open it and return its FileEntry.
+const FileEntry *HeaderMap::LookupFile(const char *FilenameStart,
+                                       const char *FilenameEnd,
+                                       FileManager &FM) const {
+  const HMapHeader &Hdr = getHeader();
+  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+  // If the number of buckets is not a power of two, the headermap is corrupt.
+  // Don't probe infinitely.
+  if (NumBuckets & (NumBuckets-1))
+    return 0;
+
+  // Linearly probe the hash table.
+  for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) {
+    HMapBucket B = getBucket(Bucket & (NumBuckets-1));
+    if (B.Key == HMAP_EmptyBucketKey) return 0;  // Hash miss.
+
+    // See if the key matches. If not, probe on.
+    const char *Key = getString(B.Key);
+    unsigned BucketKeyLen = strlen(Key);
+    if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart))
+      continue;
+
+    // See if the actual strings are equal.
+    if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen))
+      continue;
+
+    // If so, we have a match in the hash table. Construct the destination
+    // path.
+    llvm::SmallString<1024> DestPath;
+    DestPath += getString(B.Prefix);
+    DestPath += getString(B.Suffix);
+    return FM.getFile(DestPath.begin(), DestPath.end());
+  }
+}
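How a client drives this interface, as a minimal sketch (the header map file and key names here are hypothetical, not from the commit):

    #include <cstdio>
    #include <cstring>

    // Hypothetical usage: load a header map file and resolve one key through
    // it. Create() returns null for files that aren't header maps, and hands
    // ownership of the map to the caller on success.
    void DemoHeaderMapLookup(clang::FileManager &FM,
                             const clang::FileEntry *HMapFile) {
      if (const clang::HeaderMap *HM = clang::HeaderMap::Create(HMapFile)) {
        const char *Name = "Foo/Bar.h";  // hypothetical key
        if (const clang::FileEntry *FE =
              HM->LookupFile(Name, Name+std::strlen(Name), FM))
          std::fprintf(stderr, "resolved to %s\n", FE->getName());
        delete HM;
      }
    }
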
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
new file mode 100644
index 00000000000..44ae35c8b7e
--- /dev/null
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -0,0 +1,425 @@
+//===--- HeaderSearch.cpp - Resolve Header File Locations ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DirectoryLookup and HeaderSearch interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "llvm/System/Path.h"
+#include "llvm/ADT/SmallString.h"
+using namespace clang;
+
+HeaderSearch::HeaderSearch(FileManager &FM) : FileMgr(FM), FrameworkMap(64) {
+  SystemDirIdx = 0;
+  NoCurDirSearch = false;
+
+  NumIncluded = 0;
+  NumMultiIncludeFileOptzn = 0;
+  NumFrameworkLookups = NumSubFrameworkLookups = 0;
+}
+
+HeaderSearch::~HeaderSearch() {
+  // Delete headermaps.
+  for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+    delete HeaderMaps[i].second;
+}
+
+void HeaderSearch::PrintStats() {
+  fprintf(stderr, "\n*** HeaderSearch Stats:\n");
+  fprintf(stderr, "%d files tracked.\n", (int)FileInfo.size());
+  unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
+  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+    NumOnceOnlyFiles += FileInfo[i].isImport;
+    if (MaxNumIncludes < FileInfo[i].NumIncludes)
+      MaxNumIncludes = FileInfo[i].NumIncludes;
+    NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
+  }
+  fprintf(stderr, "  %d #import/#pragma once files.\n", NumOnceOnlyFiles);
+  fprintf(stderr, "  %d included exactly once.\n", NumSingleIncludedFiles);
+  fprintf(stderr, "  %d max times a file is included.\n", MaxNumIncludes);
+
+  fprintf(stderr, "  %d #include/#include_next/#import.\n", NumIncluded);
+  fprintf(stderr, "    %d #includes skipped due to"
+          " the multi-include optimization.\n", NumMultiIncludeFileOptzn);
+
+  fprintf(stderr, "%d framework lookups.\n", NumFrameworkLookups);
+  fprintf(stderr, "%d subframework lookups.\n", NumSubFrameworkLookups);
+}
+
+/// CreateHeaderMap - This method returns a HeaderMap for the specified
+/// FileEntry, uniquing them through the 'HeaderMaps' data structure.
+const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {
+  // We expect the number of headermaps to be small, and almost always empty.
+  // If it ever grows, use of a linear search should be re-evaluated.
+  if (!HeaderMaps.empty()) {
+    for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+      // Pointer equality comparison of FileEntries works because they are
+      // already uniqued by inode.
+      if (HeaderMaps[i].first == FE)
+        return HeaderMaps[i].second;
+  }
+
+  if (const HeaderMap *HM = HeaderMap::Create(FE)) {
+    HeaderMaps.push_back(std::make_pair(FE, HM));
+    return HM;
+  }
+
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// File lookup within a DirectoryLookup scope
+//===----------------------------------------------------------------------===//
+
+/// getName - Return the directory or filename corresponding to this lookup
+/// object.
+const char *DirectoryLookup::getName() const {
+  if (isNormalDir())
+    return getDir()->getName();
+  if (isFramework())
+    return getFrameworkDir()->getName();
+  assert(isHeaderMap() && "Unknown DirectoryLookup");
+  return getHeaderMap()->getFileName();
+}
+
+/// LookupFile - Lookup the specified file in this search path, returning it
+/// if it exists or returning null if not.
+const FileEntry *DirectoryLookup::LookupFile(const char *FilenameStart,
+                                             const char *FilenameEnd,
+                                             HeaderSearch &HS) const {
+  llvm::SmallString<1024> TmpDir;
+  if (isNormalDir()) {
+    // Concatenate the requested file onto the directory.
+    // FIXME: Portability. Filename concatenation should be in sys::Path.
+    TmpDir += getDir()->getName();
+    TmpDir.push_back('/');
+    TmpDir.append(FilenameStart, FilenameEnd);
+    return HS.getFileMgr().getFile(TmpDir.begin(), TmpDir.end());
+  }
+
+  if (isFramework())
+    return DoFrameworkLookup(FilenameStart, FilenameEnd, HS);
+
+  assert(isHeaderMap() && "Unknown directory lookup");
+  return getHeaderMap()->LookupFile(FilenameStart, FilenameEnd,HS.getFileMgr());
+}
+
+
+/// DoFrameworkLookup - Do a lookup of the specified file in the current
+/// DirectoryLookup, which is a framework directory.
+const FileEntry *DirectoryLookup::DoFrameworkLookup(const char *FilenameStart,
+                                                    const char *FilenameEnd,
+                                                    HeaderSearch &HS) const {
+  FileManager &FileMgr = HS.getFileMgr();
+
+  // Framework names must have a '/' in the filename.
+  const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/');
+  if (SlashPos == FilenameEnd) return 0;
+
+  // Find out if this is the home for the specified framework, by checking
+  // HeaderSearch. Possible answers are yes, no, and unknown.
+  const DirectoryEntry *&FrameworkDirCache =
+    HS.LookupFrameworkCache(FilenameStart, SlashPos);
+
+  // If it is known and in some other directory, fail.
+  if (FrameworkDirCache && FrameworkDirCache != getFrameworkDir())
+    return 0;
+
+  // Otherwise, construct the path to this framework dir.
+
+  // FrameworkName = "/System/Library/Frameworks/"
+  llvm::SmallString<1024> FrameworkName;
+  FrameworkName += getFrameworkDir()->getName();
+  if (FrameworkName.empty() || FrameworkName.back() != '/')
+    FrameworkName.push_back('/');
+
+  // FrameworkName = "/System/Library/Frameworks/Cocoa"
+  FrameworkName.append(FilenameStart, SlashPos);
+
+  // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/"
+  FrameworkName += ".framework/";
+
+  // If the cache entry is still unresolved, check the existence of the
+  // framework directory now.
+  if (FrameworkDirCache == 0) {
+    HS.IncrementFrameworkLookupCount();
+
+    // If the framework dir doesn't exist, we fail.
+    // FIXME: It's probably more efficient to query this with FileMgr.getDir.
+    if (!llvm::sys::Path(std::string(FrameworkName.begin(),
+                                     FrameworkName.end())).exists())
+      return 0;
+
+    // Otherwise, if it does, remember that this is the right direntry for this
+    // framework.
+    FrameworkDirCache = getFrameworkDir();
+  }
+
+  // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h"
+  unsigned OrigSize = FrameworkName.size();
+
+  FrameworkName += "Headers/";
+  FrameworkName.append(SlashPos+1, FilenameEnd);
+  if (const FileEntry *FE = FileMgr.getFile(FrameworkName.begin(),
+                                            FrameworkName.end())) {
+    return FE;
+  }
+
+  // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
+  const char *Private = "Private";
+  FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
+                       Private+strlen(Private));
+  return FileMgr.getFile(FrameworkName.begin(), FrameworkName.end());
+}
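The "Headers/" then "PrivateHeaders/" probing above relies on remembering the framework prefix length and splicing "Private" into the same buffer. A self-contained sketch of that string surgery (std::string instead of SmallString, paths illustrative):

    #include <cstdio>
    #include <string>

    int main() {
      std::string FrameworkName = "/System/Library/Frameworks/Cocoa.framework/";
      std::size_t OrigSize = FrameworkName.size();  // remember prefix length
      FrameworkName += "Headers/Cocoa.h";
      std::printf("%s\n", FrameworkName.c_str());   // ...Headers/Cocoa.h
      FrameworkName.insert(OrigSize, "Private");    // retarget same buffer
      std::printf("%s\n", FrameworkName.c_str());   // ...PrivateHeaders/Cocoa.h
    }
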
+
+//===----------------------------------------------------------------------===//
+// Header File Location.
+//===----------------------------------------------------------------------===//
+
+
+/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
+/// return null on failure. isAngled indicates whether the file reference is
+/// for system #include's or not (i.e. using <> instead of ""). CurFileEnt, if
+/// non-null, indicates where the #including file is, in case a relative search
+/// is needed.
+const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart,
+                                          const char *FilenameEnd,
+                                          bool isAngled,
+                                          const DirectoryLookup *FromDir,
+                                          const DirectoryLookup *&CurDir,
+                                          const FileEntry *CurFileEnt) {
+  // If 'Filename' is absolute, check to see if it exists; no searching is
+  // needed.
+  // FIXME: Portability. This should be a sys::Path interface, this doesn't
+  // handle things like C:\foo.txt right, nor win32 \\network\device\blah.
+  if (FilenameStart[0] == '/') {
+    CurDir = 0;
+
+    // If this was an #include_next "/absolute/file", fail.
+    if (FromDir) return 0;
+
+    // Otherwise, just return the file.
+    return FileMgr.getFile(FilenameStart, FilenameEnd);
+  }
+
+  // Step #0, unless disabled, check to see if the file is in the #includer's
+  // directory. This has to be based on CurFileEnt, not CurDir, because
+  // CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and
+  // a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h".
+  // This search is not done for <> headers.
+  if (CurFileEnt && !isAngled && !NoCurDirSearch) {
+    llvm::SmallString<1024> TmpDir;
+    // Concatenate the requested file onto the directory.
+    // FIXME: Portability. Filename concatenation should be in sys::Path.
+    TmpDir += CurFileEnt->getDir()->getName();
+    TmpDir.push_back('/');
+    TmpDir.append(FilenameStart, FilenameEnd);
+    if (const FileEntry *FE = FileMgr.getFile(TmpDir.begin(), TmpDir.end())) {
+      // Leave CurDir unset.
+      // This file is a system header or C++ unfriendly if the old file is.
+      //
+      // Note that the temporary 'DirInfo' is required here, as either call to
+      // getFileInfo could resize the vector and we don't want to rely on order
+      // of evaluation.
+      unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo;
+      getFileInfo(FE).DirInfo = DirInfo;
+      return FE;
+    }
+  }
+
+  CurDir = 0;
+
+  // If this is a system #include, ignore the user #include locs.
+  unsigned i = isAngled ? SystemDirIdx : 0;
+
+  // If this is a #include_next request, start searching after the directory the
+  // file was found in.
+  if (FromDir)
+    i = FromDir-&SearchDirs[0];
+
+  // Cache all of the lookups performed by this method. Many headers are
+  // multiply included, and the "pragma once" optimization prevents them from
+  // being relex/pp'd, but they would still have to search through a
+  // (potentially huge) series of SearchDirs to find it.
+  std::pair<unsigned, unsigned> &CacheLookup =
+    LookupFileCache.GetOrCreateValue(FilenameStart, FilenameEnd).getValue();
+
+  // If the entry has been previously looked up, the first value will be
+  // non-zero. If it is equal to i+1 (the start point of our search plus one),
+  // then this is a matching hit.
+  if (CacheLookup.first == i+1) {
+    // Skip querying potentially lots of directories for this lookup.
+    i = CacheLookup.second;
+  } else {
+    // Otherwise, this is the first query, or the previous query didn't match
+    // our search start. We will fill in our found location below, so prime the
+    // start point value.
+    CacheLookup.first = i+1;
+  }
+
+  // Check each directory in sequence to see if it contains this file.
+  for (; i != SearchDirs.size(); ++i) {
+    const FileEntry *FE =
+      SearchDirs[i].LookupFile(FilenameStart, FilenameEnd, *this);
+    if (!FE) continue;
+
+    CurDir = &SearchDirs[i];
+
+    // This file is a system header or C++ unfriendly if the dir is.
+    getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic();
+
+    // Remember this location for the next lookup we do.
+    CacheLookup.second = i;
+    return FE;
+  }
+
+  // Otherwise, didn't find it. Remember we didn't find this.
+  CacheLookup.second = SearchDirs.size();
+  return 0;
+}
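A worked trace of the cache protocol above, with hypothetical numbers (illustration only):

    // Suppose "foo.h" is requested with SearchDirs.size() == 10, starting at
    // i == 0, and the file is found in SearchDirs[7]:
    //   first lookup:  CacheLookup = (0+1, 7) after probing dirs 0..7
    //   repeat lookup: CacheLookup.first == 0+1, so probing starts at i == 7
    // An #include_next of the same name starting at i == 8 sees
    // first != 8+1, so the cache is re-primed rather than trusted.
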
+
+/// LookupSubframeworkHeader - Look up a subframework for the specified
+/// #include file. For example, if #include'ing <HIToolbox/HIToolbox.h> from
+/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox
+/// is a subframework within Carbon.framework. If so, return the FileEntry
+/// for the designated file, otherwise return null.
+const FileEntry *HeaderSearch::
+LookupSubframeworkHeader(const char *FilenameStart,
+                         const char *FilenameEnd,
+                         const FileEntry *ContextFileEnt) {
+  assert(ContextFileEnt && "No context file?");
+
+  // Framework names must have a '/' in the filename. Find it.
+  const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/');
+  if (SlashPos == FilenameEnd) return 0;
+
+  // Look up the base framework name of the ContextFileEnt.
+  const char *ContextName = ContextFileEnt->getName();
+
+  // If the context file wasn't in a framework, this can't be a subframework.
+  const char *FrameworkPos = strstr(ContextName, ".framework/");
+  if (FrameworkPos == 0)
+    return 0;
+
+  llvm::SmallString<1024> FrameworkName(ContextName,
+                                        FrameworkPos+strlen(".framework/"));
+
+  // Append Frameworks/HIToolbox.framework/
+  FrameworkName += "Frameworks/";
+  FrameworkName.append(FilenameStart, SlashPos);
+  FrameworkName += ".framework/";
+
+  llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup =
+    FrameworkMap.GetOrCreateValue(FilenameStart, SlashPos);
+
+  // Some other location?
+  if (CacheLookup.getValue() &&
+      CacheLookup.getKeyLength() == FrameworkName.size() &&
+      memcmp(CacheLookup.getKeyData(), &FrameworkName[0],
+             CacheLookup.getKeyLength()) != 0)
+    return 0;
+
+  // Cache subframework.
+  if (CacheLookup.getValue() == 0) {
+    ++NumSubFrameworkLookups;
+
+    // If the framework dir doesn't exist, we fail.
+    const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.begin(),
+                                                     FrameworkName.end());
+    if (Dir == 0) return 0;
+
+    // Otherwise, if it does, remember that this is the right direntry for this
+    // framework.
+    CacheLookup.setValue(Dir);
+  }
+
+  const FileEntry *FE = 0;
+
+  // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h"
+  llvm::SmallString<1024> HeadersFilename(FrameworkName);
+  HeadersFilename += "Headers/";
+  HeadersFilename.append(SlashPos+1, FilenameEnd);
+  if (!(FE = FileMgr.getFile(HeadersFilename.begin(),
+                             HeadersFilename.end()))) {
+
+    // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h"
+    HeadersFilename = FrameworkName;
+    HeadersFilename += "PrivateHeaders/";
+    HeadersFilename.append(SlashPos+1, FilenameEnd);
+    if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end())))
+      return 0;
+  }
+
+  // This file is a system header or C++ unfriendly if the old file is.
+  //
+  // Note that the temporary 'DirInfo' is required here, as either call to
+  // getFileInfo could resize the vector and we don't want to rely on order
+  // of evaluation.
+  unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
+  getFileInfo(FE).DirInfo = DirInfo;
+  return FE;
+}
+
+//===----------------------------------------------------------------------===//
+// File Info Management.
+//===----------------------------------------------------------------------===//
+
+
+/// getFileInfo - Return the PerFileInfo structure for the specified
+/// FileEntry.
+HeaderSearch::PerFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {
+  if (FE->getUID() >= FileInfo.size())
+    FileInfo.resize(FE->getUID()+1);
+  return FileInfo[FE->getUID()];
+}
+
+/// ShouldEnterIncludeFile - Mark the specified file as a target of a
+/// #include, #include_next, or #import directive. Return false if #including
+/// the file will have no effect or true if we should include it.
+bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){
+  ++NumIncluded;  // Count # of attempted #includes.
+
+  // Get information about this file.
+  PerFileInfo &FileInfo = getFileInfo(File);
+
+  // If this is a #import directive, check that we have not already imported
+  // this header.
+  if (isImport) {
+    // If this has already been imported, don't import it again.
+    FileInfo.isImport = true;
+
+    // Has this already been #import'ed or #include'd?
+    if (FileInfo.NumIncludes) return false;
+  } else {
+    // Otherwise, if this is a #include of a file that was previously #import'd
+    // or if this is the second #include of a #pragma once file, ignore it.
+    if (FileInfo.isImport)
+      return false;
+  }
+
+  // Next, check to see if the file is wrapped with #ifndef guards. If so, and
+  // if the macro that guards it is defined, we know the #include has no effect.
+  if (FileInfo.ControllingMacro &&
+      FileInfo.ControllingMacro->hasMacroDefinition()) {
+    ++NumMultiIncludeFileOptzn;
+    return false;
+  }
+
+  // Increment the number of times this file has been included.
+  ++FileInfo.NumIncludes;
+
+  return true;
+}
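The controlling-macro test above is what makes re-inclusion of guarded headers essentially free. Illustration with a hypothetical header (not from the commit):

    //   // foo.h
    //   #ifndef FOO_H
    //   #define FOO_H
    //   ...
    //   #endif
    //
    // Once foo.h has been lexed, FOO_H is recorded as its controlling macro.
    // On the next #include "foo.h", FOO_H is defined, so
    // ShouldEnterIncludeFile() returns false and the file is never reopened
    // or re-lexed.
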
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
new file mode 100644
index 00000000000..98bbb386305
--- /dev/null
+++ b/clang/lib/Lex/Lexer.cpp
@@ -0,0 +1,1661 @@
+//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Lexer and Token interfaces.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: GCC Diagnostics emitted by the lexer:
+// PEDWARN: (form feed|vertical tab) in preprocessing directive
+//
+// Universal characters, unicode, char mapping:
+// WARNING: `%.*s' is not in NFKC
+// WARNING: `%.*s' is not in NFC
+//
+// Other:
+// TODO: Options to support:
+//    -fexec-charset,-fwide-exec-charset
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cctype>
+using namespace clang;
+
+static void InitCharacterInfo();
+
+//===----------------------------------------------------------------------===//
+// Token Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
+bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
+  return is(tok::identifier) &&
+         getIdentifierInfo()->getObjCKeywordID() == objcKey;
+}
+
+/// getObjCKeywordID - Return the ObjC keyword kind.
+tok::ObjCKeywordKind Token::getObjCKeywordID() const {
+  IdentifierInfo *specId = getIdentifierInfo();
+  return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
+}
+
+/// isNamedIdentifier - Return true if this token is a ppidentifier with the
+/// specified name. For example, tok.isNamedIdentifier("this").
+bool Token::isNamedIdentifier(const char *Name) const {
+  return IdentInfo && !strcmp(IdentInfo->getName(), Name);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Lexer Class Implementation
+//===----------------------------------------------------------------------===//
+
+
+/// Lexer constructor - Create a new lexer object for the specified buffer
+/// with the specified preprocessor managing the lexing process. This lexer
+/// assumes that the associated file buffer and Preprocessor objects will
+/// outlive it, so it doesn't take ownership of either of them.
+Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp,
+             const char *BufStart, const char *BufEnd)
+  : FileLoc(fileloc), PP(&pp), Features(pp.getLangOptions()) {
+
+  SourceManager &SourceMgr = PP->getSourceManager();
+  unsigned InputFileID = SourceMgr.getPhysicalLoc(FileLoc).getFileID();
+  const llvm::MemoryBuffer *InputFile = SourceMgr.getBuffer(InputFileID);
+
+  Is_PragmaLexer = false;
+  InitCharacterInfo();
+
+  // BufferStart must always be InputFile->getBufferStart().
+  BufferStart = InputFile->getBufferStart();
+
+  // BufferPtr and BufferEnd can start out somewhere inside the current buffer.
+  // If unspecified, they start at the start/end of the buffer.
+  BufferPtr = BufStart ? BufStart : BufferStart;
+  BufferEnd = BufEnd ? BufEnd : InputFile->getBufferEnd();
+
+  assert(BufferEnd[0] == 0 &&
+         "We assume that the input buffer has a null character at the end"
+         " to simplify lexing!");
+
+  // Start of the file is a start of line.
+  IsAtStartOfLine = true;
+
+  // We are not after parsing a #.
+  ParsingPreprocessorDirective = false;
+
+  // We are not after parsing #include.
+  ParsingFilename = false;
+
+  // We are not in raw mode. Raw mode disables diagnostics and interpretation
+  // of tokens (e.g. identifiers, thus disabling macro expansion). It is used
+  // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
+  // or otherwise skipping over tokens.
+  LexingRawMode = false;
+
+  // Default to keeping comments if requested.
+  KeepCommentMode = PP->getCommentRetentionState();
+}
+
+/// Lexer constructor - Create a new raw lexer object. This object is only
+/// suitable for calls to 'LexRawToken'. This lexer assumes that the
+/// associated file buffer will outlive it, so it doesn't take ownership
+/// of it.
+Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
+             const char *BufStart, const char *BufEnd)
+  : FileLoc(fileloc), PP(0), Features(features) {
+  Is_PragmaLexer = false;
+  InitCharacterInfo();
+
+  BufferStart = BufStart;
+  BufferPtr = BufStart;
+  BufferEnd = BufEnd;
+
+  assert(BufferEnd[0] == 0 &&
+         "We assume that the input buffer has a null character at the end"
+         " to simplify lexing!");
+
+  // Start of the file is a start of line.
+  IsAtStartOfLine = true;
+
+  // We are not after parsing a #.
+  ParsingPreprocessorDirective = false;
+
+  // We are not after parsing #include.
+  ParsingFilename = false;
+
+  // We *are* in raw mode.
+  LexingRawMode = true;
+
+  // Never keep comments in raw mode.
+  KeepCommentMode = false;
+}
+
+
+/// Stringify - Convert the specified string into a C string, with surrounding
+/// ""'s, and with escaped \ and " characters.
+std::string Lexer::Stringify(const std::string &Str, bool Charify) {
+  std::string Result = Str;
+  char Quote = Charify ? '\'' : '"';
+  for (unsigned i = 0, e = Result.size(); i != e; ++i) {
+    if (Result[i] == '\\' || Result[i] == Quote) {
+      Result.insert(Result.begin()+i, '\\');
+      ++i; ++e;
+    }
+  }
+  return Result;
+}
+
+/// Stringify - Convert the specified string into a C string by escaping '\'
+/// and " characters. This does not add surrounding ""'s to the string.
+void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
+  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+    if (Str[i] == '\\' || Str[i] == '"') {
+      Str.insert(Str.begin()+i, '\\');
+      ++i; ++e;
+    }
+  }
+}
+
+
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file. If the token needs cleaning (e.g.
+/// includes a trigraph or an escaped newline) then this count includes bytes
+/// that are part of that.
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+                                   const SourceManager &SM) {
+  // If this comes from a macro expansion, we really do want the macro name, not
+  // the token this macro expanded to.
+  Loc = SM.getLogicalLoc(Loc);
+
+  const char *StrData = SM.getCharacterData(Loc);
+
+  // TODO: this could be special cased for common tokens like identifiers, ')',
+  // etc to make this faster, if it mattered. Just look at StrData[0] to handle
+  // all obviously single-char tokens. This could use
+  // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
+  // something.
+
+
+  const char *BufEnd = SM.getBufferData(Loc.getFileID()).second;
+
+  // Create a langops struct and enable trigraphs. This is sufficient for
+  // measuring tokens.
+  LangOptions LangOpts;
+  LangOpts.Trigraphs = true;
+
+  // Create a lexer starting at the beginning of this token.
+  Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
+  Token TheTok;
+  TheLexer.LexRawToken(TheTok);
+  return TheTok.getLength();
+}
+
+//===----------------------------------------------------------------------===//
+// Character information.
+//===----------------------------------------------------------------------===//
+
+static unsigned char CharInfo[256];
+
+enum {
+  CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'. Note, no '\0'
+  CHAR_VERT_WS  = 0x02,  // '\r', '\n'
+  CHAR_LETTER   = 0x04,  // a-z,A-Z
+  CHAR_NUMBER   = 0x08,  // 0-9
+  CHAR_UNDER    = 0x10,  // _
+  CHAR_PERIOD   = 0x20   // .
+};
+
+static void InitCharacterInfo() {
+  static bool isInited = false;
+  if (isInited) return;
+  isInited = true;
+
+  // Initialize the CharInfo table.
+  // TODO: statically initialize this.
+  CharInfo[(int)' '] = CharInfo[(int)'\t'] =
+  CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
+  CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
+
+  CharInfo[(int)'_'] = CHAR_UNDER;
+  CharInfo[(int)'.'] = CHAR_PERIOD;
+  for (unsigned i = 'a'; i <= 'z'; ++i)
+    CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
+  for (unsigned i = '0'; i <= '9'; ++i)
+    CharInfo[i] = CHAR_NUMBER;
+}
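Given the table initialization above, the classification helpers defined next behave like so (worked examples, for illustration):

    //   isIdentifierBody('a') == true   // CHAR_LETTER
    //   isIdentifierBody('7') == true   // CHAR_NUMBER
    //   isIdentifierBody('_') == true   // CHAR_UNDER
    //   isIdentifierBody('$') == false  // '$' handled separately in
    //                                   // LexIdentifier (DollarIdents)
    //   isNumberBody('.')     == true   // pp-numbers may contain '.'
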
+
+/// isIdentifierBody - Return true if this is the body character of an
+/// identifier, which is [a-zA-Z0-9_].
+static inline bool isIdentifierBody(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
+}
+
+/// isHorizontalWhitespace - Return true if this character is horizontal
+/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'.
+static inline bool isHorizontalWhitespace(unsigned char c) {
+  return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
+}
+
+/// isWhitespace - Return true if this character is horizontal or vertical
+/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false
+/// for '\0'.
+static inline bool isWhitespace(unsigned char c) {
+  return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
+}
+
+/// isNumberBody - Return true if this is the body character of a
+/// preprocessing number, which is [a-zA-Z0-9_.].
+static inline bool isNumberBody(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ?
+    true : false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Diagnostics forwarding code.
+//===----------------------------------------------------------------------===//
+
+/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
+/// lexer buffer was all instantiated at a single point, perform the mapping.
+/// This is currently only used for _Pragma implementation, so it is the slow
+/// path of the hot getSourceLocation method. Do not allow it to be inlined.
+static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
+                                        SourceLocation FileLoc,
+                                        unsigned CharNo) DISABLE_INLINE;
+static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
+                                        SourceLocation FileLoc,
+                                        unsigned CharNo) {
+  // Otherwise, we're lexing "mapped tokens". This is used for things like
+  // _Pragma handling. Combine the instantiation location of FileLoc with the
+  // physical location.
+  SourceManager &SourceMgr = PP.getSourceManager();
+
+  // Create a new SLoc which is expanded from logical(FileLoc) but whose
+  // characters come from phys(FileLoc)+Offset.
+  SourceLocation VirtLoc = SourceMgr.getLogicalLoc(FileLoc);
+  SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(FileLoc);
+  PhysLoc = SourceLocation::getFileLoc(PhysLoc.getFileID(), CharNo);
+  return SourceMgr.getInstantiationLoc(PhysLoc, VirtLoc);
+}
+
+/// getSourceLocation - Return a source location identifier for the specified
+/// offset in the current file.
+SourceLocation Lexer::getSourceLocation(const char *Loc) const {
+  assert(Loc >= BufferStart && Loc <= BufferEnd &&
+         "Location out of range for this buffer!");
+
+  // In the normal case, we're just lexing from a simple file buffer, return
+  // the file id from FileLoc with the offset specified.
+  unsigned CharNo = Loc-BufferStart;
+  if (FileLoc.isFileID())
+    return SourceLocation::getFileLoc(FileLoc.getFileID(), CharNo);
+
+  assert(PP && "This doesn't work on raw lexers");
+  return GetMappedTokenLoc(*PP, FileLoc, CharNo);
+}
+
+/// Diag - Forwarding function for diagnostics. This translates a source
+/// position in the current buffer into a SourceLocation object for rendering.
+void Lexer::Diag(const char *Loc, unsigned DiagID,
+                 const std::string &Msg) const {
+  if (LexingRawMode && Diagnostic::isBuiltinNoteWarningOrExtension(DiagID))
+    return;
+  PP->Diag(getSourceLocation(Loc), DiagID, Msg);
+}
+void Lexer::Diag(SourceLocation Loc, unsigned DiagID,
+                 const std::string &Msg) const {
+  if (LexingRawMode && Diagnostic::isBuiltinNoteWarningOrExtension(DiagID))
+    return;
+  PP->Diag(Loc, DiagID, Msg);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Trigraph and Escaped Newline Handling Code.
+//===----------------------------------------------------------------------===//
+
+/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
+/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
+static char GetTrigraphCharForLetter(char Letter) {
+  switch (Letter) {
+  default:   return 0;
+  case '=':  return '#';
+  case ')':  return ']';
+  case '(':  return '[';
+  case '!':  return '|';
+  case '\'': return '^';
+  case '>':  return '}';
+  case '/':  return '\\';
+  case '<':  return '{';
+  case '-':  return '~';
+  }
+}
+
+/// DecodeTrigraphChar - If the specified character is a legal trigraph when
+/// prefixed with ??, emit a warning about its use (whether trigraphs are
+/// enabled or not) and return the decoded character if trigraphs are enabled,
+/// or 0 if they are not.
+static char DecodeTrigraphChar(const char *CP, Lexer *L) {
+  char Res = GetTrigraphCharForLetter(*CP);
+  if (Res && L) {
+    if (!L->getFeatures().Trigraphs) {
+      L->Diag(CP-2, diag::trigraph_ignored);
+      return 0;
+    } else {
+      L->Diag(CP-2, diag::trigraph_converted, std::string()+Res);
+    }
+  }
+  return Res;
+}
+
+/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
+/// get its size, and return it. This is tricky in several cases:
+///   1. If currently at the start of a trigraph, we warn about the trigraph,
+///      then either return the trigraph (skipping 3 chars) or the '?',
+///      depending on whether trigraphs are enabled or not.
+///   2. If this is an escaped newline (potentially with whitespace between
+///      the backslash and newline), implicitly skip the newline and return
+///      the char after it.
+///   3. If this is a UCN, return it. FIXME: C++ UCN's?
+///
+/// This handles the slow/uncommon case of the getCharAndSize method. Here we
+/// know that we can accumulate into Size, and that we have already incremented
+/// Ptr by Size bytes.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
+/// be updated to match.
+///
+char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
+                               Token *Tok) {
+  // If we have a slash, look for an escaped newline.
+  if (Ptr[0] == '\\') {
+    ++Size;
+    ++Ptr;
+Slash:
+    // Common case, backslash-char where the char is not whitespace.
+    if (!isWhitespace(Ptr[0])) return '\\';
+
+    // See if we have optional whitespace characters followed by a newline.
+    {
+      unsigned SizeTmp = 0;
+      do {
+        ++SizeTmp;
+        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
+          // Remember that this token needs to be cleaned.
+          if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+          // Warn if there was whitespace between the backslash and newline.
+          if (SizeTmp != 1 && Tok)
+            Diag(Ptr, diag::backslash_newline_space);
+
+          // If this is a \r\n or \n\r, skip the newlines.
+          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
+              Ptr[SizeTmp-1] != Ptr[SizeTmp])
+            ++SizeTmp;
+
+          // Found backslash<whitespace><newline>. Parse the char after it.
+          Size += SizeTmp;
+          Ptr  += SizeTmp;
+          // Use slow version to accumulate a correct size field.
+          return getCharAndSizeSlow(Ptr, Size, Tok);
+        }
+      } while (isWhitespace(Ptr[SizeTmp]));
+    }
+
+    // Otherwise, this is not an escaped newline, just return the slash.
+    return '\\';
+  }
+
+  // If this is a trigraph, process it.
+  if (Ptr[0] == '?' && Ptr[1] == '?') {
+    // If this is actually a legal trigraph (not something like "??x"), emit
+    // a trigraph warning. If so, and if trigraphs are enabled, return it.
+    if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
+      // Remember that this token needs to be cleaned.
+      if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+      Ptr += 3;
+      Size += 3;
+      if (C == '\\') goto Slash;
+      return C;
+    }
+  }
+
+  // If this is neither, return a single character.
+  ++Size;
+  return *Ptr;
+}
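A worked trace of the slow path above on the five-byte input "??/" followed by a newline and "]" (illustration):

    //   Ptr: '?' '?' '/' '\n' ']'
    //   1. "??/" is a legal trigraph for '\\' -> Size += 3, goto Slash
    //   2. '\\' is followed by a newline      -> escaped newline, Size += 1
    //   3. the recursive call reads ']'       -> Size += 1, returns ']'
    // Net effect: getCharAndSizeSlow returns ']' with Size == 5, and the
    // token is flagged Token::NeedsCleaning so its spelling is re-derived.
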
+
+
+/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
+/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
+/// and that we have already incremented Ptr by Size bytes.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
+/// be updated to match.
+char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+                                     const LangOptions &Features) {
+  // If we have a slash, look for an escaped newline.
+  if (Ptr[0] == '\\') {
+    ++Size;
+    ++Ptr;
+Slash:
+    // Common case, backslash-char where the char is not whitespace.
+    if (!isWhitespace(Ptr[0])) return '\\';
+
+    // See if we have optional whitespace characters followed by a newline.
+    {
+      unsigned SizeTmp = 0;
+      do {
+        ++SizeTmp;
+        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
+
+          // If this is a \r\n or \n\r, skip the newlines.
+          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
+              Ptr[SizeTmp-1] != Ptr[SizeTmp])
+            ++SizeTmp;
+
+          // Found backslash<whitespace><newline>. Parse the char after it.
+          Size += SizeTmp;
+          Ptr  += SizeTmp;
+
+          // Use slow version to accumulate a correct size field.
+          return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
+        }
+      } while (isWhitespace(Ptr[SizeTmp]));
+    }
+
+    // Otherwise, this is not an escaped newline, just return the slash.
+    return '\\';
+  }
+
+  // If this is a trigraph, process it.
+  if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
+    // If this is actually a legal trigraph (not something like "??x"), return
+    // it.
+    if (char C = GetTrigraphCharForLetter(Ptr[2])) {
+      Ptr += 3;
+      Size += 3;
+      if (C == '\\') goto Slash;
+      return C;
+    }
+  }
+
+  // If this is neither, return a single character.
+  ++Size;
+  return *Ptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper methods for lexing.
+//===----------------------------------------------------------------------===//
+
+void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
+  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
+  unsigned Size;
+  unsigned char C = *CurPtr++;
+  while (isIdentifierBody(C)) {
+    C = *CurPtr++;
+  }
+  --CurPtr;  // Back up over the skipped character.
+
+  // Fast path, no $,\,? in identifier found. '\' might be an escaped newline
+  // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
+  // FIXME: UCNs.
+  if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
+FinishIdentifier:
+    const char *IdStart = BufferPtr;
+    FormTokenWithChars(Result, CurPtr);
+    Result.setKind(tok::identifier);
+
+    // If we are in raw mode, return this identifier raw. There is no need to
+    // look up identifier information or attempt to macro expand it.
+    if (LexingRawMode) return;
+
+    // Fill in Result.IdentifierInfo, looking up the identifier in the
+    // identifier table.
+    PP->LookUpIdentifierInfo(Result, IdStart);
+
+    // Finally, now that we know we have an identifier, pass this off to the
+    // preprocessor, which may macro expand it or something.
+    return PP->HandleIdentifier(Result);
+  }
+
+  // Otherwise, $,\,? in identifier found. Enter slower path.
+
+  C = getCharAndSize(CurPtr, Size);
+  while (1) {
+    if (C == '$') {
+      // If we hit a $ and they are not supported in identifiers, we are done.
+      if (!Features.DollarIdents) goto FinishIdentifier;
+
+      // Otherwise, emit a diagnostic and continue.
+      Diag(CurPtr, diag::ext_dollar_in_identifier);
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+      continue;
+    } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
+      // Found end of identifier.
+      goto FinishIdentifier;
+    }
+
+    // Otherwise, this character is good, consume it.
+    CurPtr = ConsumeChar(CurPtr, Size, Result);
+
+    C = getCharAndSize(CurPtr, Size);
+    while (isIdentifierBody(C)) { // FIXME: UCNs.
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+    }
+  }
+}
+
+
+/// LexNumericConstant - Lex the remainder of an integer or floating point
+/// constant. From[-1] is the first character lexed. Return the end of the
+/// constant.
+void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
+  unsigned Size;
+  char C = getCharAndSize(CurPtr, Size);
+  char PrevCh = 0;
+  while (isNumberBody(C)) { // FIXME: UCNs?
+    CurPtr = ConsumeChar(CurPtr, Size, Result);
+    PrevCh = C;
+    C = getCharAndSize(CurPtr, Size);
+  }
+
+  // If we fell out, check for a sign, due to 1e+12. If we have one, continue.
+  if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e'))
+    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+
+  // If we have a hex FP constant, continue.
+  if (Features.HexFloats &&
+      (C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p'))
+    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+
+  Result.setKind(tok::numeric_constant);
+
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr);
+}
+
+/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
+/// either " or L".
+void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide){
+  const char *NulCharacter = 0; // Does this string contain the \0 character?
+
+  char C = getAndAdvanceChar(CurPtr, Result);
+  while (C != '"') {
+    // Skip escaped characters.
+    if (C == '\\') {
+      // Skip the escaped character.
+      C = getAndAdvanceChar(CurPtr, Result);
+    } else if (C == '\n' || C == '\r' ||                // Newline.
+               (C == 0 && CurPtr-1 == BufferEnd)) {     // End of file.
+      if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string);
+      Result.setKind(tok::unknown);
+      FormTokenWithChars(Result, CurPtr-1);
+      return;
+    } else if (C == 0) {
+      NulCharacter = CurPtr-1;
+    }
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+
+  // If a nul character existed in the string, warn about it.
+  if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
+
+  Result.setKind(Wide ? tok::wide_string_literal : tok::string_literal);
+
+  // Update the location of the token as well as the BufferPtr instance var.
+  FormTokenWithChars(Result, CurPtr);
+}
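Worked examples of the exponent-sign rule in LexNumericConstant above (illustration):

    //   "1e+12"  -> '+' follows 'e', so the scan continues:
    //               the whole "1e+12" is one numeric constant.
    //   "1+12"   -> '+' follows '1', so lexing stops:
    //               three tokens, "1" "+" "12".
    //   "0x1p+3" -> with HexFloats enabled, '+' after 'p' continues
    //               the constant the same way.
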
+
+/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
+/// after having lexed the '<' character. This is used for #include filenames.
+void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
+  const char *NulCharacter = 0; // Does this string contain the \0 character?
+
+  char C = getAndAdvanceChar(CurPtr, Result);
+  while (C != '>') {
+    // Skip escaped characters.
+    if (C == '\\') {
+      // Skip the escaped character.
+      C = getAndAdvanceChar(CurPtr, Result);
+    } else if (C == '\n' || C == '\r' ||                // Newline.
+               (C == 0 && CurPtr-1 == BufferEnd)) {     // End of file.
+      if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string);
+      Result.setKind(tok::unknown);
+      FormTokenWithChars(Result, CurPtr-1);
+      return;
+    } else if (C == 0) {
+      NulCharacter = CurPtr-1;
+    }
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+
+  // If a nul character existed in the string, warn about it.
+  if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
+
+  Result.setKind(tok::angle_string_literal);
+
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr);
+}
+
+
+/// LexCharConstant - Lex the remainder of a character constant, after having
+/// lexed either ' or L'.
+void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
+  const char *NulCharacter = 0; // Does this character contain the \0 character?
+
+  // Handle the common case of 'x' and '\y' efficiently.
+  char C = getAndAdvanceChar(CurPtr, Result);
+  if (C == '\'') {
+    if (!LexingRawMode) Diag(BufferPtr, diag::err_empty_character);
+    Result.setKind(tok::unknown);
+    FormTokenWithChars(Result, CurPtr);
+    return;
+  } else if (C == '\\') {
+    // Skip the escaped character.
+    // FIXME: UCN's.
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+
+  if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
+    ++CurPtr;
+  } else {
+    // Fall back on generic code for embedded nulls, newlines, wide chars.
+    do {
+      // Skip escaped characters.
+      if (C == '\\') {
+        // Skip the escaped character.
+        C = getAndAdvanceChar(CurPtr, Result);
+      } else if (C == '\n' || C == '\r' ||              // Newline.
+                 (C == 0 && CurPtr-1 == BufferEnd)) {   // End of file.
+        if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_char);
+        Result.setKind(tok::unknown);
+        FormTokenWithChars(Result, CurPtr-1);
+        return;
+      } else if (C == 0) {
+        NulCharacter = CurPtr-1;
+      }
+      C = getAndAdvanceChar(CurPtr, Result);
+    } while (C != '\'');
+  }
+
+  if (NulCharacter) Diag(NulCharacter, diag::null_in_char);
+
+  Result.setKind(tok::char_constant);
+
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr);
+}
+
+/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
+/// Update BufferPtr to point to the next non-whitespace character and return.
+void Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
+  // Whitespace - Skip it, then return the token after the whitespace.
+  unsigned char Char = *CurPtr;  // Skip consecutive spaces efficiently.
+  while (1) {
+    // Skip horizontal whitespace very aggressively.
+    while (isHorizontalWhitespace(Char))
+      Char = *++CurPtr;
+
+    // Otherwise if we see something other than whitespace, we're done.
+    if (Char != '\n' && Char != '\r')
+      break;
+
+    if (ParsingPreprocessorDirective) {
+      // End of preprocessor directive line, let LexTokenInternal handle this.
+      BufferPtr = CurPtr;
+      return;
+    }
+
+    // OK, but handle newline.
+    // The returned token is at the start of the line.
+    Result.setFlag(Token::StartOfLine);
+    // No leading whitespace seen so far.
+    Result.clearFlag(Token::LeadingSpace);
+    Char = *++CurPtr;
+  }
+
+  // If this isn't immediately after a newline, there is leading space.
+  char PrevChar = CurPtr[-1];
+  if (PrevChar != '\n' && PrevChar != '\r')
+    Result.setFlag(Token::LeadingSpace);
+
+  BufferPtr = CurPtr;
+}
+
+/// SkipBCPLComment - We have just read the // characters from input. Skip
+/// until we find the newline character that terminates the comment. Then
+/// update BufferPtr and return.
+bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
+  // If BCPL comments aren't explicitly enabled for this language, emit an
+  // extension warning.
+  if (!Features.BCPLComment) {
+    Diag(BufferPtr, diag::ext_bcpl_comment);
+
+    // Mark them enabled so we only emit one warning for this translation
+    // unit.
+    Features.BCPLComment = true;
+  }
+
+  // Scan over the body of the comment. The common case, when scanning, is that
+  // the comment contains normal ascii characters with nothing interesting in
+  // them. As such, optimize for this case with the inner loop.
+  char C;
+  do {
+    C = *CurPtr;
+    // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character.
+    // If we find a \n character, scan backwards, checking to see if it's an
+    // escaped newline, like we do for block comments.
+
+    // Skip over characters in the fast loop.
+    while (C != 0 &&                // Potentially EOF.
+           C != '\\' &&             // Potentially escaped newline.
+           C != '?' &&              // Potentially trigraph.
+           C != '\n' && C != '\r')  // Newline or DOS-style newline.
+      C = *++CurPtr;
+
+    // If this is a newline, we're done.
+    if (C == '\n' || C == '\r')
+      break;  // Found the newline? Break out!
+
+    // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
+    // properly decode the character.
+    const char *OldPtr = CurPtr;
+    C = getAndAdvanceChar(CurPtr, Result);
+
+    // If we read multiple characters, and one of those characters was a \r or
+    // \n, then we had an escaped newline within the comment. Emit diagnostic
+    // unless the next line is also a // comment.
+    if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
+      for (; OldPtr != CurPtr; ++OldPtr)
+        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
+          // Okay, we found a // comment that ends in a newline, if the next
+          // line is also a // comment, but has spaces, don't emit a diagnostic.
+          if (isspace(C)) {
+            const char *ForwardPtr = CurPtr;
+            while (isspace(*ForwardPtr))  // Skip whitespace.
+              ++ForwardPtr;
+            if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
+              break;
+          }
+
+          Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
+          break;
+        }
+    }
+
+    if (CurPtr == BufferEnd+1) { --CurPtr; break; }
+  } while (C != '\n' && C != '\r');
+
+  // Found but did not consume the newline.
+
+  // If we are returning comments as tokens, return this comment as a token.
+  if (KeepCommentMode)
+    return SaveBCPLComment(Result, CurPtr);
+
+  // If we are inside a preprocessor directive and we see the end of line,
+  // return immediately, so that the lexer can return this as an EOM token.
+  if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
+    BufferPtr = CurPtr;
+    return true;
+  }
+
+  // Otherwise, eat the \n character. We don't care if this is a \n\r or
+  // \r\n sequence.
+  ++CurPtr;
+
+  // The next returned token is at the start of the line.
+  Result.setFlag(Token::StartOfLine);
+  // No leading whitespace seen so far.
+  Result.clearFlag(Token::LeadingSpace);
+  BufferPtr = CurPtr;
+  return true;
+}
+
+/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
+/// an appropriate way and return it.
+bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
+  Result.setKind(tok::comment);
+  FormTokenWithChars(Result, CurPtr);
+
+  // If this BCPL-style comment is in a macro definition, transmogrify it into
+  // a C-style block comment.
+  if (ParsingPreprocessorDirective) {
+    std::string Spelling = PP->getSpelling(Result);
+    assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
+    Spelling[1] = '*';   // Change prefix to "/*".
+    Spelling += "*/";    // add suffix.
+
+    Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(),
+                                        Result.getLocation()));
+    Result.setLength(Spelling.size());
+  }
+  return false;
+}
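The respelling above turns a line comment into a block comment so it survives inside a macro body. Worked example (illustration):

    //   #define M(x) x   // add one
    // The stored spelling "// add one" becomes "/* add one*/": the second
    // '/' is overwritten with '*', then "*/" is appended.
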
+
+/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified
+/// newline character (either \n or \r) is part of an escaped newline sequence.
+/// Issue a diagnostic if so. We know that this is inside of a block comment.
+static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
+                                                  Lexer *L) {
+  assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
+
+  // Back up off the newline.
+  --CurPtr;
+
+  // If this is a two-character newline sequence, skip the other character.
+  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+    // \n\n or \r\r -> not escaped newline.
+    if (CurPtr[0] == CurPtr[1])
+      return false;
+    // \n\r or \r\n -> skip the newline.
+    --CurPtr;
+  }
+
+  // If we have horizontal whitespace, skip over it. We allow whitespace
+  // between the slash and newline.
+  bool HasSpace = false;
+  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+    --CurPtr;
+    HasSpace = true;
+  }
+
+  // If we have a slash, we know this is an escaped newline.
+  if (*CurPtr == '\\') {
+    if (CurPtr[-1] != '*') return false;
+  } else {
+    // It isn't a slash, is it the ?? / trigraph?
+    if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
+        CurPtr[-3] != '*')
+      return false;
+
+    // This is the trigraph ending the comment. Emit a stern warning!
+    CurPtr -= 2;
+
+    // If no trigraphs are enabled, warn that we ignored this trigraph and
+    // ignore this * character.
+    if (!L->getFeatures().Trigraphs) {
+      L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+      return false;
+    }
+    L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+  }
+
+  // Warn about having an escaped newline between the */ characters.
+  L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
+
+  // If there was space between the backslash and newline, warn about it.
+  if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space);
+
+  return true;
+}
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#elif __ALTIVEC__
+#include <altivec.h>
+#undef bool
+#endif
+
+/// SkipBlockComment - We have just read the /* characters from input. Read
+/// until we find the */ characters that terminate the comment. Note that we
+/// don't bother decoding trigraphs or escaped newlines in block comments,
+/// because they cannot cause the comment to end. The only thing that can
+/// happen is the comment could end with an escaped newline between the * and
+/// / of the comment terminator.
+bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
+  // Scan one character past where we should, looking for a '/' character. Once
+  // we find it, check to see if it was preceded by a *. This common
+  // optimization helps people who like to put a lot of * characters in their
+  // comments.
+
+  // The first character we get with newlines and trigraphs skipped to handle
+  // the degenerate /*/ case below correctly if the * has an escaped newline
+  // after it.
+  unsigned CharSize;
+  unsigned char C = getCharAndSize(CurPtr, CharSize);
+  CurPtr += CharSize;
+  if (C == 0 && CurPtr == BufferEnd+1) {
+    Diag(BufferPtr, diag::err_unterminated_block_comment);
+    BufferPtr = CurPtr-1;
+    return true;
+  }
+
+  // Check to see if the first character after the '/*' is another /. If so,
+  // then this slash does not end the block comment, it is part of it.
+ if (C == '/') + C = *CurPtr++; + + while (1) { + // Skip over all non-interesting characters until we find end of buffer or a + // (probably ending) '/' character. + if (CurPtr + 24 < BufferEnd) { + // While not aligned to a 16-byte boundary. + while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) + C = *CurPtr++; + + if (C == '/') goto FoundSlash; + +#ifdef __SSE2__ + __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/'); + while (CurPtr+16 <= BufferEnd && + _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0) + CurPtr += 16; +#elif __ALTIVEC__ + __vector unsigned char Slashes = { + '/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/' + }; + while (CurPtr+16 <= BufferEnd && + !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes)) + CurPtr += 16; +#else + // Scan for '/' quickly. Many block comments are very large. + while (CurPtr[0] != '/' && + CurPtr[1] != '/' && + CurPtr[2] != '/' && + CurPtr[3] != '/' && + CurPtr+4 < BufferEnd) { + CurPtr += 4; + } +#endif + + // It has to be one of the bytes scanned, increment to it and read one. + C = *CurPtr++; + } + + // Loop to scan the remainder. + while (C != '/' && C != '\0') + C = *CurPtr++; + + FoundSlash: + if (C == '/') { + if (CurPtr[-2] == '*') // We found the final */. We're done! + break; + + if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { + if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) { + // We found the final */, though it had an escaped newline between the + // * and /. We're done! + break; + } + } + if (CurPtr[0] == '*' && CurPtr[1] != '/') { + // If this is a /* inside of the comment, emit a warning. Don't do this + // if this is a /*/, which will end the comment. This misses cases with + // embedded escaped newlines, but oh well. + Diag(CurPtr-1, diag::nested_block_comment); + } + } else if (C == 0 && CurPtr == BufferEnd+1) { + Diag(BufferPtr, diag::err_unterminated_block_comment); + // Note: the user probably forgot a */. We could continue immediately + // after the /*, but this would involve lexing a lot of what really is the + // comment, which surely would confuse the parser. + BufferPtr = CurPtr-1; + return true; + } + C = *CurPtr++; + } + + // If we are returning comments as tokens, return this comment as a token. + if (KeepCommentMode) { + Result.setKind(tok::comment); + FormTokenWithChars(Result, CurPtr); + return false; + } + + // It is common for the tokens immediately after a /**/ comment to be + // whitespace. Instead of going through the big switch, handle it + // efficiently now. + if (isHorizontalWhitespace(*CurPtr)) { + Result.setFlag(Token::LeadingSpace); + SkipWhitespace(Result, CurPtr+1); + return true; + } + + // Otherwise, just return so that the next character will be lexed as a token. + BufferPtr = CurPtr; + Result.setFlag(Token::LeadingSpace); + return true; +} + +//===----------------------------------------------------------------------===// +// Primary Lexing Entry Points +//===----------------------------------------------------------------------===// + +/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and +/// (potentially) macro expand the filename. +void Lexer::LexIncludeFilename(Token &FilenameTok) { + assert(ParsingPreprocessorDirective && + ParsingFilename == false && + "Must be in a preprocessing directive!"); + + // We are now parsing a filename! + ParsingFilename = true; + + // Lex the filename. 
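+  // (While ParsingFilename is set, a '<' is lexed as a single angled string
+  // literal token; see the '<' case in LexTokenInternal.)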
+ Lex(FilenameTok); + + // We should have obtained the filename now. + ParsingFilename = false; + + // No filename? + if (FilenameTok.is(tok::eom)) + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); +} + +/// ReadToEndOfLine - Read the rest of the current preprocessor line as an +/// uninterpreted string. This switches the lexer out of directive mode. +std::string Lexer::ReadToEndOfLine() { + assert(ParsingPreprocessorDirective && ParsingFilename == false && + "Must be in a preprocessing directive!"); + std::string Result; + Token Tmp; + + // CurPtr - Cache BufferPtr in an automatic variable. + const char *CurPtr = BufferPtr; + while (1) { + char Char = getAndAdvanceChar(CurPtr, Tmp); + switch (Char) { + default: + Result += Char; + break; + case 0: // Null. + // Found end of file? + if (CurPtr-1 != BufferEnd) { + // Nope, normal character, continue. + Result += Char; + break; + } + // FALL THROUGH. + case '\r': + case '\n': + // Okay, we found the end of the line. First, back up past the \0, \r, \n. + assert(CurPtr[-1] == Char && "Trigraphs for newline?"); + BufferPtr = CurPtr-1; + + // Next, lex the character, which should handle the EOM transition. + Lex(Tmp); + assert(Tmp.is(tok::eom) && "Unexpected token!"); + + // Finally, we're done, return the string we found. + return Result; + } + } +} + +/// LexEndOfFile - CurPtr points to the end of this file. Handle this +/// condition, reporting diagnostics and handling other edge cases as required. +/// This returns true if Result contains a token, false if PP.Lex should be +/// called again. +bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { + // If we hit the end of the file while parsing a preprocessor directive, + // end the preprocessor directive first. The next token returned will + // then be the end of file. + if (ParsingPreprocessorDirective) { + // Done parsing the "line". + ParsingPreprocessorDirective = false; + Result.setKind(tok::eom); + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); + + // Restore comment saving mode, in case it was disabled for directive. + KeepCommentMode = PP->getCommentRetentionState(); + return true; // Have a token. + } + + // If we are in raw mode, return this event as an EOF token. Let the caller + // that put us in raw mode handle the event. + if (LexingRawMode) { + Result.startToken(); + BufferPtr = BufferEnd; + FormTokenWithChars(Result, BufferEnd); + Result.setKind(tok::eof); + return true; + } + + // Otherwise, issue diagnostics for unterminated #if and missing newline. + + // If we are in a #if directive, emit an error. + while (!ConditionalStack.empty()) { + Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); + ConditionalStack.pop_back(); + } + + // If the file was empty or didn't end in a newline, issue a pedwarn. + if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r') + Diag(BufferEnd, diag::ext_no_newline_eof); + + BufferPtr = CurPtr; + + // Finally, let the preprocessor handle this. + return PP->HandleEndOfFile(Result); +} + +/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from +/// the specified lexer will return a tok::l_paren token, 0 if it is something +/// else and 2 if there are no more tokens in the buffer controlled by the +/// lexer. +unsigned Lexer::isNextPPTokenLParen() { + assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); + + // Switch to 'skipping' mode. 
This will ensure that we can lex a token
+  // without emitting diagnostics, will not expand macros, and will cause EOF
+  // to return an EOF token instead of popping the include stack.
+  LexingRawMode = true;
+
+  // Save state that can be changed while lexing so that we can restore it.
+  const char *TmpBufferPtr = BufferPtr;
+
+  Token Tok;
+  Tok.startToken();
+  LexTokenInternal(Tok);
+
+  // Restore state that may have changed.
+  BufferPtr = TmpBufferPtr;
+
+  // Restore the lexer back to non-skipping mode.
+  LexingRawMode = false;
+
+  if (Tok.is(tok::eof))
+    return 2;
+  return Tok.is(tok::l_paren);
+}
+
+
+/// LexTokenInternal - This implements a simple C family lexer.  It is an
+/// extremely performance critical piece of code.  This assumes that the
+/// buffer has a null character at the end of the file.  It returns a
+/// preprocessing token, not a normal token; as such, it is an internal
+/// interface.  It assumes that the Flags of Result have been cleared before
+/// calling this.
+void Lexer::LexTokenInternal(Token &Result) {
+LexNextToken:
+  // New token, can't need cleaning yet.
+  Result.clearFlag(Token::NeedsCleaning);
+  Result.setIdentifierInfo(0);
+
+  // CurPtr - Cache BufferPtr in an automatic variable.
+  const char *CurPtr = BufferPtr;
+
+  // Small amounts of horizontal whitespace are very common between tokens.
+  if ((*CurPtr == ' ') || (*CurPtr == '\t')) {
+    ++CurPtr;
+    while ((*CurPtr == ' ') || (*CurPtr == '\t'))
+      ++CurPtr;
+    BufferPtr = CurPtr;
+    Result.setFlag(Token::LeadingSpace);
+  }
+
+  unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below.
+
+  // Read a character, advancing over it.
+  char Char = getAndAdvanceChar(CurPtr, Result);
+  switch (Char) {
+  case 0:  // Null.
+    // Found end of file?
+    if (CurPtr-1 == BufferEnd) {
+      // Read the PP instance variable into an automatic variable, because
+      // LexEndOfFile will often delete 'this'.
+      Preprocessor *PPCache = PP;
+      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
+        return;   // Got a token to return.
+      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
+      return PPCache->Lex(Result);
+    }
+
+    Diag(CurPtr-1, diag::null_in_file);
+    Result.setFlag(Token::LeadingSpace);
+    SkipWhitespace(Result, CurPtr);
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+  case '\n':
+  case '\r':
+    // If we are inside a preprocessor directive and we see the end of line,
+    // we know we are done with the directive, so return an EOM token.
+    if (ParsingPreprocessorDirective) {
+      // Done parsing the "line".
+      ParsingPreprocessorDirective = false;
+
+      // Restore comment saving mode, in case it was disabled for directive.
+      KeepCommentMode = PP->getCommentRetentionState();
+
+      // Since we consumed a newline, we are back at the start of a line.
+      IsAtStartOfLine = true;
+
+      Result.setKind(tok::eom);
+      break;
+    }
+    // The returned token is at the start of the line.
+    Result.setFlag(Token::StartOfLine);
+    // No leading whitespace seen so far.
+    Result.clearFlag(Token::LeadingSpace);
+    SkipWhitespace(Result, CurPtr);
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+  case ' ':
+  case '\t':
+  case '\f':
+  case '\v':
+  SkipHorizontalWhitespace:
+    Result.setFlag(Token::LeadingSpace);
+    SkipWhitespace(Result, CurPtr);
+
+  SkipIgnoredUnits:
+    CurPtr = BufferPtr;
+
+    // If the next token is obviously a // or /* */ comment, skip it
+    // efficiently too (without going through the big switch stmt).
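+    // (When comments are being kept as tokens, they must take the normal
+    // path below so that they are returned to the caller.)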
+ if (CurPtr[0] == '/' && CurPtr[1] == '/' && !KeepCommentMode) { + SkipBCPLComment(Result, CurPtr+2); + goto SkipIgnoredUnits; + } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !KeepCommentMode) { + SkipBlockComment(Result, CurPtr+2); + goto SkipIgnoredUnits; + } else if (isHorizontalWhitespace(*CurPtr)) { + goto SkipHorizontalWhitespace; + } + goto LexNextToken; // GCC isn't tail call eliminating. + + // C99 6.4.4.1: Integer Constants. + // C99 6.4.4.2: Floating Constants. + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexNumericConstant(Result, CurPtr); + + case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + Char = getCharAndSize(CurPtr, SizeTmp); + + // Wide string literal. + if (Char == '"') + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + true); + + // Wide character constant. + if (Char == '\'') + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + // FALL THROUGH, treating L like the start of an identifier. + + // C99 6.4.2: Identifiers. + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '_': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexIdentifier(Result, CurPtr); + + case '$': // $ in identifiers. + if (Features.DollarIdents) { + Diag(CurPtr-1, diag::ext_dollar_in_identifier); + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexIdentifier(Result, CurPtr); + } + + Result.setKind(tok::unknown); + break; + + // C99 6.4.4: Character Constants. + case '\'': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexCharConstant(Result, CurPtr); + + // C99 6.4.5: String Literals. + case '"': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexStringLiteral(Result, CurPtr, false); + + // C99 6.4.6: Punctuators. + case '?': + Result.setKind(tok::question); + break; + case '[': + Result.setKind(tok::l_square); + break; + case ']': + Result.setKind(tok::r_square); + break; + case '(': + Result.setKind(tok::l_paren); + break; + case ')': + Result.setKind(tok::r_paren); + break; + case '{': + Result.setKind(tok::l_brace); + break; + case '}': + Result.setKind(tok::r_brace); + break; + case '.': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char >= '0' && Char <= '9') { + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + + return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + } else if (Features.CPlusPlus && Char == '*') { + Result.setKind(tok::periodstar); + CurPtr += SizeTmp; + } else if (Char == '.' 
&& + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') { + Result.setKind(tok::ellipsis); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else { + Result.setKind(tok::period); + } + break; + case '&': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '&') { + Result.setKind(tok::ampamp); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::ampequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::amp); + } + break; + case '*': + if (getCharAndSize(CurPtr, SizeTmp) == '=') { + Result.setKind(tok::starequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::star); + } + break; + case '+': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '+') { + Result.setKind(tok::plusplus); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::plusequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::plus); + } + break; + case '-': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '-') { + Result.setKind(tok::minusminus); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '>' && Features.CPlusPlus && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { + Result.setKind(tok::arrowstar); // C++ ->* + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '>') { + Result.setKind(tok::arrow); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::minusequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::minus); + } + break; + case '~': + Result.setKind(tok::tilde); + break; + case '!': + if (getCharAndSize(CurPtr, SizeTmp) == '=') { + Result.setKind(tok::exclaimequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::exclaim); + } + break; + case '/': + // 6.4.9: Comments + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '/') { // BCPL comment. + if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) { + // It is common for the tokens immediately after a // comment to be + // whitespace (indentation for the next line). Instead of going through + // the big switch, handle it efficiently now. + goto SkipIgnoredUnits; + } + return; // KeepCommentMode + } else if (Char == '*') { // /**/ comment. + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) + goto LexNextToken; // GCC isn't tail call eliminating. 
+ return; // KeepCommentMode + } else if (Char == '=') { + Result.setKind(tok::slashequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::slash); + } + break; + case '%': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::percentequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == '>') { + Result.setKind(tok::r_brace); // '%>' -> '}' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == ':') { + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { + Result.setKind(tok::hashhash); // '%:%:' -> '##' + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '@' && Features.Microsoft) { // %:@ -> #@ -> Charize + Result.setKind(tok::hashat); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Diag(BufferPtr, diag::charize_microsoft_ext); + } else { + Result.setKind(tok::hash); // '%:' -> '#' + + // We parsed a # character. If this occurs at the start of the line, + // it's actually the start of a preprocessing directive. Callback to + // the preprocessor to handle it. + // FIXME: -fpreprocessed mode?? + if (Result.isAtStartOfLine() && !LexingRawMode) { + BufferPtr = CurPtr; + PP->HandleDirective(Result); + + // As an optimization, if the preprocessor didn't switch lexers, tail + // recurse. + if (PP->isCurrentLexer(this)) { + // Start a new token. If this is a #include or something, the PP may + // want us starting at the beginning of the line again. If so, set + // the StartOfLine flag. + if (IsAtStartOfLine) { + Result.setFlag(Token::StartOfLine); + IsAtStartOfLine = false; + } + goto LexNextToken; // GCC isn't tail call eliminating. 
+ } + + return PP->Lex(Result); + } + } + } else { + Result.setKind(tok::percent); + } + break; + case '<': + Char = getCharAndSize(CurPtr, SizeTmp); + if (ParsingFilename) { + return LexAngledStringLiteral(Result, CurPtr+SizeTmp); + } else if (Char == '<' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { + Result.setKind(tok::lesslessequal); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '<') { + Result.setKind(tok::lessless); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::lessequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == ':') { + Result.setKind(tok::l_square); // '<:' -> '[' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == '%') { + Result.setKind(tok::l_brace); // '<%' -> '{' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::less); + } + break; + case '>': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::greaterequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '>' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { + Result.setKind(tok::greatergreaterequal); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '>') { + Result.setKind(tok::greatergreater); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::greater); + } + break; + case '^': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::caretequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::caret); + } + break; + case '|': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::pipeequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '|') { + Result.setKind(tok::pipepipe); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::pipe); + } + break; + case ':': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Features.Digraphs && Char == '>') { + Result.setKind(tok::r_square); // ':>' -> ']' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.CPlusPlus && Char == ':') { + Result.setKind(tok::coloncolon); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::colon); + } + break; + case ';': + Result.setKind(tok::semi); + break; + case '=': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::equalequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::equal); + } + break; + case ',': + Result.setKind(tok::comma); + break; + case '#': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '#') { + Result.setKind(tok::hashhash); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '@' && Features.Microsoft) { // #@ -> Charize + Result.setKind(tok::hashat); + Diag(BufferPtr, diag::charize_microsoft_ext); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::hash); + // We parsed a # character. If this occurs at the start of the line, + // it's actually the start of a preprocessing directive. Callback to + // the preprocessor to handle it. + // FIXME: -fpreprocessed mode?? 
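+      // (A '#' that is not at the start of a line is returned as an ordinary
+      // tok::hash token; e.g. the stringize operator inside a macro
+      // definition body.)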
+ if (Result.isAtStartOfLine() && !LexingRawMode) { + BufferPtr = CurPtr; + PP->HandleDirective(Result); + + // As an optimization, if the preprocessor didn't switch lexers, tail + // recurse. + if (PP->isCurrentLexer(this)) { + // Start a new token. If this is a #include or something, the PP may + // want us starting at the beginning of the line again. If so, set + // the StartOfLine flag. + if (IsAtStartOfLine) { + Result.setFlag(Token::StartOfLine); + IsAtStartOfLine = false; + } + goto LexNextToken; // GCC isn't tail call eliminating. + } + return PP->Lex(Result); + } + } + break; + + case '@': + // Objective C support. + if (CurPtr[-1] == '@' && Features.ObjC1) + Result.setKind(tok::at); + else + Result.setKind(tok::unknown); + break; + + case '\\': + // FIXME: UCN's. + // FALL THROUGH. + default: + Result.setKind(tok::unknown); + break; + } + + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp new file mode 100644 index 00000000000..aa0b831af90 --- /dev/null +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -0,0 +1,691 @@ +//===--- LiteralSupport.cpp - Code to parse and process literals ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the NumericLiteralParser, CharLiteralParser, and +// StringLiteralParser interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringExtras.h" +using namespace clang; + +/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's +/// not valid. +static int HexDigitValue(char C) { + if (C >= '0' && C <= '9') return C-'0'; + if (C >= 'a' && C <= 'f') return C-'a'+10; + if (C >= 'A' && C <= 'F') return C-'A'+10; + return -1; +} + +/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in +/// either a character or a string literal. +static unsigned ProcessCharEscape(const char *&ThisTokBuf, + const char *ThisTokEnd, bool &HadError, + SourceLocation Loc, bool IsWide, + Preprocessor &PP) { + // Skip the '\' char. + ++ThisTokBuf; + + // We know that this character can't be off the end of the buffer, because + // that would have been \", which would not have been the end of string. + unsigned ResultChar = *ThisTokBuf++; + switch (ResultChar) { + // These map to themselves. + case '\\': case '\'': case '"': case '?': break; + + // These have fixed mappings. + case 'a': + // TODO: K&R: the meaning of '\\a' is different in traditional C + ResultChar = 7; + break; + case 'b': + ResultChar = 8; + break; + case 'e': + PP.Diag(Loc, diag::ext_nonstandard_escape, "e"); + ResultChar = 27; + break; + case 'f': + ResultChar = 12; + break; + case 'n': + ResultChar = 10; + break; + case 'r': + ResultChar = 13; + break; + case 't': + ResultChar = 9; + break; + case 'v': + ResultChar = 11; + break; + + //case 'u': case 'U': // FIXME: UCNs. + case 'x': { // Hex escape. 
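+    // For example, "\x41" yields 0x41 ('A' in ASCII); a longer run such as
+    // "\x1234" keeps consuming hex digits and is diagnosed below if the
+    // value no longer fits in a character.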
+    ResultChar = 0;
+    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
+      PP.Diag(Loc, diag::err_hex_escape_no_digits);
+      HadError = true;
+      break;
+    }
+
+    // Hex escapes are a maximal series of hex digits.
+    bool Overflow = false;
+    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
+      int CharVal = HexDigitValue(ThisTokBuf[0]);
+      if (CharVal == -1) break;
+      // About to shift out a digit?
+      Overflow |= (ResultChar & 0xF0000000) ? true : false;
+      ResultChar <<= 4;
+      ResultChar |= CharVal;
+    }
+
+    // See if any bits will be truncated when evaluated as a character.
+    unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
+
+    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+      Overflow = true;
+      ResultChar &= ~0U >> (32-CharWidth);
+    }
+
+    // Check for overflow.
+    if (Overflow)   // Too many digits to fit in the character.
+      PP.Diag(Loc, diag::warn_hex_escape_too_large);
+    break;
+  }
+  case '0': case '1': case '2': case '3':
+  case '4': case '5': case '6': case '7': {
+    // Octal escapes.
+    --ThisTokBuf;
+    ResultChar = 0;
+
+    // Octal escapes are a series of octal digits with maximum length 3.
+    // "\0123" is a two digit sequence equal to "\012" "3".
+    unsigned NumDigits = 0;
+    do {
+      ResultChar <<= 3;
+      ResultChar |= *ThisTokBuf++ - '0';
+      ++NumDigits;
+    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
+             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
+
+    // Check for overflow.  Reject '\777', but not L'\777'.
+    unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
+
+    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+      PP.Diag(Loc, diag::warn_octal_escape_too_large);
+      ResultChar &= ~0U >> (32-CharWidth);
+    }
+    break;
+  }
+
+    // Otherwise, these are not valid escapes.
+  case '(': case '{': case '[': case '%':
+    // GCC accepts these as extensions.  We warn about them as such though.
+    if (!PP.getLangOptions().NoExtensions) {
+      PP.Diag(Loc, diag::ext_nonstandard_escape,
+              std::string()+(char)ResultChar);
+      break;
+    }
+    // FALL THROUGH.
+  default:
+    if (isgraph(ThisTokBuf[0])) {
+      PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
+    } else {
+      PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
+    }
+    break;
+  }
+
+  return ResultChar;
+}
+
+
+
+
+/// integer-constant: [C99 6.4.4.1]
+///   decimal-constant integer-suffix
+///   octal-constant integer-suffix
+///   hexadecimal-constant integer-suffix
+/// decimal-constant:
+///   nonzero-digit
+///   decimal-constant digit
+/// octal-constant:
+///   0
+///   octal-constant octal-digit
+/// hexadecimal-constant:
+///   hexadecimal-prefix hexadecimal-digit
+///   hexadecimal-constant hexadecimal-digit
+/// hexadecimal-prefix: one of
+///   0x 0X
+/// integer-suffix:
+///   unsigned-suffix [long-suffix]
+///   unsigned-suffix [long-long-suffix]
+///   long-suffix [unsigned-suffix]
+///   long-long-suffix [unsigned-suffix]
+/// nonzero-digit:
+///   1 2 3 4 5 6 7 8 9
+/// octal-digit:
+///   0 1 2 3 4 5 6 7
+/// hexadecimal-digit:
+///   0 1 2 3 4 5 6 7 8 9
+///   a b c d e f
+///   A B C D E F
+/// unsigned-suffix: one of
+///   u U
+/// long-suffix: one of
+///   l L
+/// long-long-suffix: one of
+///   ll LL
+///
+/// floating-constant: [C99 6.4.4.2]
+///   TODO: add rules...
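+///
+/// For example, 42, 052, and 0x2a all spell the value forty-two in decimal,
+/// octal, and hexadecimal radix respectively; 42ull adds an unsigned-suffix
+/// and a long-long-suffix.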
+///
+
+NumericLiteralParser::
+NumericLiteralParser(const char *begin, const char *end,
+                     SourceLocation TokLoc, Preprocessor &pp)
+  : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
+  s = DigitsBegin = begin;
+  saw_exponent = false;
+  saw_period = false;
+  isLong = false;
+  isUnsigned = false;
+  isLongLong = false;
+  isFloat = false;
+  isImaginary = false;
+  hadError = false;
+
+  if (*s == '0') { // parse radix
+    s++;
+    if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
+      s++;
+      radix = 16;
+      DigitsBegin = s;
+      s = SkipHexDigits(s);
+      if (s == ThisTokEnd) {
+        // Done.
+      } else if (*s == '.') {
+        s++;
+        saw_period = true;
+        s = SkipHexDigits(s);
+      }
+      // A binary exponent can appear with or without a '.'.  If dotted, the
+      // binary exponent is required.
+      if ((*s == 'p' || *s == 'P') && PP.getLangOptions().HexFloats) {
+        s++;
+        saw_exponent = true;
+        if (*s == '+' || *s == '-')  s++; // sign
+        const char *first_non_digit = SkipDigits(s);
+        if (first_non_digit == s) {
+          Diag(TokLoc, diag::err_exponent_has_no_digits);
+          return;
+        } else {
+          s = first_non_digit;
+        }
+      } else if (saw_period) {
+        Diag(TokLoc, diag::err_hexconstant_requires_exponent);
+        return;
+      }
+    } else if (*s == 'b' || *s == 'B') {
+      // 0b101010 is a GCC extension.
+      ++s;
+      radix = 2;
+      DigitsBegin = s;
+      s = SkipBinaryDigits(s);
+      if (s == ThisTokEnd) {
+        // Done.
+      } else if (isxdigit(*s)) {
+        Diag(TokLoc, diag::err_invalid_binary_digit, std::string(s, s+1));
+        return;
+      }
+      PP.Diag(TokLoc, diag::ext_binary_literal);
+    } else {
+      // For now, the radix is set to 8.  If we discover that we have a
+      // floating point constant, the radix will change to 10.  Octal floating
+      // point constants are not permitted (only decimal and hexadecimal).
+      radix = 8;
+      DigitsBegin = s;
+      s = SkipOctalDigits(s);
+      if (s == ThisTokEnd) {
+        // Done.
+      } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
+        TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
+        Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1));
+        return;
+      } else if (*s == '.') {
+        s++;
+        radix = 10;
+        saw_period = true;
+        s = SkipDigits(s);
+      }
+      if (*s == 'e' || *s == 'E') { // exponent
+        s++;
+        radix = 10;
+        saw_exponent = true;
+        if (*s == '+' || *s == '-')  s++; // sign
+        const char *first_non_digit = SkipDigits(s);
+        if (first_non_digit == s) {
+          Diag(TokLoc, diag::err_exponent_has_no_digits);
+          return;
+        } else {
+          s = first_non_digit;
+        }
+      }
+    }
+  } else { // the first digit is non-zero
+    radix = 10;
+    s = SkipDigits(s);
+    if (s == ThisTokEnd) {
+      // Done.
+    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
+      Diag(TokLoc, diag::err_invalid_decimal_digit, std::string(s, s+1));
+      return;
+    } else if (*s == '.') {
+      s++;
+      saw_period = true;
+      s = SkipDigits(s);
+    }
+    if (*s == 'e' || *s == 'E') { // exponent
+      s++;
+      saw_exponent = true;
+      if (*s == '+' || *s == '-')  s++; // sign
+      const char *first_non_digit = SkipDigits(s);
+      if (first_non_digit == s) {
+        Diag(TokLoc, diag::err_exponent_has_no_digits);
+        return;
+      } else {
+        s = first_non_digit;
+      }
+    }
+  }
+
+  SuffixBegin = s;
+
+  // Parse the suffix.  At this point we can classify whether we have an FP or
+  // integer constant.
+  bool isFPConstant = isFloatingLiteral();
+
+  // Loop over all of the characters of the suffix.  If we see something bad,
+  // we break out of the loop.
+  for (; s != ThisTokEnd; ++s) {
+    switch (*s) {
+    case 'f':      // FP Suffix for "float"
+    case 'F':
+      if (!isFPConstant) break;  // Error for integer constant.
+      if (isFloat || isLong) break; // FF, LF invalid.
+ isFloat = true; + continue; // Success. + case 'u': + case 'U': + if (isFPConstant) break; // Error for floating constant. + if (isUnsigned) break; // Cannot be repeated. + isUnsigned = true; + continue; // Success. + case 'l': + case 'L': + if (isLong || isLongLong) break; // Cannot be repeated. + if (isFloat) break; // LF invalid. + + // Check for long long. The L's need to be adjacent and the same case. + if (s+1 != ThisTokEnd && s[1] == s[0]) { + if (isFPConstant) break; // long long invalid for floats. + isLongLong = true; + ++s; // Eat both of them. + } else { + isLong = true; + } + continue; // Success. + case 'i': + case 'I': + case 'j': + case 'J': + if (isImaginary) break; // Cannot be repeated. + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), + diag::ext_imaginary_constant); + isImaginary = true; + continue; // Success. + } + // If we reached here, there was an error. + break; + } + + // Report an error if there are any. + if (s != ThisTokEnd) { + TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin); + Diag(TokLoc, isFPConstant ? diag::err_invalid_suffix_float_constant : + diag::err_invalid_suffix_integer_constant, + std::string(SuffixBegin, ThisTokEnd)); + return; + } +} + +/// GetIntegerValue - Convert this numeric literal value to an APInt that +/// matches Val's input width. If there is an overflow, set Val to the low bits +/// of the result and return true. Otherwise, return false. +bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { + Val = 0; + s = DigitsBegin; + + llvm::APInt RadixVal(Val.getBitWidth(), radix); + llvm::APInt CharVal(Val.getBitWidth(), 0); + llvm::APInt OldVal = Val; + + bool OverflowOccurred = false; + while (s < SuffixBegin) { + unsigned C = HexDigitValue(*s++); + + // If this letter is out of bound for this radix, reject it. + assert(C < radix && "NumericLiteralParser ctor should have rejected this"); + + CharVal = C; + + // Add the digit to the value in the appropriate radix. If adding in digits + // made the value smaller, then this overflowed. + OldVal = Val; + + // Multiply by radix, did overflow occur on the multiply? + Val *= RadixVal; + OverflowOccurred |= Val.udiv(RadixVal) != OldVal; + + OldVal = Val; + // Add value, did overflow occur on the value? + Val += CharVal; + OverflowOccurred |= Val.ult(OldVal); + OverflowOccurred |= Val.ult(CharVal); + } + return OverflowOccurred; +} + +llvm::APFloat NumericLiteralParser:: +GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) { + using llvm::APFloat; + + llvm::SmallVector<char,256> floatChars; + for (unsigned i = 0, n = ThisTokEnd-ThisTokBegin; i != n; ++i) + floatChars.push_back(ThisTokBegin[i]); + + floatChars.push_back('\0'); + + APFloat V (Format, APFloat::fcZero, false); + APFloat::opStatus status; + + status = V.convertFromString(&floatChars[0],APFloat::rmNearestTiesToEven); + + if (isExact) + *isExact = status == APFloat::opOK; + + return V; +} + +void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &M) { + PP.Diag(Loc, DiagID, M); + hadError = true; +} + + +CharLiteralParser::CharLiteralParser(const char *begin, const char *end, + SourceLocation Loc, Preprocessor &PP) { + // At this point we know that the character matches the regex "L?'.*'". + HadError = false; + Value = 0; + + // Determine if this is a wide character. + IsWide = begin[0] == 'L'; + if (IsWide) ++begin; + + // Skip over the entry quote. 
+  assert(begin[0] == '\'' && "Invalid token lexed");
+  ++begin;
+
+  // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
+  // size of "value".
+  assert(PP.getTargetInfo().getIntWidth() == 32 &&
+         "Assumes sizeof(int) == 4 for now");
+  // FIXME: This assumes that wchar_t is 32-bits for now.
+  assert(PP.getTargetInfo().getWCharWidth() == 32 &&
+         "Assumes sizeof(wchar_t) == 4 for now");
+  // FIXME: This extensively assumes that 'char' is 8-bits.
+  assert(PP.getTargetInfo().getCharWidth() == 8 &&
+         "Assumes char is 8 bits");
+
+  bool isFirstChar = true;
+  bool isMultiChar = false;
+  while (begin[0] != '\'') {
+    unsigned ResultChar;
+    if (begin[0] != '\\')     // If this is a normal character, consume it.
+      ResultChar = *begin++;
+    else                      // Otherwise, this is an escape character.
+      ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
+
+    // If this is a multi-character constant (e.g. 'abc'), handle it.  These
+    // are implementation defined (C99 6.4.4.4p10).
+    if (!isFirstChar) {
+      // If this is the second character being processed, do special handling.
+      if (!isMultiChar) {
+        isMultiChar = true;
+
+        // Warn about discarding the top bits for multi-char wide-character
+        // constants (L'abcd').
+        if (IsWide)
+          PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
+      }
+
+      if (IsWide) {
+        // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
+        Value = 0;
+      } else {
+        // Narrow character literals act as though their value is concatenated
+        // in this implementation.
+        if (((Value << 8) >> 8) != Value)
+          PP.Diag(Loc, diag::warn_char_constant_too_large);
+        Value <<= 8;
+      }
+    }
+
+    Value += ResultChar;
+    isFirstChar = false;
+  }
+
+  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
+  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
+  // character constants are not sign extended in this implementation:
+  // '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
+  if (!IsWide && !isMultiChar && (Value & 128) &&
+      PP.getTargetInfo().isCharSigned())
+    Value = (signed char)Value;
+}
+
+
+/// string-literal: [C99 6.4.5]
+///          " [s-char-sequence] "
+///          L" [s-char-sequence] "
+/// s-char-sequence:
+///          s-char
+///          s-char-sequence s-char
+/// s-char:
+///          any source character except the double quote ",
+///            backslash \, or newline character
+///          escape-character
+///          universal-character-name
+/// escape-character: [C99 6.4.4.4]
+///          \ escape-code
+///          universal-character-name
+/// escape-code:
+///          character-escape-code
+///          octal-escape-code
+///          hex-escape-code
+/// character-escape-code: one of
+///          n t b r f v a
+///          \ ' " ?
+/// octal-escape-code:
+///          octal-digit
+///          octal-digit octal-digit
+///          octal-digit octal-digit octal-digit
+/// hex-escape-code:
+///          x hex-digit
+///          hex-escape-code hex-digit
+/// universal-character-name:
+///          \u hex-quad
+///          \U hex-quad hex-quad
+/// hex-quad:
+///          hex-digit hex-digit hex-digit hex-digit
+///
+StringLiteralParser::
+StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
+                    Preprocessor &pp, TargetInfo &t)
+  : PP(pp), Target(t) {
+  // Scan all of the string portions, remember the max individual token length,
+  // computing a bound on the concatenated string length, and see whether any
+  // piece is a wide-string.  If any of the string portions is a wide-string
+  // literal, the result is a wide-string literal [C99 6.4.5p4].
+  MaxTokenLength = StringToks[0].getLength();
+  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
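+  // (For example, the four-character token "ab" can contribute at most two
+  // bytes of string data.)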
+ AnyWide = StringToks[0].is(tok::wide_string_literal); + + hadError = false; + + // Implement Translation Phase #6: concatenation of string literals + /// (C99 5.1.1.2p1). The common case is only one string fragment. + for (unsigned i = 1; i != NumStringToks; ++i) { + // The string could be shorter than this if it needs cleaning, but this is a + // reasonable bound, which is all we need. + SizeBound += StringToks[i].getLength()-2; // -2 for "". + + // Remember maximum string piece length. + if (StringToks[i].getLength() > MaxTokenLength) + MaxTokenLength = StringToks[i].getLength(); + + // Remember if we see any wide strings. + AnyWide |= StringToks[i].is(tok::wide_string_literal); + } + + + // Include space for the null terminator. + ++SizeBound; + + // TODO: K&R warning: "traditional C rejects string constant concatenation" + + // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not + // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true. + wchar_tByteWidth = ~0U; + if (AnyWide) { + wchar_tByteWidth = Target.getWCharWidth(); + assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!"); + wchar_tByteWidth /= 8; + } + + // The output buffer size needs to be large enough to hold wide characters. + // This is a worst-case assumption which basically corresponds to L"" "long". + if (AnyWide) + SizeBound *= wchar_tByteWidth; + + // Size the temporary buffer to hold the result string data. + ResultBuf.resize(SizeBound); + + // Likewise, but for each string piece. + llvm::SmallString<512> TokenBuf; + TokenBuf.resize(MaxTokenLength); + + // Loop over all the strings, getting their spelling, and expanding them to + // wide strings as appropriate. + ResultPtr = &ResultBuf[0]; // Next byte to fill in. + + Pascal = false; + + for (unsigned i = 0, e = NumStringToks; i != e; ++i) { + const char *ThisTokBuf = &TokenBuf[0]; + // Get the spelling of the token, which eliminates trigraphs, etc. We know + // that ThisTokBuf points to a buffer that is big enough for the whole token + // and 'spelled' tokens can only shrink. + unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf); + const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. + + // TODO: Input character set mapping support. + + // Skip L marker for wide strings. + bool ThisIsWide = false; + if (ThisTokBuf[0] == 'L') { + ++ThisTokBuf; + ThisIsWide = true; + } + + assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); + ++ThisTokBuf; + + // Check if this is a pascal string + if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd && + ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { + + // If the \p sequence is found in the first token, we have a pascal string + // Otherwise, if we already have a pascal string, ignore the first \p + if (i == 0) { + ++ThisTokBuf; + Pascal = true; + } else if (Pascal) + ThisTokBuf += 2; + } + + while (ThisTokBuf != ThisTokEnd) { + // Is this a span of non-escape characters? + if (ThisTokBuf[0] != '\\') { + const char *InStart = ThisTokBuf; + do { + ++ThisTokBuf; + } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); + + // Copy the character span over. + unsigned Len = ThisTokBuf-InStart; + if (!AnyWide) { + memcpy(ResultPtr, InStart, Len); + ResultPtr += Len; + } else { + // Note: our internal rep of wide char tokens is always little-endian. + for (; Len; --Len, ++InStart) { + *ResultPtr++ = InStart[0]; + // Add zeros at the end. 
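+          // (Low byte first, then wchar_tByteWidth-1 zero bytes per
+          // character.)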
+          for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+            *ResultPtr++ = 0;
+        }
+      }
+      continue;
+    }
+
+    // Otherwise, this is an escape character.  Process it.
+    unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
+                                            StringToks[i].getLocation(),
+                                            ThisIsWide, PP);
+
+    // Note: our internal rep of wide char tokens is always little-endian.
+    *ResultPtr++ = ResultChar & 0xFF;
+
+    if (AnyWide) {
+      for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+        *ResultPtr++ = ResultChar >> i*8;
+    }
+  }
+
+  // Add zero terminator.
+  *ResultPtr = 0;
+  if (AnyWide) {
+    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+      *ResultPtr++ = 0;
+  }
+
+  if (Pascal)
+    ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
+}
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
new file mode 100644
index 00000000000..a26e50eb762
--- /dev/null
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -0,0 +1,225 @@
+//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroArgs interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+using namespace clang;
+
+/// MacroArgs ctor function - Create a new MacroArgs object, copying the
+/// unexpanded argument tokens to memory immediately after it.
+MacroArgs *MacroArgs::create(const MacroInfo *MI,
+                             const Token *UnexpArgTokens,
+                             unsigned NumToks, bool VarargsElided) {
+  assert(MI->isFunctionLike() &&
+         "Can't have args for an object-like macro!");
+
+  // Allocate memory for the MacroArgs object with the lexer tokens at the end.
+  MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) +
+                                         NumToks*sizeof(Token));
+  // Construct the macroargs object.
+  new (Result) MacroArgs(NumToks, VarargsElided);
+
+  // Copy the actual unexpanded tokens to immediately after the result ptr.
+  if (NumToks)
+    memcpy(const_cast<Token*>(Result->getUnexpArgument(0)),
+           UnexpArgTokens, NumToks*sizeof(Token));
+
+  return Result;
+}
+
+/// destroy - Destroy and deallocate the memory for this object.
+///
+void MacroArgs::destroy() {
+  // Run the dtor to deallocate the vectors.
+  this->~MacroArgs();
+  // Release the memory for the object.
+  free(this);
+}
+
+
+/// getArgLength - Given a pointer to an expanded or unexpanded argument,
+/// return the number of tokens, not counting the EOF, that make up the
+/// argument.
+unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
+  unsigned NumArgTokens = 0;
+  for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
+    ++NumArgTokens;
+  return NumArgTokens;
+}
+
+
+/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
+///
+const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
+  // The unexpanded argument tokens start immediately after the MacroArgs
+  // object in memory.
+  const Token *Start = (const Token *)(this+1);
+  const Token *Result = Start;
+  // Scan to find Arg.
+  for (; Arg; ++Result) {
+    assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
+    if (Result->is(tok::eof))
+      --Arg;
+  }
+  return Result;
+}
+
+
+/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
+/// by pre-expansion, return false.  Otherwise, conservatively return true.
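+/// For example, given "#define ID(x) x", the argument in "ID(FOO)" needs
+/// pre-expansion whenever FOO is itself an enabled macro; an argument like
+/// "ID(1 + 2)" never does.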
+bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
+                                     Preprocessor &PP) const {
+  // If there are no identifiers in the argument list, or if the identifiers
+  // are known to not be macros, pre-expansion won't modify it.
+  for (; ArgTok->isNot(tok::eof); ++ArgTok)
+    if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) {
+      if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled())
+        // Return true even though the macro could be a function-like macro
+        // without a following '(' token.
+        return true;
+    }
+  return false;
+}
+
+/// getPreExpArgument - Return the pre-expanded form of the specified
+/// argument.
+const std::vector<Token> &
+MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) {
+  assert(Arg < NumUnexpArgTokens && "Invalid argument number!");
+
+  // If we have already computed this, return it.
+  if (PreExpArgTokens.empty())
+    PreExpArgTokens.resize(NumUnexpArgTokens);
+
+  std::vector<Token> &Result = PreExpArgTokens[Arg];
+  if (!Result.empty()) return Result;
+
+  const Token *AT = getUnexpArgument(Arg);
+  unsigned NumToks = getArgLength(AT)+1;  // Include the EOF.
+
+  // Otherwise, we have to pre-expand this argument, populating Result.  To do
+  // this, we set up a fake TokenLexer to lex from the unexpanded argument
+  // list.  With this installed, we lex expanded tokens until we hit the EOF
+  // token at the end of the unexp list.
+  PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
+                      false /*owns tokens*/);
+
+  // Lex all of the macro-expanded tokens into Result.
+  do {
+    Result.push_back(Token());
+    PP.Lex(Result.back());
+  } while (Result.back().isNot(tok::eof));
+
+  // Pop the token stream off the top of the stack.  We know that its internal
+  // pointer is at the end of the token stream, but the stack will not
+  // otherwise be popped until the next token is lexed.  The problem is that
+  // the token may be lexed sometime after the vector of tokens itself is
+  // destroyed, which would be badness.
+  PP.RemoveTopOfLexerStack();
+  return Result;
+}
+
+
+/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
+/// tokens into the literal string token that should be produced by the C #
+/// preprocessor operator.  If Charify is true, then it should be turned into
+/// a character literal for the Microsoft charize (#@) extension.
+///
+Token MacroArgs::StringifyArgument(const Token *ArgToks,
+                                   Preprocessor &PP, bool Charify) {
+  Token Tok;
+  Tok.startToken();
+  Tok.setKind(tok::string_literal);
+
+  const Token *ArgTokStart = ArgToks;
+
+  // Stringify all the tokens.
+  std::string Result = "\"";
+  // FIXME: Optimize this loop to not use std::strings.
+  bool isFirst = true;
+  for (; ArgToks->isNot(tok::eof); ++ArgToks) {
+    const Token &Tok = *ArgToks;
+    if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
+      Result += ' ';
+    isFirst = false;
+
+    // If this is a string or character constant, escape the token as specified
+    // by 6.10.3.2p2.
+    if (Tok.is(tok::string_literal) ||       // "foo"
+        Tok.is(tok::wide_string_literal) ||  // L"foo"
+        Tok.is(tok::char_constant)) {        // 'x' and L'x'.
+      Result += Lexer::Stringify(PP.getSpelling(Tok));
+    } else {
+      // Otherwise, just append the token.
+      Result += PP.getSpelling(Tok);
+    }
+  }
+
+  // If the last character of the string is a \, and if it isn't escaped, this
+  // is an invalid string literal, diagnose it as specified in C99.
+  if (Result[Result.size()-1] == '\\') {
+    // Count the number of consecutive \ characters. 
If even, then they are + // just escaped backslashes, otherwise it's an error. + unsigned FirstNonSlash = Result.size()-2; + // Guaranteed to find the starting " if nothing else. + while (Result[FirstNonSlash] == '\\') + --FirstNonSlash; + if ((Result.size()-1-FirstNonSlash) & 1) { + // Diagnose errors for things like: #define F(X) #X / F(\) + PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); + Result.erase(Result.end()-1); // remove one of the \'s. + } + } + Result += '"'; + + // If this is the charify operation and the result is not a legal character + // constant, diagnose it. + if (Charify) { + // First step, turn double quotes into single quotes: + Result[0] = '\''; + Result[Result.size()-1] = '\''; + + // Check for bogus character. + bool isBad = false; + if (Result.size() == 3) { + isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. + } else { + isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' + } + + if (isBad) { + PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); + Result = "' '"; // Use something arbitrary, but legal. + } + } + + Tok.setLength(Result.size()); + Tok.setLocation(PP.CreateString(&Result[0], Result.size())); + return Tok; +} + +/// getStringifiedArgument - Compute, cache, and return the specified argument +/// that has been 'stringified' as required by the # operator. +const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo, + Preprocessor &PP) { + assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!"); + if (StringifiedArgs.empty()) { + StringifiedArgs.resize(getNumArguments()); + memset(&StringifiedArgs[0], 0, + sizeof(StringifiedArgs[0])*getNumArguments()); + } + if (StringifiedArgs[ArgNo].isNot(tok::string_literal)) + StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP); + return StringifiedArgs[ArgNo]; +} diff --git a/clang/lib/Lex/MacroArgs.h b/clang/lib/Lex/MacroArgs.h new file mode 100644 index 00000000000..4b22fa18aa8 --- /dev/null +++ b/clang/lib/Lex/MacroArgs.h @@ -0,0 +1,109 @@ +//===--- MacroArgs.h - Formal argument info for Macros ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MacroArgs interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_MACROARGS_H +#define LLVM_CLANG_MACROARGS_H + +#include <vector> + +namespace clang { + class MacroInfo; + class Preprocessor; + class Token; + +/// MacroArgs - An instance of this class captures information about +/// the formal arguments specified to a function-like macro invocation. +class MacroArgs { + /// NumUnexpArgTokens - The number of raw, unexpanded tokens for the + /// arguments. All of the actual argument tokens are allocated immediately + /// after the MacroArgs object in memory. This is all of the arguments + /// concatenated together, with 'EOF' markers at the end of each argument. + unsigned NumUnexpArgTokens; + + /// PreExpArgTokens - Pre-expanded tokens for arguments that need them. Empty + /// if not yet computed. This includes the EOF marker at the end of the + /// stream. + std::vector<std::vector<Token> > PreExpArgTokens; + + /// StringifiedArgs - This contains arguments in 'stringified' form. If the + /// stringified form of an argument has not yet been computed, this is empty. 
+ std::vector<Token> StringifiedArgs; + + /// VarargsElided - True if this is a C99 style varargs macro invocation and + /// there was no argument specified for the "..." argument. If the argument + /// was specified (even empty) or this isn't a C99 style varargs function, or + /// if in strict mode and the C99 varargs macro had only a ... argument, this + /// is false. + bool VarargsElided; + + MacroArgs(unsigned NumToks, bool varargsElided) + : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided) {} + ~MacroArgs() {} +public: + /// MacroArgs ctor function - Create a new MacroArgs object with the specified + /// macro and argument info. + static MacroArgs *create(const MacroInfo *MI, + const Token *UnexpArgTokens, + unsigned NumArgTokens, bool VarargsElided); + + /// destroy - Destroy and deallocate the memory for this object. + /// + void destroy(); + + /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected + /// by pre-expansion, return false. Otherwise, conservatively return true. + bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const; + + /// getUnexpArgument - Return a pointer to the first token of the unexpanded + /// token list for the specified formal. + /// + const Token *getUnexpArgument(unsigned Arg) const; + + /// getArgLength - Given a pointer to an expanded or unexpanded argument, + /// return the number of tokens, not counting the EOF, that make up the + /// argument. + static unsigned getArgLength(const Token *ArgPtr); + + /// getPreExpArgument - Return the pre-expanded form of the specified + /// argument. + const std::vector<Token> & + getPreExpArgument(unsigned Arg, Preprocessor &PP); + + /// getStringifiedArgument - Compute, cache, and return the specified argument + /// that has been 'stringified' as required by the # operator. + const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP); + + /// getNumArguments - Return the number of arguments passed into this macro + /// invocation. + unsigned getNumArguments() const { return NumUnexpArgTokens; } + + + /// isVarargsElidedUse - Return true if this is a C99 style varargs macro + /// invocation and there was no argument specified for the "..." argument. If + /// the argument was specified (even empty) or this isn't a C99 style varargs + /// function, or if in strict mode and the C99 varargs macro had only a ... + /// argument, this returns false. + bool isVarargsElidedUse() const { return VarargsElided; } + + /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of + /// tokens into the literal string token that should be produced by the C # + /// preprocessor operator. If Charify is true, then it should be turned into + /// a character literal for the Microsoft charize (#@) extension. + /// + static Token StringifyArgument(const Token *ArgToks, + Preprocessor &PP, bool Charify = false); +}; + +} // end namespace clang + +#endif diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp new file mode 100644 index 00000000000..de19ff502a6 --- /dev/null +++ b/clang/lib/Lex/MacroInfo.cpp @@ -0,0 +1,70 @@ +//===--- MacroInfo.cpp - Information about #defined identifiers -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MacroInfo interface. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" +using namespace clang; + +MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) { + IsFunctionLike = false; + IsC99Varargs = false; + IsGNUVarargs = false; + IsBuiltinMacro = false; + IsDisabled = false; + IsUsed = true; + + ArgumentList = 0; + NumArguments = 0; +} + +/// isIdenticalTo - Return true if the specified macro definition is equal to +/// this macro in spelling, arguments, and whitespace. This is used to emit +/// duplicate definition warnings. This implements the rules in C99 6.10.3. +/// +bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { + // Check # tokens in replacement, number of args, and various flags all match. + if (ReplacementTokens.size() != Other.ReplacementTokens.size() || + getNumArgs() != Other.getNumArgs() || + isFunctionLike() != Other.isFunctionLike() || + isC99Varargs() != Other.isC99Varargs() || + isGNUVarargs() != Other.isGNUVarargs()) + return false; + + // Check arguments. + for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end(); + I != E; ++I, ++OI) + if (*I != *OI) return false; + + // Check all the tokens. + for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) { + const Token &A = ReplacementTokens[i]; + const Token &B = Other.ReplacementTokens[i]; + if (A.getKind() != B.getKind() || + A.isAtStartOfLine() != B.isAtStartOfLine() || + A.hasLeadingSpace() != B.hasLeadingSpace()) + return false; + + // If this is an identifier, it is easy. + if (A.getIdentifierInfo() || B.getIdentifierInfo()) { + if (A.getIdentifierInfo() != B.getIdentifierInfo()) + return false; + continue; + } + + // Otherwise, check the spelling. + if (PP.getSpelling(A) != PP.getSpelling(B)) + return false; + } + + return true; +} diff --git a/clang/lib/Lex/Makefile b/clang/lib/Lex/Makefile new file mode 100644 index 00000000000..187448c9922 --- /dev/null +++ b/clang/lib/Lex/Makefile @@ -0,0 +1,28 @@ +##===- clang/lib/Lex/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the Lexer library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +include $(LEVEL)/Makefile.config + +LIBRARYNAME := clangLex +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +ifeq ($(ARCH),PowerPC) +CXXFLAGS += -maltivec +endif + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp new file mode 100644 index 00000000000..b24f5b63760 --- /dev/null +++ b/clang/lib/Lex/PPDirectives.cpp @@ -0,0 +1,1153 @@ +//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements # directive processing for the Preprocessor. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Utility Methods for Preprocessor Directive Handling. +//===----------------------------------------------------------------------===// + +/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the +/// current line until the tok::eom token is found. +void Preprocessor::DiscardUntilEndOfDirective() { + Token Tmp; + do { + LexUnexpandedToken(Tmp); + } while (Tmp.isNot(tok::eom)); +} + +/// isCXXNamedOperator - Returns "true" if the token is a named operator in C++. +static bool isCXXNamedOperator(const std::string &Spelling) { + return Spelling == "and" || Spelling == "bitand" || Spelling == "bitor" || + Spelling == "compl" || Spelling == "not" || Spelling == "not_eq" || + Spelling == "or" || Spelling == "xor"; +} + +/// ReadMacroName - Lex and validate a macro name, which occurs after a +/// #define or #undef. This sets the token kind to eom and discards the rest +/// of the macro line if the macro name is invalid. isDefineUndef is 1 if +/// this is due to a a #define, 2 if #undef directive, 0 if it is something +/// else (e.g. #ifdef). +void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { + // Read the token, don't allow macro expansion on it. + LexUnexpandedToken(MacroNameTok); + + // Missing macro name? + if (MacroNameTok.is(tok::eom)) + return Diag(MacroNameTok, diag::err_pp_missing_macro_name); + + IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); + if (II == 0) { + std::string Spelling = getSpelling(MacroNameTok); + if (isCXXNamedOperator(Spelling)) + // C++ 2.5p2: Alternative tokens behave the same as its primary token + // except for their spellings. + Diag(MacroNameTok, diag::err_pp_operator_used_as_macro_name, Spelling); + else + Diag(MacroNameTok, diag::err_pp_macro_not_identifier); + // Fall through on error. + } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) { + // Error if defining "defined": C99 6.10.8.4. + Diag(MacroNameTok, diag::err_defined_macro_name); + } else if (isDefineUndef && II->hasMacroDefinition() && + getMacroInfo(II)->isBuiltinMacro()) { + // Error if defining "__LINE__" and other builtins: C99 6.10.8.4. + if (isDefineUndef == 1) + Diag(MacroNameTok, diag::pp_redef_builtin_macro); + else + Diag(MacroNameTok, diag::pp_undef_builtin_macro); + } else { + // Okay, we got a good identifier node. Return it. + return; + } + + // Invalid macro name, read and discard the rest of the line. Then set the + // token kind to tok::eom. + MacroNameTok.setKind(tok::eom); + return DiscardUntilEndOfDirective(); +} + +/// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If +/// not, emit a diagnostic and consume up until the eom. +void Preprocessor::CheckEndOfDirective(const char *DirType) { + Token Tmp; + // Lex unexpanded tokens: macros might expand to zero tokens, causing us to + // miss diagnosing invalid lines. + LexUnexpandedToken(Tmp); + + // There should be no tokens after the directive, but we allow them as an + // extension. + while (Tmp.is(tok::comment)) // Skip comments in -C mode. 
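// [Editor's note] A standalone sketch (not clang API) of the macro-name
// rules ReadMacroName enforces above: the token must be an identifier, must
// not be 'defined' (C99 6.10.8.4), and in C++ must not be one of the named
// alternative operators, mirroring the eight spellings isCXXNamedOperator
// checks.
#include <cctype>
#include <set>
#include <string>

enum class NameCheck { OK, NotIdentifier, IsDefined, NamedOperator };

static NameCheck CheckMacroName(const std::string &Name, bool IsCPlusPlus) {
  static const std::set<std::string> NamedOps = {
      "and", "bitand", "bitor", "compl", "not", "not_eq", "or", "xor"};
  if (Name.empty() ||
      (!std::isalpha(static_cast<unsigned char>(Name[0])) && Name[0] != '_'))
    return NameCheck::NotIdentifier;      // e.g. "#define 1x"
  if (Name == "defined")
    return NameCheck::IsDefined;          // defining 'defined' is an error
  if (IsCPlusPlus && NamedOps.count(Name))
    return NameCheck::NamedOperator;      // e.g. "#define and 1" in C++
  return NameCheck::OK;
}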
+ LexUnexpandedToken(Tmp); + + if (Tmp.isNot(tok::eom)) { + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol, DirType); + DiscardUntilEndOfDirective(); + } +} + + + +/// SkipExcludedConditionalBlock - We just read a #if or related directive and +/// decided that the subsequent tokens are in the #if'd out portion of the +/// file. Lex the rest of the file, until we see an #endif. If +/// FoundNonSkipPortion is true, then we have already emitted code for part of +/// this #if directive, so #else/#elif blocks should never be entered. If ElseOk +/// is true, then #else directives are ok, if not, then we have already seen one +/// so a #else directive is a duplicate. When this returns, the caller can lex +/// the first valid token. +void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, + bool FoundNonSkipPortion, + bool FoundElse) { + ++NumSkipped; + assert(CurTokenLexer == 0 && CurLexer && + "Lexing a macro, not a file?"); + + CurLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false, + FoundNonSkipPortion, FoundElse); + + // Enter raw mode to disable identifier lookup (and thus macro expansion), + // disabling warnings, etc. + CurLexer->LexingRawMode = true; + Token Tok; + while (1) { + CurLexer->Lex(Tok); + + // If this is the end of the buffer, we have an error. + if (Tok.is(tok::eof)) { + // Emit errors for each unterminated conditional on the stack, including + // the current one. + while (!CurLexer->ConditionalStack.empty()) { + Diag(CurLexer->ConditionalStack.back().IfLoc, + diag::err_pp_unterminated_conditional); + CurLexer->ConditionalStack.pop_back(); + } + + // Just return and let the caller lex after this #include. + break; + } + + // If this token is not a preprocessor directive, just skip it. + if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) + continue; + + // We just parsed a # character at the start of a line, so we're in + // directive mode. Tell the lexer this so any newlines we see will be + // converted into an EOM token (this terminates the macro). + CurLexer->ParsingPreprocessorDirective = true; + CurLexer->KeepCommentMode = false; + + + // Read the next token, the directive flavor. + LexUnexpandedToken(Tok); + + // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or + // something bogus), skip it. + if (Tok.isNot(tok::identifier)) { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + continue; + } + + // If the first letter isn't i or e, it isn't intesting to us. We know that + // this is safe in the face of spelling differences, because there is no way + // to spell an i/e in a strange way that is another letter. Skipping this + // allows us to avoid looking up the identifier info for #define/#undef and + // other common directives. + const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation()); + char FirstChar = RawCharData[0]; + if (FirstChar >= 'a' && FirstChar <= 'z' && + FirstChar != 'i' && FirstChar != 'e') { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + continue; + } + + // Get the identifier name without trigraphs or embedded newlines. Note + // that we can't use Tok.getIdentifierInfo() because its lookup is disabled + // when skipping. + // TODO: could do this with zero copies in the no-clean case by using + // strncmp below. 
+ char Directive[20]; + unsigned IdLen; + if (!Tok.needsCleaning() && Tok.getLength() < 20) { + IdLen = Tok.getLength(); + memcpy(Directive, RawCharData, IdLen); + Directive[IdLen] = 0; + } else { + std::string DirectiveStr = getSpelling(Tok); + IdLen = DirectiveStr.size(); + if (IdLen >= 20) { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + continue; + } + memcpy(Directive, &DirectiveStr[0], IdLen); + Directive[IdLen] = 0; + } + + if (FirstChar == 'i' && Directive[1] == 'f') { + if ((IdLen == 2) || // "if" + (IdLen == 5 && !strcmp(Directive+2, "def")) || // "ifdef" + (IdLen == 6 && !strcmp(Directive+2, "ndef"))) { // "ifndef" + // We know the entire #if/#ifdef/#ifndef block will be skipped, don't + // bother parsing the condition. + DiscardUntilEndOfDirective(); + CurLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true, + /*foundnonskip*/false, + /*fnddelse*/false); + } + } else if (FirstChar == 'e') { + if (IdLen == 5 && !strcmp(Directive+1, "ndif")) { // "endif" + CheckEndOfDirective("#endif"); + PPConditionalInfo CondInfo; + CondInfo.WasSkipping = true; // Silence bogus warning. + bool InCond = CurLexer->popConditionalLevel(CondInfo); + InCond = InCond; // Silence warning in no-asserts mode. + assert(!InCond && "Can't be skipping if not in a conditional!"); + + // If we popped the outermost skipping block, we're done skipping! + if (!CondInfo.WasSkipping) + break; + } else if (IdLen == 4 && !strcmp(Directive+1, "lse")) { // "else". + // #else directive in a skipping conditional. If not in some other + // skipping conditional, and if #else hasn't already been seen, enter it + // as a non-skipping conditional. + CheckEndOfDirective("#else"); + PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel(); + + // If this is a #else with a #else before it, report the error. + if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else); + + // Note that we've seen a #else in this conditional. + CondInfo.FoundElse = true; + + // If the conditional is at the top level, and the #if block wasn't + // entered, enter the #else block now. + if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) { + CondInfo.FoundNonSkip = true; + break; + } + } else if (IdLen == 4 && !strcmp(Directive+1, "lif")) { // "elif". + PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel(); + + bool ShouldEnter; + // If this is in a skipping block or if we're already handled this #if + // block, don't bother parsing the condition. + if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) { + DiscardUntilEndOfDirective(); + ShouldEnter = false; + } else { + // Restore the value of LexingRawMode so that identifiers are + // looked up, etc, inside the #elif expression. + assert(CurLexer->LexingRawMode && "We have to be skipping here!"); + CurLexer->LexingRawMode = false; + IdentifierInfo *IfNDefMacro = 0; + ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro); + CurLexer->LexingRawMode = true; + } + + // If this is a #elif with a #else before it, report the error. + if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else); + + // If this condition is true, enter it! + if (ShouldEnter) { + CondInfo.FoundNonSkip = true; + break; + } + } + } + + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. 
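// [Editor's note] A reduced, standalone model of the state machine the
// skipping loop above drives. Each nesting level is a PPConditionalInfo;
// this sketch keeps only the three flags the loop consults and returns true
// when skipping should stop. Hypothetical helper, not clang code.
#include <string>
#include <vector>

struct CondLevel {
  bool WasSkipping;   // the enclosing region was itself being skipped
  bool FoundNonSkip;  // some branch of this conditional was already entered
  bool FoundElse;     // a #else was seen; a second one is diagnosed
};

static bool OnDirectiveWhileSkipping(std::vector<CondLevel> &Stack,
                                     const std::string &Name,
                                     bool ElifCondition) {
  if (Name == "if" || Name == "ifdef" || Name == "ifndef") {
    Stack.push_back({true, false, false});  // nested block: skip wholesale
  } else if (Name == "endif") {
    CondLevel Top = Stack.back();
    Stack.pop_back();
    return !Top.WasSkipping;                // popped the outermost skip?
  } else if (Name == "else") {
    CondLevel &Top = Stack.back();
    Top.FoundElse = true;
    if (!Top.WasSkipping && !Top.FoundNonSkip) {
      Top.FoundNonSkip = true;              // enter the #else block
      return true;
    }
  } else if (Name == "elif") {
    CondLevel &Top = Stack.back();
    if (!Top.WasSkipping && !Top.FoundNonSkip && ElifCondition) {
      Top.FoundNonSkip = true;              // condition true: enter block
      return true;
    }
  }
  return false;                             // keep skipping
}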
+ CurLexer->KeepCommentMode = KeepComments; + } + + // Finally, if we are out of the conditional (saw an #endif or ran off the end + // of the file, just stop skipping and return to lexing whatever came after + // the #if block. + CurLexer->LexingRawMode = false; +} + +/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, +/// return null on failure. isAngled indicates whether the file reference is +/// for system #include's or not (i.e. using <> instead of ""). +const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir) { + // If the header lookup mechanism may be relative to the current file, pass in + // info about where the current file is. + const FileEntry *CurFileEnt = 0; + if (!FromDir) { + SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc(); + CurFileEnt = SourceMgr.getFileEntryForLoc(FileLoc); + } + + // Do a standard file entry lookup. + CurDir = CurDirLookup; + const FileEntry *FE = + HeaderInfo.LookupFile(FilenameStart, FilenameEnd, + isAngled, FromDir, CurDir, CurFileEnt); + if (FE) return FE; + + // Otherwise, see if this is a subframework header. If so, this is relative + // to one of the headers on the #include stack. Walk the list of the current + // headers on the #include stack and pass them to HeaderInfo. + if (CurLexer && !CurLexer->Is_PragmaLexer) { + if ((CurFileEnt = SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))) + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd, + CurFileEnt))) + return FE; + } + + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { + IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1]; + if (ISEntry.TheLexer && !ISEntry.TheLexer->Is_PragmaLexer) { + if ((CurFileEnt = + SourceMgr.getFileEntryForLoc(ISEntry.TheLexer->getFileLoc()))) + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, + FilenameEnd, CurFileEnt))) + return FE; + } + } + + // Otherwise, we really couldn't find the file. + return 0; +} + + +//===----------------------------------------------------------------------===// +// Preprocessor Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandleDirective - This callback is invoked when the lexer sees a # token +/// at the start of a line. This consumes the directive, modifies the +/// lexer/preprocessor state, and advances the lexer(s) so that the next token +/// read is the correct one. +void Preprocessor::HandleDirective(Token &Result) { + // FIXME: Traditional: # with whitespace before it not recognized by K&R? + + // We just parsed a # character at the start of a line, so we're in directive + // mode. Tell the lexer this so any newlines we see will be converted into an + // EOM token (which terminates the directive). + CurLexer->ParsingPreprocessorDirective = true; + + ++NumDirectives; + + // We are about to read a token. For the multiple-include optimization FA to + // work, we have to remember if we had read any tokens *before* this + // pp-directive. + bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal(); + + // Read the next token, the directive flavor. This isn't expanded due to + // C99 6.10.3p8. + LexUnexpandedToken(Result); + + // C99 6.10.3p11: Is this preprocessor directive in macro invocation? 
e.g.: + // #define A(x) #x + // A(abc + // #warning blah + // def) + // If so, the user is relying on non-portable behavior, emit a diagnostic. + if (InMacroArgs) + Diag(Result, diag::ext_embedded_directive); + +TryAgain: + switch (Result.getKind()) { + case tok::eom: + return; // null directive. + case tok::comment: + // Handle stuff like "# /*foo*/ define X" in -E -C mode. + LexUnexpandedToken(Result); + goto TryAgain; + + case tok::numeric_constant: + // FIXME: implement # 7 line numbers! + DiscardUntilEndOfDirective(); + return; + default: + IdentifierInfo *II = Result.getIdentifierInfo(); + if (II == 0) break; // Not an identifier. + + // Ask what the preprocessor keyword ID is. + switch (II->getPPKeywordID()) { + default: break; + // C99 6.10.1 - Conditional Inclusion. + case tok::pp_if: + return HandleIfDirective(Result, ReadAnyTokensBeforeDirective); + case tok::pp_ifdef: + return HandleIfdefDirective(Result, false, true/*not valid for miopt*/); + case tok::pp_ifndef: + return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective); + case tok::pp_elif: + return HandleElifDirective(Result); + case tok::pp_else: + return HandleElseDirective(Result); + case tok::pp_endif: + return HandleEndifDirective(Result); + + // C99 6.10.2 - Source File Inclusion. + case tok::pp_include: + return HandleIncludeDirective(Result); // Handle #include. + + // C99 6.10.3 - Macro Replacement. + case tok::pp_define: + return HandleDefineDirective(Result); + case tok::pp_undef: + return HandleUndefDirective(Result); + + // C99 6.10.4 - Line Control. + case tok::pp_line: + // FIXME: implement #line + DiscardUntilEndOfDirective(); + return; + + // C99 6.10.5 - Error Directive. + case tok::pp_error: + return HandleUserDiagnosticDirective(Result, false); + + // C99 6.10.6 - Pragma Directive. + case tok::pp_pragma: + return HandlePragmaDirective(); + + // GNU Extensions. + case tok::pp_import: + return HandleImportDirective(Result); + case tok::pp_include_next: + return HandleIncludeNextDirective(Result); + + case tok::pp_warning: + Diag(Result, diag::ext_pp_warning_directive); + return HandleUserDiagnosticDirective(Result, true); + case tok::pp_ident: + return HandleIdentSCCSDirective(Result); + case tok::pp_sccs: + return HandleIdentSCCSDirective(Result); + case tok::pp_assert: + //isExtension = true; // FIXME: implement #assert + break; + case tok::pp_unassert: + //isExtension = true; // FIXME: implement #unassert + break; + } + break; + } + + // If we reached here, the preprocessing token is not valid! + Diag(Result, diag::err_pp_invalid_directive); + + // Read the rest of the PP line. + DiscardUntilEndOfDirective(); + + // Okay, we're done parsing the directive. +} + +void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, + bool isWarning) { + // Read the rest of the line raw. We do this because we don't want macros + // to be expanded and we don't require that the tokens be valid preprocessing + // tokens. For example, this is allowed: "#warning ` 'foo". GCC does + // collapse multiple consequtive white space between tokens, but this isn't + // specified by the standard. + std::string Message = CurLexer->ReadToEndOfLine(); + + unsigned DiagID = isWarning ? diag::pp_hash_warning : diag::err_pp_hash_error; + return Diag(Tok, DiagID, Message); +} + +/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive. +/// +void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { + // Yes, this directive is an extension. + Diag(Tok, diag::ext_pp_ident_directive); + + // Read the string argument. 
+ Token StrTok; + Lex(StrTok); + + // If the token kind isn't a string, it's a malformed directive. + if (StrTok.isNot(tok::string_literal) && + StrTok.isNot(tok::wide_string_literal)) + return Diag(StrTok, diag::err_pp_malformed_ident); + + // Verify that there is nothing after the string, other than EOM. + CheckEndOfDirective("#ident"); + + if (Callbacks) + Callbacks->Ident(Tok.getLocation(), getSpelling(StrTok)); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Include Directive Handling. +//===----------------------------------------------------------------------===// + +/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully +/// checked and spelled filename, e.g. as an operand of #include. This returns +/// true if the input filename was in <>'s or false if it were in ""'s. The +/// caller is expected to provide a buffer that is large enough to hold the +/// spelling of the filename, but is also expected to handle the case when +/// this method decides to use a different buffer. +bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, + const char *&BufStart, + const char *&BufEnd) { + // Get the text form of the filename. + assert(BufStart != BufEnd && "Can't have tokens with empty spellings!"); + + // Make sure the filename is <x> or "x". + bool isAngled; + if (BufStart[0] == '<') { + if (BufEnd[-1] != '>') { + Diag(Loc, diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + isAngled = true; + } else if (BufStart[0] == '"') { + if (BufEnd[-1] != '"') { + Diag(Loc, diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + isAngled = false; + } else { + Diag(Loc, diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + + // Diagnose #include "" as invalid. + if (BufEnd-BufStart <= 2) { + Diag(Loc, diag::err_pp_empty_filename); + BufStart = 0; + return ""; + } + + // Skip the brackets. + ++BufStart; + --BufEnd; + return isAngled; +} + +/// ConcatenateIncludeName - Handle cases where the #include name is expanded +/// from a macro as multiple tokens, which need to be glued together. This +/// occurs for code like: +/// #define FOO <a/b.h> +/// #include FOO +/// because in this case, "<a/b.h>" is returned as 7 tokens, not one. +/// +/// This code concatenates and consumes tokens up to the '>' token. It returns +/// false if the > was found, otherwise it returns true if it finds and consumes +/// the EOM marker. +static bool ConcatenateIncludeName(llvm::SmallVector<char, 128> &FilenameBuffer, + Preprocessor &PP) { + Token CurTok; + + PP.Lex(CurTok); + while (CurTok.isNot(tok::eom)) { + // Append the spelling of this token to the buffer. If there was a space + // before it, add it now. + if (CurTok.hasLeadingSpace()) + FilenameBuffer.push_back(' '); + + // Get the spelling of the token, directly into FilenameBuffer if possible. + unsigned PreAppendSize = FilenameBuffer.size(); + FilenameBuffer.resize(PreAppendSize+CurTok.getLength()); + + const char *BufPtr = &FilenameBuffer[PreAppendSize]; + unsigned ActualLen = PP.getSpelling(CurTok, BufPtr); + + // If the token was spelled somewhere else, copy it into FilenameBuffer. + if (BufPtr != &FilenameBuffer[PreAppendSize]) + memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); + + // Resize FilenameBuffer to the correct size. + if (CurTok.getLength() != ActualLen) + FilenameBuffer.resize(PreAppendSize+ActualLen); + + // If we found the '>' marker, return success. 
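// [Editor's note] A minimal sketch of the <x> / "x" classification that
// GetIncludeFilenameSpelling below performs, using std::string in place of
// raw buffer pointers. A diagnostic in the real code becomes a false 'Valid'
// flag here; the return value is the isAngled result.
#include <string>

static bool ClassifyIncludeName(std::string &Name, bool &Valid) {
  Valid = Name.size() > 2 &&                 // "" and <> are diagnosed empty
          ((Name.front() == '<' && Name.back() == '>') ||
           (Name.front() == '"' && Name.back() == '"'));
  bool Angled = Valid && Name.front() == '<';
  if (Valid)
    Name = Name.substr(1, Name.size() - 2);  // strip the delimiters
  return Angled;
}
// ClassifyIncludeName on "<a/b.h>" yields true with Name == "a/b.h".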
+ if (CurTok.is(tok::greater)) + return false; + + PP.Lex(CurTok); + } + + // If we hit the eom marker, emit an error and return true so that the caller + // knows the EOM has been read. + PP.Diag(CurTok.getLocation(), diag::err_pp_expects_filename); + return true; +} + +/// HandleIncludeDirective - The "#include" tokens have just been read, read the +/// file to be included from the lexer, then include it! This is a common +/// routine with functionality shared between #include, #include_next and +/// #import. +void Preprocessor::HandleIncludeDirective(Token &IncludeTok, + const DirectoryLookup *LookupFrom, + bool isImport) { + + Token FilenameTok; + CurLexer->LexIncludeFilename(FilenameTok); + + // Reserve a buffer to get the spelling. + llvm::SmallVector<char, 128> FilenameBuffer; + const char *FilenameStart, *FilenameEnd; + + switch (FilenameTok.getKind()) { + case tok::eom: + // If the token kind is EOM, the error has already been diagnosed. + return; + + case tok::angle_string_literal: + case tok::string_literal: { + FilenameBuffer.resize(FilenameTok.getLength()); + FilenameStart = &FilenameBuffer[0]; + unsigned Len = getSpelling(FilenameTok, FilenameStart); + FilenameEnd = FilenameStart+Len; + break; + } + + case tok::less: + // This could be a <foo/bar.h> file coming from a macro expansion. In this + // case, glue the tokens together into FilenameBuffer and interpret those. + FilenameBuffer.push_back('<'); + if (ConcatenateIncludeName(FilenameBuffer, *this)) + return; // Found <eom> but no ">"? Diagnostic already emitted. + FilenameStart = &FilenameBuffer[0]; + FilenameEnd = &FilenameBuffer[FilenameBuffer.size()]; + break; + default: + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + DiscardUntilEndOfDirective(); + return; + } + + bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(), + FilenameStart, FilenameEnd); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + if (FilenameStart == 0) { + DiscardUntilEndOfDirective(); + return; + } + + // Verify that there is nothing after the filename, other than EOM. Use the + // preprocessor to lex this in case lexing the filename entered a macro. + CheckEndOfDirective("#include"); + + // Check that we don't have infinite #include recursion. + if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) + return Diag(FilenameTok, diag::err_pp_include_too_deep); + + // Search include directories. + const DirectoryLookup *CurDir; + const FileEntry *File = LookupFile(FilenameStart, FilenameEnd, + isAngled, LookupFrom, CurDir); + if (File == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + // Ask HeaderInfo if we should enter this #include file. + if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport)) { + // If it returns true, #including this file will have no effect. + return; + } + + // Look up the file, create a File ID for it. + unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation()); + if (FileID == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + // Finally, if all is good, enter the new file! + EnterSourceFile(FileID, CurDir); +} + +/// HandleIncludeNextDirective - Implements #include_next. +/// +void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) { + Diag(IncludeNextTok, diag::ext_pp_include_next_directive); + + // #include_next is like #include, except that we start searching after + // the current found directory. 
If we can't do this, issue a + // diagnostic. + const DirectoryLookup *Lookup = CurDirLookup; + if (isInPrimaryFile()) { + Lookup = 0; + Diag(IncludeNextTok, diag::pp_include_next_in_primary); + } else if (Lookup == 0) { + Diag(IncludeNextTok, diag::pp_include_next_absolute_path); + } else { + // Start looking up in the next directory. + ++Lookup; + } + + return HandleIncludeDirective(IncludeNextTok, Lookup); +} + +/// HandleImportDirective - Implements #import. +/// +void Preprocessor::HandleImportDirective(Token &ImportTok) { + Diag(ImportTok, diag::ext_pp_import_directive); + + return HandleIncludeDirective(ImportTok, 0, true); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Macro Directive Handling. +//===----------------------------------------------------------------------===// + +/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro +/// definition has just been read. Lex the rest of the arguments and the +/// closing ), updating MI with what we learn. Return true if an error occurs +/// parsing the arg list. +bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { + llvm::SmallVector<IdentifierInfo*, 32> Arguments; + + Token Tok; + while (1) { + LexUnexpandedToken(Tok); + switch (Tok.getKind()) { + case tok::r_paren: + // Found the end of the argument list. + if (Arguments.empty()) { // #define FOO() + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + } + // Otherwise we have #define FOO(A,) + Diag(Tok, diag::err_pp_expected_ident_in_arg_list); + return true; + case tok::ellipsis: // #define X(... -> C99 varargs + // Warn if use of C99 feature in non-C99 mode. + if (!Features.C99) Diag(Tok, diag::ext_variadic_macro); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + } + // Add the __VA_ARGS__ identifier as an argument. + Arguments.push_back(Ident__VA_ARGS__); + MI->setIsC99Varargs(); + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + case tok::eom: // #define X( + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + default: + // Handle keywords and identifiers here to accept things like + // #define Foo(for) for. + IdentifierInfo *II = Tok.getIdentifierInfo(); + if (II == 0) { + // #define X(1 + Diag(Tok, diag::err_pp_invalid_tok_in_arg_list); + return true; + } + + // If this is already used as an argument, it is used multiple times (e.g. + // #define X(A,A. + if (std::find(Arguments.begin(), Arguments.end(), II) != + Arguments.end()) { // C99 6.10.3p6 + Diag(Tok, diag::err_pp_duplicate_name_in_arg_list, II->getName()); + return true; + } + + // Add the argument to the macro info. + Arguments.push_back(II); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + + switch (Tok.getKind()) { + default: // #define X(A B + Diag(Tok, diag::err_pp_expected_comma_in_arg_list); + return true; + case tok::r_paren: // #define X(A) + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + case tok::comma: // #define X(A, + break; + case tok::ellipsis: // #define X(A... -> GCC extension + // Diagnose extension. + Diag(Tok, diag::ext_named_variadic_macro); + + // Lex the token after the identifier. 
+ LexUnexpandedToken(Tok); + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + } + + MI->setIsGNUVarargs(); + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + } + } + } +} + +/// HandleDefineDirective - Implements #define. This consumes the entire macro +/// line then lets the caller lex the next real token. +void Preprocessor::HandleDefineDirective(Token &DefineTok) { + ++NumDefined; + + Token MacroNameTok; + ReadMacroName(MacroNameTok, 1); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eom)) + return; + + // If we are supposed to keep comments in #defines, reenable comment saving + // mode. + CurLexer->KeepCommentMode = KeepMacroComments; + + // Create the new macro. + MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation()); + + Token Tok; + LexUnexpandedToken(Tok); + + // If this is a function-like macro definition, parse the argument list, + // marking each of the identifiers as being used as macro arguments. Also, + // check other constraints on the first token of the macro body. + if (Tok.is(tok::eom)) { + // If there is no body to this macro, we have no special handling here. + } else if (Tok.is(tok::l_paren) && !Tok.hasLeadingSpace()) { + // This is a function-like macro definition. Read the argument list. + MI->setIsFunctionLike(); + if (ReadMacroDefinitionArgList(MI)) { + // Forget about MI. + delete MI; + // Throw away the rest of the line. + if (CurLexer->ParsingPreprocessorDirective) + DiscardUntilEndOfDirective(); + return; + } + + // Read the first token after the arg list for down below. + LexUnexpandedToken(Tok); + } else if (!Tok.hasLeadingSpace()) { + // C99 requires whitespace between the macro definition and the body. Emit + // a diagnostic for something like "#define X+". + if (Features.C99) { + Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name); + } else { + // FIXME: C90/C++ do not get this diagnostic, but it does get a similar + // one in some cases! + } + } else { + // This is a normal token with leading space. Clear the leading space + // marker on the first token to get proper expansion. + Tok.clearFlag(Token::LeadingSpace); + } + + // If this is a definition of a variadic C99 function-like macro, not using + // the GNU named varargs extension, enabled __VA_ARGS__. + + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. + // This gets unpoisoned where it is allowed. + assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!"); + if (MI->isC99Varargs()) + Ident__VA_ARGS__->setIsPoisoned(false); + + // Read the rest of the macro body. + if (MI->isObjectLike()) { + // Object-like macros are very simple, just read their body. + while (Tok.isNot(tok::eom)) { + MI->AddTokenToBody(Tok); + // Get the next token of the macro. + LexUnexpandedToken(Tok); + } + + } else { + // Otherwise, read the body of a function-like macro. This has to validate + // the # (stringize) operator. + while (Tok.isNot(tok::eom)) { + MI->AddTokenToBody(Tok); + + // Check C99 6.10.3.2p1: ensure that # operators are followed by macro + // parameters in function-like macro expansions. + if (Tok.isNot(tok::hash)) { + // Get the next token of the macro. + LexUnexpandedToken(Tok); + continue; + } + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + + // Not a macro arg identifier? 
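// [Editor's note] Hypothetical definitions exercising the paths of
// ReadMacroDefinitionArgList and of the '#' validation above:
//
//   #define F()              // empty argument list
//   #define G(a, b) a b      // identifiers separated by commas
//   #define H(...)           // C99 variadic: __VA_ARGS__ is appended
//   #define I(a...)          // GNU named-variadic extension (diagnosed)
//   #define J(a, a) a        // error: duplicate parameter, C99 6.10.3p6
//   #define K(a,             <- error: EOM before the closing ')'
//
// And for the C99 6.10.3.2p1 constraint that '#' be followed by a macro
// parameter in a function-like macro:
//
//   #define STR(x) #x        // ok: 'x' is a parameter
//   #define BAD(x) #y        // error: 'y' is not a parameter
//   #define OBJ #x           // ok: '#' is only special in function-like macros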
+ if (!Tok.getIdentifierInfo() || + MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) { + Diag(Tok, diag::err_pp_stringize_not_parameter); + delete MI; + + // Disable __VA_ARGS__ again. + Ident__VA_ARGS__->setIsPoisoned(true); + return; + } + + // Things look ok, add the param name token to the macro. + MI->AddTokenToBody(Tok); + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + } + } + + + // Disable __VA_ARGS__ again. + Ident__VA_ARGS__->setIsPoisoned(true); + + // Check that there is no paste (##) operator at the begining or end of the + // replacement list. + unsigned NumTokens = MI->getNumTokens(); + if (NumTokens != 0) { + if (MI->getReplacementToken(0).is(tok::hashhash)) { + Diag(MI->getReplacementToken(0), diag::err_paste_at_start); + delete MI; + return; + } + if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) { + Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end); + delete MI; + return; + } + } + + // If this is the primary source file, remember that this macro hasn't been + // used yet. + if (isInPrimaryFile()) + MI->setIsUsed(false); + + // Finally, if this identifier already had a macro defined for it, verify that + // the macro bodies are identical and free the old definition. + if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) { + if (!OtherMI->isUsed()) + Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used); + + // Macros must be identical. This means all tokes and whitespace separation + // must be the same. C99 6.10.3.2. + if (!MI->isIdenticalTo(*OtherMI, *this)) { + Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef, + MacroNameTok.getIdentifierInfo()->getName()); + Diag(OtherMI->getDefinitionLoc(), diag::ext_pp_macro_redef2); + } + delete OtherMI; + } + + setMacroInfo(MacroNameTok.getIdentifierInfo(), MI); +} + +/// HandleUndefDirective - Implements #undef. +/// +void Preprocessor::HandleUndefDirective(Token &UndefTok) { + ++NumUndefined; + + Token MacroNameTok; + ReadMacroName(MacroNameTok, 2); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eom)) + return; + + // Check to see if this is the last token on the #undef line. + CheckEndOfDirective("#undef"); + + // Okay, we finally have a valid identifier to undef. + MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); + + // If the macro is not defined, this is a noop undef, just return. + if (MI == 0) return; + + if (!MI->isUsed()) + Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used); + + // Free macro definition. + delete MI; + setMacroInfo(MacroNameTok.getIdentifierInfo(), 0); +} + + +//===----------------------------------------------------------------------===// +// Preprocessor Conditional Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is +/// true when this is a #ifndef directive. ReadAnyTokensBeforeDirective is true +/// if any tokens have been returned or pp-directives activated before this +/// #ifndef has been lexed. +/// +void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, + bool ReadAnyTokensBeforeDirective) { + ++NumIf; + Token DirectiveTok = Result; + + Token MacroNameTok; + ReadMacroName(MacroNameTok); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eom)) { + // Skip code until we get to #endif. This helps with recovery by not + // emitting an error when the #endif is reached. 
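// [Editor's note] Hypothetical replacement lists illustrating the '##'
// placement constraint checked below (C99 6.10.3.3p1): a paste operator may
// not begin or end a replacement list.
//
//   #define CAT(a, b) a ## b   // ok: '##' strictly between tokens
//   #define BAD1(a) ## a       // err_paste_at_start
//   #define BAD2(a) a ##       // err_paste_at_end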
+ SkipExcludedConditionalBlock(DirectiveTok.getLocation(), + /*Foundnonskip*/false, /*FoundElse*/false); + return; + } + + // Check to see if this is the last token on the #if[n]def line. + CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef"); + + if (CurLexer->getConditionalStackDepth() == 0) { + // If the start of a top-level #ifdef, inform MIOpt. + if (!ReadAnyTokensBeforeDirective) { + assert(isIfndef && "#ifdef shouldn't reach here"); + CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo()); + } else + CurLexer->MIOpt.EnterTopLevelConditional(); + } + + IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); + MacroInfo *MI = getMacroInfo(MII); + + // If there is a macro, process it. + if (MI) // Mark it used. + MI->setIsUsed(true); + + // Should we include the stuff contained by this directive? + if (!MI == isIfndef) { + // Yes, remember that we are inside a conditional, then lex the next token. + CurLexer->pushConditionalLevel(DirectiveTok.getLocation(), /*wasskip*/false, + /*foundnonskip*/true, /*foundelse*/false); + } else { + // No, skip the contents of this block and return the first token after it. + SkipExcludedConditionalBlock(DirectiveTok.getLocation(), + /*Foundnonskip*/false, + /*FoundElse*/false); + } +} + +/// HandleIfDirective - Implements the #if directive. +/// +void Preprocessor::HandleIfDirective(Token &IfToken, + bool ReadAnyTokensBeforeDirective) { + ++NumIf; + + // Parse and evaluation the conditional expression. + IdentifierInfo *IfNDefMacro = 0; + bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro); + + // Should we include the stuff contained by this directive? + if (ConditionalTrue) { + // If this condition is equivalent to #ifndef X, and if this is the first + // directive seen, handle it for the multiple-include optimization. + if (CurLexer->getConditionalStackDepth() == 0) { + if (!ReadAnyTokensBeforeDirective && IfNDefMacro) + CurLexer->MIOpt.EnterTopLevelIFNDEF(IfNDefMacro); + else + CurLexer->MIOpt.EnterTopLevelConditional(); + } + + // Yes, remember that we are inside a conditional, then lex the next token. + CurLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false, + /*foundnonskip*/true, /*foundelse*/false); + } else { + // No, skip the contents of this block and return the first token after it. + SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false, + /*FoundElse*/false); + } +} + +/// HandleEndifDirective - Implements the #endif directive. +/// +void Preprocessor::HandleEndifDirective(Token &EndifToken) { + ++NumEndif; + + // Check that this is the whole directive. + CheckEndOfDirective("#endif"); + + PPConditionalInfo CondInfo; + if (CurLexer->popConditionalLevel(CondInfo)) { + // No conditionals on the stack: this is an #endif without an #if. + return Diag(EndifToken, diag::err_pp_endif_without_if); + } + + // If this the end of a top-level #endif, inform MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.ExitTopLevelConditional(); + + assert(!CondInfo.WasSkipping && !CurLexer->LexingRawMode && + "This code should only be reachable in the non-skipping case!"); +} + + +void Preprocessor::HandleElseDirective(Token &Result) { + ++NumElse; + + // #else directive in a non-skipping conditional... start skipping. + CheckEndOfDirective("#else"); + + PPConditionalInfo CI; + if (CurLexer->popConditionalLevel(CI)) + return Diag(Result, diag::pp_err_else_without_if); + + // If this is a top-level #else, inform the MIOpt. 
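// [Editor's note] The MIOpt calls below feed the multiple-include
// optimization: when a file's only top-level construct is a single
// '#ifndef GUARD ... #endif', re-includes while GUARD is defined can be
// skipped without re-lexing the file. The qualifying shape:
//
//   #ifndef FOO_H        // first directive, no tokens before it:
//   #define FOO_H        //   EnterTopLevelIFNDEF(FOO_H)
//   /* file body */
//   #endif               // depth returns to 0: ExitTopLevelConditional()
//
// Any token before the #ifndef, or a different first directive, routes
// through EnterTopLevelConditional() instead and defeats the optimization.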
+ if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.EnterTopLevelConditional(); + + // If this is a #else with a #else before it, report the error. + if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else); + + // Finally, skip the rest of the contents of this block and return the first + // token after it. + return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, + /*FoundElse*/true); +} + +void Preprocessor::HandleElifDirective(Token &ElifToken) { + ++NumElse; + + // #elif directive in a non-skipping conditional... start skipping. + // We don't care what the condition is, because we will always skip it (since + // the block immediately before it was included). + DiscardUntilEndOfDirective(); + + PPConditionalInfo CI; + if (CurLexer->popConditionalLevel(CI)) + return Diag(ElifToken, diag::pp_err_elif_without_if); + + // If this is a top-level #elif, inform the MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.EnterTopLevelConditional(); + + // If this is a #elif with a #else before it, report the error. + if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else); + + // Finally, skip the rest of the contents of this block and return the first + // token after it. + return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, + /*FoundElse*/CI.FoundElse); +} + diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp new file mode 100644 index 00000000000..cca76289176 --- /dev/null +++ b/clang/lib/Lex/PPExpressions.cpp @@ -0,0 +1,639 @@ +//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Preprocessor::EvaluateDirectiveExpression method, +// which parses and evaluates integer constant expressions for #if directives. +// +//===----------------------------------------------------------------------===// +// +// FIXME: implement testing for #assert's. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec, + Token &PeekTok, bool ValueLive, + Preprocessor &PP); + +/// DefinedTracker - This struct is used while parsing expressions to keep track +/// of whether !defined(X) has been seen. +/// +/// With this simple scheme, we handle the basic forms: +/// !defined(X) and !defined X +/// but we also trivially handle (silly) stuff like: +/// !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)). +struct DefinedTracker { + /// Each time a Value is evaluated, it returns information about whether the + /// parsed value is of the form defined(X), !defined(X) or is something else. + enum TrackerState { + DefinedMacro, // defined(X) + NotDefinedMacro, // !defined(X) + Unknown // Something else. + } State; + /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this + /// indicates the macro that was checked. 
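// [Editor's note] Hypothetical #if conditions showing how the tracker below
// classifies expressions, matching the forms listed at its definition:
//
//   #if defined(X)         // DefinedMacro, TheMacro == X
//   #if defined X          // DefinedMacro (parentheses are optional)
//   #if !defined(X)        // NotDefinedMacro: '!' flips the state
//   #if !!!defined(X)      // NotDefinedMacro: each '!' flips it again
//   #if (defined(X))       // DefinedMacro: parens preserve the state
//   #if defined(X) && Y    // effectively Unknown to the caller: once a
//                          // binary operator appears, the expression no
//                          // longer qualifies for the #ifndef optimization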
+ IdentifierInfo *TheMacro; +}; + + + +/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and +/// return the computed value in Result. Return true if there was an error +/// parsing. This function also returns information about the form of the +/// expression in DT. See above for information on what DT means. +/// +/// If ValueLive is false, then this value is being evaluated in a context where +/// the result is not used. As such, avoid diagnostics that relate to +/// evaluation. +static bool EvaluateValue(llvm::APSInt &Result, Token &PeekTok, + DefinedTracker &DT, bool ValueLive, + Preprocessor &PP) { + Result = 0; + DT.State = DefinedTracker::Unknown; + + // If this token's spelling is a pp-identifier, check to see if it is + // 'defined' or if it is a macro. Note that we check here because many + // keywords are pp-identifiers, so we can't check the kind. + if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) { + // If this identifier isn't 'defined' and it wasn't macro expanded, it turns + // into a simple 0, unless it is the C++ keyword "true", in which case it + // turns into "1". + if (II->getPPKeywordID() != tok::pp_defined) { + PP.Diag(PeekTok, diag::warn_pp_undef_identifier, II->getName()); + Result = II->getTokenID() == tok::kw_true; + Result.setIsUnsigned(false); // "0" is signed intmax_t 0. + PP.LexNonComment(PeekTok); + return false; + } + + // Handle "defined X" and "defined(X)". + + // Get the next token, don't expand it. + PP.LexUnexpandedToken(PeekTok); + + // Two options, it can either be a pp-identifier or a (. + bool InParens = false; + if (PeekTok.is(tok::l_paren)) { + // Found a paren, remember we saw it and skip it. + InParens = true; + PP.LexUnexpandedToken(PeekTok); + } + + // If we don't have a pp-identifier now, this is an error. + if ((II = PeekTok.getIdentifierInfo()) == 0) { + PP.Diag(PeekTok, diag::err_pp_defined_requires_identifier); + return true; + } + + // Otherwise, we got an identifier, is it defined to something? + Result = II->hasMacroDefinition(); + Result.setIsUnsigned(false); // Result is signed intmax_t. + + // If there is a macro, mark it used. + if (Result != 0 && ValueLive) { + MacroInfo *Macro = PP.getMacroInfo(II); + Macro->setIsUsed(true); + } + + // Consume identifier. + PP.LexNonComment(PeekTok); + + // If we are in parens, ensure we have a trailing ). + if (InParens) { + if (PeekTok.isNot(tok::r_paren)) { + PP.Diag(PeekTok, diag::err_pp_missing_rparen); + return true; + } + // Consume the ). + PP.LexNonComment(PeekTok); + } + + // Success, remember that we saw defined(X). + DT.State = DefinedTracker::DefinedMacro; + DT.TheMacro = II; + return false; + } + + switch (PeekTok.getKind()) { + default: // Non-value token. + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + case tok::eom: + case tok::r_paren: + // If there is no expression, report and exit. + PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr); + return true; + case tok::numeric_constant: { + llvm::SmallString<64> IntegerBuffer; + IntegerBuffer.resize(PeekTok.getLength()); + const char *ThisTokBegin = &IntegerBuffer[0]; + unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); + NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + PeekTok.getLocation(), PP); + if (Literal.hadError) + return true; // a diagnostic was already reported. 
+ + if (Literal.isFloatingLiteral() || Literal.isImaginary) { + PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal); + return true; + } + assert(Literal.isIntegerLiteral() && "Unknown ppnumber"); + + // long long is a C99 feature. + if (!PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus0x + && Literal.isLongLong) + PP.Diag(PeekTok, diag::ext_longlong); + + // Parse the integer literal into Result. + if (Literal.GetIntegerValue(Result)) { + // Overflow parsing integer literal. + if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large); + Result.setIsUnsigned(true); + } else { + // Set the signedness of the result to match whether there was a U suffix + // or not. + Result.setIsUnsigned(Literal.isUnsigned); + + // Detect overflow based on whether the value is signed. If signed + // and if the value is too large, emit a warning "integer constant is so + // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t + // is 64-bits. + if (!Literal.isUnsigned && Result.isNegative()) { + if (ValueLive)PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed); + Result.setIsUnsigned(true); + } + } + + // Consume the token. + PP.LexNonComment(PeekTok); + return false; + } + case tok::char_constant: { // 'x' + llvm::SmallString<32> CharBuffer; + CharBuffer.resize(PeekTok.getLength()); + const char *ThisTokBegin = &CharBuffer[0]; + unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); + CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + PeekTok.getLocation(), PP); + if (Literal.hadError()) + return true; // A diagnostic was already emitted. + + // Character literals are always int or wchar_t, expand to intmax_t. + TargetInfo &TI = PP.getTargetInfo(); + unsigned NumBits = TI.getCharWidth(Literal.isWide()); + + // Set the width. + llvm::APSInt Val(NumBits); + // Set the value. + Val = Literal.getValue(); + // Set the signedness. + Val.setIsUnsigned(!TI.isCharSigned()); + + if (Result.getBitWidth() > Val.getBitWidth()) { + Result = Val.extend(Result.getBitWidth()); + } else { + assert(Result.getBitWidth() == Val.getBitWidth() && + "intmax_t smaller than char/wchar_t?"); + Result = Val; + } + + // Consume the token. + PP.LexNonComment(PeekTok); + return false; + } + case tok::l_paren: + PP.LexNonComment(PeekTok); // Eat the (. + // Parse the value and if there are any binary operators involved, parse + // them. + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + + // If this is a silly value like (X), which doesn't need parens, check for + // !(defined X). + if (PeekTok.is(tok::r_paren)) { + // Just use DT unmodified as our result. + } else { + if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP)) + return true; + + if (PeekTok.isNot(tok::r_paren)) { + PP.Diag(PeekTok, diag::err_pp_expected_rparen); + return true; + } + DT.State = DefinedTracker::Unknown; + } + PP.LexNonComment(PeekTok); // Eat the ). + return false; + + case tok::plus: + // Unary plus doesn't modify the value. + PP.LexNonComment(PeekTok); + return EvaluateValue(Result, PeekTok, DT, ValueLive, PP); + case tok::minus: { + SourceLocation Loc = PeekTok.getLocation(); + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + // C99 6.5.3.3p3: The sign of the result matches the sign of the operand. + Result = -Result; + + bool Overflow = false; + if (Result.isUnsigned()) + Overflow = Result.isNegative(); + else if (Result.isMinSignedValue()) + Overflow = true; // -MININT is the only thing that overflows. 
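// [Editor's note] The unary-minus overflow test above, restated standalone
// on a fixed 64-bit width (APSInt tracks an arbitrary width plus a
// signedness flag). The check runs after Result = -Result:
#include <cstdint>
#include <limits>

static bool NegationOverflowed(int64_t Negated, bool IsUnsigned) {
  if (IsUnsigned)                  // unsigned: flagged if the sign bit is set
    return Negated < 0;
  // Signed: -MININT is the only negation that overflows, and in two's
  // complement it wraps back to itself, so the result is still MININT.
  return Negated == std::numeric_limits<int64_t>::min();
}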
+ + // If this operator is live and overflowed, report the issue. + if (Overflow && ValueLive) + PP.Diag(Loc, diag::warn_pp_expr_overflow); + + DT.State = DefinedTracker::Unknown; + return false; + } + + case tok::tilde: + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + // C99 6.5.3.3p4: The sign of the result matches the sign of the operand. + Result = ~Result; + DT.State = DefinedTracker::Unknown; + return false; + + case tok::exclaim: + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + Result = !Result; + // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed. + Result.setIsUnsigned(false); + + if (DT.State == DefinedTracker::DefinedMacro) + DT.State = DefinedTracker::NotDefinedMacro; + else if (DT.State == DefinedTracker::NotDefinedMacro) + DT.State = DefinedTracker::DefinedMacro; + return false; + + // FIXME: Handle #assert + } +} + + + +/// getPrecedence - Return the precedence of the specified binary operator +/// token. This returns: +/// ~0 - Invalid token. +/// 14 - *,/,% +/// 13 - -,+ +/// 12 - <<,>> +/// 11 - >=, <=, >, < +/// 10 - ==, != +/// 9 - & +/// 8 - ^ +/// 7 - | +/// 6 - && +/// 5 - || +/// 4 - ? +/// 3 - : +/// 0 - eom, ) +static unsigned getPrecedence(tok::TokenKind Kind) { + switch (Kind) { + default: return ~0U; + case tok::percent: + case tok::slash: + case tok::star: return 14; + case tok::plus: + case tok::minus: return 13; + case tok::lessless: + case tok::greatergreater: return 12; + case tok::lessequal: + case tok::less: + case tok::greaterequal: + case tok::greater: return 11; + case tok::exclaimequal: + case tok::equalequal: return 10; + case tok::amp: return 9; + case tok::caret: return 8; + case tok::pipe: return 7; + case tok::ampamp: return 6; + case tok::pipepipe: return 5; + case tok::question: return 4; + case tok::colon: return 3; + case tok::comma: return 2; + case tok::r_paren: return 0; // Lowest priority, end of expr. + case tok::eom: return 0; // Lowest priority, end of macro. + } +} + + +/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is +/// PeekTok, and whose precedence is PeekPrec. +/// +/// If ValueLive is false, then this value is being evaluated in a context where +/// the result is not used. As such, avoid diagnostics that relate to +/// evaluation. +static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec, + Token &PeekTok, bool ValueLive, + Preprocessor &PP) { + unsigned PeekPrec = getPrecedence(PeekTok.getKind()); + // If this token isn't valid, report the error. + if (PeekPrec == ~0U) { + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + } + + while (1) { + // If this token has a lower precedence than we are allowed to parse, return + // it so that higher levels of the recursion can parse it. + if (PeekPrec < MinPrec) + return false; + + tok::TokenKind Operator = PeekTok.getKind(); + + // If this is a short-circuiting operator, see if the RHS of the operator is + // dead. Note that this cannot just clobber ValueLive. Consider + // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)". In + // this example, the RHS of the && being dead does not make the rest of the + // expr dead. + bool RHSIsLive; + if (Operator == tok::ampamp && LHS == 0) + RHSIsLive = false; // RHS of "0 && x" is dead. + else if (Operator == tok::pipepipe && LHS != 0) + RHSIsLive = false; // RHS of "1 || x" is dead. 
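// [Editor's note] EvaluateDirectiveSubExpr below is a precedence-climbing
// parser. Here is a self-contained sketch of the same control structure,
// reduced to longs and four left-associative operators; the token stream is
// literal (op literal)* ended by a '$' sentinel. Illustrative only.
#include <cassert>
#include <cstddef>
#include <vector>

struct ETok { char Op; long Val; };    // Op == 0 marks a literal

static unsigned Prec(char Op) {
  switch (Op) {
  case '*': case '/': return 14;
  case '+': case '-': return 13;
  default:            return 0;        // '$' sentinel: end of expression
  }
}

static long ParseExpr(const std::vector<ETok> &T, size_t &I,
                      long LHS, unsigned MinPrec) {
  while (Prec(T[I].Op) >= MinPrec && Prec(T[I].Op) != 0) {
    char Op = T[I++].Op;               // consume the operator
    long RHS = T[I++].Val;             // and its right operand
    // If the next operator binds tighter than Op, fold it into RHS first,
    // mirroring the ThisPrec < PeekPrec recursion in the real routine.
    while (Prec(T[I].Op) > Prec(Op))
      RHS = ParseExpr(T, I, RHS, Prec(Op) + 1);
    switch (Op) {
    case '+': LHS += RHS; break;
    case '-': LHS -= RHS; break;
    case '*': LHS *= RHS; break;
    case '/': assert(RHS != 0 && "division by zero"); LHS /= RHS; break;
    }
  }
  return LHS;
}
// For 2 + 3 * 4 (start with LHS == 2, I past the first literal): '+' is
// consumed, RHS becomes 3, the inner loop recurses on '*' (prec 14 > 13),
// RHS folds to 12, and the result is 14.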
+ else if (Operator == tok::question && LHS == 0) + RHSIsLive = false; // RHS (x) of "0 ? x : y" is dead. + else + RHSIsLive = ValueLive; + + // Consume the operator, saving the operator token for error reporting. + Token OpToken = PeekTok; + PP.LexNonComment(PeekTok); + + llvm::APSInt RHS(LHS.getBitWidth()); + // Parse the RHS of the operator. + DefinedTracker DT; + if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true; + + // Remember the precedence of this operator and get the precedence of the + // operator immediately to the right of the RHS. + unsigned ThisPrec = PeekPrec; + PeekPrec = getPrecedence(PeekTok.getKind()); + + // If this token isn't valid, report the error. + if (PeekPrec == ~0U) { + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + } + + bool isRightAssoc = Operator == tok::question; + + // Get the precedence of the operator to the right of the RHS. If it binds + // more tightly with RHS than we do, evaluate it completely first. + if (ThisPrec < PeekPrec || + (ThisPrec == PeekPrec && isRightAssoc)) { + if (EvaluateDirectiveSubExpr(RHS, ThisPrec+1, PeekTok, RHSIsLive, PP)) + return true; + PeekPrec = getPrecedence(PeekTok.getKind()); + } + assert(PeekPrec <= ThisPrec && "Recursion didn't work!"); + + // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if + // either operand is unsigned. Don't do this for x and y in "x ? y : z". + llvm::APSInt Res(LHS.getBitWidth()); + if (Operator != tok::question) { + Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned()); + // If this just promoted something from signed to unsigned, and if the + // value was negative, warn about it. + if (ValueLive && Res.isUnsigned()) { + if (!LHS.isUnsigned() && LHS.isNegative()) + PP.Diag(OpToken, diag::warn_pp_convert_lhs_to_positive, + LHS.toStringSigned() + " to " + LHS.toStringUnsigned()); + if (!RHS.isUnsigned() && RHS.isNegative()) + PP.Diag(OpToken, diag::warn_pp_convert_rhs_to_positive, + RHS.toStringSigned() + " to " + RHS.toStringUnsigned()); + } + LHS.setIsUnsigned(Res.isUnsigned()); + RHS.setIsUnsigned(Res.isUnsigned()); + } + + // FIXME: All of these should detect and report overflow?? + bool Overflow = false; + switch (Operator) { + default: assert(0 && "Unknown operator token!"); + case tok::percent: + if (RHS == 0) { + if (ValueLive) PP.Diag(OpToken, diag::err_pp_remainder_by_zero); + return true; + } + Res = LHS % RHS; + break; + case tok::slash: + if (RHS == 0) { + if (ValueLive) PP.Diag(OpToken, diag::err_pp_division_by_zero); + return true; + } + Res = LHS / RHS; + if (LHS.isSigned()) + Overflow = LHS.isMinSignedValue() && RHS.isAllOnesValue(); // MININT/-1 + break; + case tok::star: + Res = LHS * RHS; + if (LHS != 0 && RHS != 0) + Overflow = Res/RHS != LHS || Res/LHS != RHS; + break; + case tok::lessless: { + // Determine whether overflow is about to happen. + unsigned ShAmt = static_cast<unsigned>(RHS.getLimitedValue()); + if (ShAmt >= LHS.getBitWidth()) + Overflow = true, ShAmt = LHS.getBitWidth()-1; + else if (LHS.isUnsigned()) + Overflow = ShAmt > LHS.countLeadingZeros(); + else if (LHS.isNonNegative()) + Overflow = ShAmt >= LHS.countLeadingZeros(); // Don't allow sign change. + else + Overflow = ShAmt >= LHS.countLeadingOnes(); + + Res = LHS << ShAmt; + break; + } + case tok::greatergreater: { + // Determine whether overflow is about to happen. 
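// [Editor's note] The '<<' overflow test above, restated standalone with
// compiler builtins in place of APInt::countLeadingZeros/Ones (the
// __builtin_clzll calls assume a GCC/Clang-style compiler). A shift
// overflows if it would drop a set bit or, for signed values, change the
// sign bit.
#include <cstdint>

static bool ShlOverflows(int64_t LHS, unsigned ShAmt, bool IsUnsigned) {
  if (ShAmt >= 64)
    return true;                          // the entire value is shifted out
  uint64_t U = static_cast<uint64_t>(LHS);
  unsigned LeadZeros = (U == 0)  ? 64 : __builtin_clzll(U);
  unsigned LeadOnes  = (~U == 0) ? 64 : __builtin_clzll(~U);
  if (IsUnsigned)
    return ShAmt > LeadZeros;             // a set bit gets shifted out
  if (LHS >= 0)
    return ShAmt >= LeadZeros;            // also keep the 0 sign bit intact
  return ShAmt >= LeadOnes;               // keep the run of 1 sign bits
}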
+ unsigned ShAmt = static_cast<unsigned>(RHS.getLimitedValue()); + if (ShAmt >= LHS.getBitWidth()) + Overflow = true, ShAmt = LHS.getBitWidth()-1; + Res = LHS >> ShAmt; + break; + } + case tok::plus: + Res = LHS + RHS; + if (LHS.isUnsigned()) + Overflow = Res.ult(LHS); + else if (LHS.isNonNegative() == RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()) + Overflow = true; // Overflow for signed addition. + break; + case tok::minus: + Res = LHS - RHS; + if (LHS.isUnsigned()) + Overflow = Res.ugt(LHS); + else if (LHS.isNonNegative() != RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()) + Overflow = true; // Overflow for signed subtraction. + break; + case tok::lessequal: + Res = LHS <= RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::less: + Res = LHS < RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::greaterequal: + Res = LHS >= RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::greater: + Res = LHS > RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::exclaimequal: + Res = LHS != RHS; + Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed) + break; + case tok::equalequal: + Res = LHS == RHS; + Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed) + break; + case tok::amp: + Res = LHS & RHS; + break; + case tok::caret: + Res = LHS ^ RHS; + break; + case tok::pipe: + Res = LHS | RHS; + break; + case tok::ampamp: + Res = (LHS != 0 && RHS != 0); + Res.setIsUnsigned(false); // C99 6.5.13p3, result is always int (signed) + break; + case tok::pipepipe: + Res = (LHS != 0 || RHS != 0); + Res.setIsUnsigned(false); // C99 6.5.14p3, result is always int (signed) + break; + case tok::comma: + PP.Diag(OpToken, diag::ext_pp_comma_expr); + Res = RHS; // LHS = LHS,RHS -> RHS. + break; + case tok::question: { + // Parse the : part of the expression. + if (PeekTok.isNot(tok::colon)) { + PP.Diag(OpToken, diag::err_pp_question_without_colon); + return true; + } + // Consume the :. + PP.LexNonComment(PeekTok); + + // Evaluate the value after the :. + bool AfterColonLive = ValueLive && LHS == 0; + llvm::APSInt AfterColonVal(LHS.getBitWidth()); + DefinedTracker DT; + if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP)) + return true; + + // Parse anything after the : RHS that has a higher precedence than ?. + if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec+1, + PeekTok, AfterColonLive, PP)) + return true; + + // Now that we have the condition, the LHS and the RHS of the :, evaluate. + Res = LHS != 0 ? RHS : AfterColonVal; + + // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if + // either operand is unsigned. + Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned()); + + // Figure out the precedence of the token after the : part. + PeekPrec = getPrecedence(PeekTok.getKind()); + break; + } + case tok::colon: + // Don't allow :'s to float around without being part of ?: exprs. + PP.Diag(OpToken, diag::err_pp_colon_without_question); + return true; + } + + // If this operator is live and overflowed, report the issue. + if (Overflow && ValueLive) + PP.Diag(OpToken, diag::warn_pp_expr_overflow); + + // Put the result back into 'LHS' for our next iteration. 
+ LHS = Res; + } + + return false; +} + +/// EvaluateDirectiveExpression - Evaluate an integer constant expression that +/// may occur after a #if or #elif directive. If the expression is equivalent +/// to "!defined(X)" return X in IfNDefMacro. +bool Preprocessor:: +EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { + // Peek ahead one token. + Token Tok; + Lex(Tok); + + // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. + unsigned BitWidth = getTargetInfo().getIntMaxTWidth(); + + llvm::APSInt ResVal(BitWidth); + DefinedTracker DT; + if (EvaluateValue(ResVal, Tok, DT, true, *this)) { + // Parse error, skip the rest of the macro line. + if (Tok.isNot(tok::eom)) + DiscardUntilEndOfDirective(); + return false; + } + + // If we are at the end of the expression after just parsing a value, there + // must be no (unparenthesized) binary operators involved, so we can exit + // directly. + if (Tok.is(tok::eom)) { + // If the expression we parsed was of the form !defined(macro), return the + // macro in IfNDefMacro. + if (DT.State == DefinedTracker::NotDefinedMacro) + IfNDefMacro = DT.TheMacro; + + return ResVal != 0; + } + + // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the + // operator and the stuff after it. + if (EvaluateDirectiveSubExpr(ResVal, 1, Tok, true, *this)) { + // Parse error, skip the rest of the macro line. + if (Tok.isNot(tok::eom)) + DiscardUntilEndOfDirective(); + return false; + } + + // If we aren't at the tok::eom token, something bad happened, like an extra + // ')' token. + if (Tok.isNot(tok::eom)) { + Diag(Tok, diag::err_pp_expected_eol); + DiscardUntilEndOfDirective(); + } + + return ResVal != 0; +} + diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp new file mode 100644 index 00000000000..bd0ff7f94a1 --- /dev/null +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -0,0 +1,401 @@ +//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements pieces of the Preprocessor interface that manage the +// current lexer stack. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +using namespace clang; + +PPCallbacks::~PPCallbacks() { +} + + +//===----------------------------------------------------------------------===// +// Miscellaneous Methods. +//===----------------------------------------------------------------------===// + +/// isInPrimaryFile - Return true if we're in the top-level file, not in a +/// #include. This looks through macro expansions and active _Pragma lexers. +bool Preprocessor::isInPrimaryFile() const { + if (CurLexer && !CurLexer->Is_PragmaLexer) + return IncludeMacroStack.empty(); + + // If there are any stacked lexers, we're in a #include. 
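+ // (For example, while a macro is being expanded inside b.h, which was
+ // included from a.c, CurLexer is null: entry 0 is a.c's lexer and the loop
+ // below finds b.h's file lexer at a later index, so this returns false.)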
+ assert(IncludeMacroStack[0].TheLexer &&
+ !IncludeMacroStack[0].TheLexer->Is_PragmaLexer &&
+ "Top level include stack isn't our primary lexer?");
+ for (unsigned i = 1, e = IncludeMacroStack.size(); i != e; ++i)
+ if (IncludeMacroStack[i].TheLexer &&
+ !IncludeMacroStack[i].TheLexer->Is_PragmaLexer)
+ return false;
+ return true;
+}
+
+/// getCurrentFileLexer - Return the current file lexer being lexed from. Note
+/// that this ignores any potentially active macro expansions and _Pragma
+/// expansions going on at the time.
+Lexer *Preprocessor::getCurrentFileLexer() const {
+ if (CurLexer && !CurLexer->Is_PragmaLexer) return CurLexer;
+
+ // Look for a stacked lexer.
+ for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
+ Lexer *L = IncludeMacroStack[i-1].TheLexer;
+ if (L && !L->Is_PragmaLexer) // Ignore macro & _Pragma expansions.
+ return L;
+ }
+ return 0;
+}
+
+/// LookAhead - This peeks ahead N tokens and returns that token without
+/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns
+/// the token after Tok, etc.
+///
+/// NOTE: this is a relatively expensive method, so it should not be used in
+/// common code paths if possible!
+///
+Token Preprocessor::LookAhead(unsigned N) {
+ // FIXME: Optimize the case where multiple lookahead calls are used back to
+ // back. Consider if the parser contained (dynamically):
+ // Lookahead(1); Lookahead(1); Lookahead(1)
+ // This would return the same token 3 times, but would end up making lots of
+ // token stream lexers to do it. To handle this common case, see if the top
+ // of the lexer stack is a TokenStreamLexer with macro expansion disabled. If
+ // so, see if it has 'N' tokens available in it. If so, just return the
+ // token.
+
+ // FIXME: Optimize the case when the parser does multiple nearby lookahead
+ // calls. For example, consider:
+ // Lookahead(0); Lookahead(1); Lookahead(2);
+ // The previous optimization won't apply, and there won't be any space left in
+ // the array that was previously new'd. To handle this, always round up the
+ // size we new to a multiple of 16 tokens. If the previous buffer has space
+ // left, we can just grow it. This means we only have to do the new 1/16th as
+ // often.
+
+ Token *LookaheadTokens = new Token[N+1]; // The loop below fills slots 0..N.
+
+ // Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token
+ // to return.
+ Token Tok;
+ unsigned NumTokens = 0;
+ for (; N != ~0U; --N, ++NumTokens) {
+ Lex(Tok);
+ LookaheadTokens[NumTokens] = Tok;
+
+ // If we got to EOF, don't lex past it. This will cause LookAhead to return
+ // the EOF token.
+ if (Tok.is(tok::eof))
+ break;
+ }
+
+ // Okay, at this point, we have the token we want to return in Tok. However,
+ // we read it and a bunch of other stuff (in LookaheadTokens) that we must
+ // allow subsequent calls to 'Lex' to return. To do this, we push a new token
+ // lexer onto the lexer stack with the tokens we read here. This passes
+ // ownership of LookaheadTokens to EnterTokenStream.
+ //
+ // Note that we disable macro expansion of the tokens from this buffer, since
+ // any macros have already been expanded, and the internal preprocessor state
+ // may already read past new macros. Consider something like LookAhead(1) on
+ // X
+ // #define X 14
+ // Y
+ // The lookahead call should return 'Y', and the next Lex call should return
+ // 'X' even though X -> 14 has already been entered as a macro.
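+ // That is, the replayed 'X' was lexed before the #define directive was
+ // processed; re-expanding it during replay would wrongly produce '14'.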
+ //
+ EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
+ true /*OwnsTokens*/);
+ return Tok;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for Entering and Callbacks for leaving various contexts
+//===----------------------------------------------------------------------===//
+
+/// EnterSourceFile - Add a source file to the top of the include stack and
+/// start lexing tokens from it instead of the current buffer.
+void Preprocessor::EnterSourceFile(unsigned FileID,
+ const DirectoryLookup *CurDir) {
+ assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!");
+ ++NumEnteredSourceFiles;
+
+ if (MaxIncludeStackDepth < IncludeMacroStack.size())
+ MaxIncludeStackDepth = IncludeMacroStack.size();
+
+ Lexer *TheLexer = new Lexer(SourceLocation::getFileLoc(FileID, 0), *this);
+ EnterSourceFileWithLexer(TheLexer, CurDir);
+}
+
+/// EnterSourceFileWithLexer - Add a source file to the top of the include
+/// stack and start lexing tokens from it instead of the current buffer.
+void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
+ const DirectoryLookup *CurDir) {
+
+ // Add the current lexer to the include stack.
+ if (CurLexer || CurTokenLexer)
+ IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+ CurTokenLexer));
+
+ CurLexer = TheLexer;
+ CurDirLookup = CurDir;
+ CurTokenLexer = 0;
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks && !CurLexer->Is_PragmaLexer) {
+ DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir;
+
+ // Get the file entry for the current file.
+ if (const FileEntry *FE =
+ SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
+ FileType = HeaderInfo.getFileDirFlavor(FE);
+
+ Callbacks->FileChanged(CurLexer->getFileLoc(),
+ PPCallbacks::EnterFile, FileType);
+ }
+}
+
+
+
+/// EnterMacro - Add a Macro to the top of the include stack and start lexing
+/// tokens from it instead of the current buffer.
+void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) {
+ IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+ CurTokenLexer));
+ CurLexer = 0;
+ CurDirLookup = 0;
+
+ if (NumCachedTokenLexers == 0) {
+ CurTokenLexer = new TokenLexer(Tok, Args, *this);
+ } else {
+ CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers];
+ CurTokenLexer->Init(Tok, Args);
+ }
+}
+
+/// EnterTokenStream - Add a "macro" context to the top of the include stack,
+/// which will cause the lexer to start returning the specified tokens.
+///
+/// If DisableMacroExpansion is true, tokens lexed from the token stream will
+/// not be subject to further macro expansion. Otherwise, these tokens will
+/// be re-macro-expanded when/if expansion is enabled.
+///
+/// If OwnsTokens is false, this method assumes that the specified stream of
+/// tokens has a permanent owner somewhere, so they do not need to be copied.
+/// If it is true, it assumes the array of tokens is allocated with new[] and
+/// must be freed.
+///
+void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
+ bool DisableMacroExpansion,
+ bool OwnsTokens) {
+ // Save our current state.
+ IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+ CurTokenLexer));
+ CurLexer = 0;
+ CurDirLookup = 0;
+
+ // Create a macro expander to expand from the specified token stream.
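+ // (LookAhead above is one client: it hands its heap-allocated token buffer
+ // to this method with DisableMacroExpansion and OwnsTokens both set.)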
+ if (NumCachedTokenLexers == 0) { + CurTokenLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion, + OwnsTokens, *this); + } else { + CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers]; + CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); + } +} + +/// HandleEndOfFile - This callback is invoked when the lexer hits the end of +/// the current file. This either returns the EOF token or pops a level off +/// the include stack and keeps going. +bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { + assert(!CurTokenLexer && + "Ending a file when currently in a macro!"); + + // See if this file had a controlling macro. + if (CurLexer) { // Not ending a macro, ignore it. + if (const IdentifierInfo *ControllingMacro = + CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) { + // Okay, this has a controlling macro, remember in PerFileInfo. + if (const FileEntry *FE = + SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc())) + HeaderInfo.SetFileControllingMacro(FE, ControllingMacro); + } + } + + // If this is a #include'd file, pop it off the include stack and continue + // lexing the #includer file. + if (!IncludeMacroStack.empty()) { + // We're done with the #included file. + RemoveTopOfLexerStack(); + + // Notify the client, if desired, that we are in a new source file. + if (Callbacks && !isEndOfMacro && CurLexer) { + DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir; + + // Get the file entry for the current file. + if (const FileEntry *FE = + SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc())) + FileType = HeaderInfo.getFileDirFlavor(FE); + + Callbacks->FileChanged(CurLexer->getSourceLocation(CurLexer->BufferPtr), + PPCallbacks::ExitFile, FileType); + } + + // Client should lex another token. + return false; + } + + // If the file ends with a newline, form the EOF token on the newline itself, + // rather than "on the line following it", which doesn't exist. This makes + // diagnostics relating to the end of file include the last file that the user + // actually typed, which is goodness. + const char *EndPos = CurLexer->BufferEnd; + if (EndPos != CurLexer->BufferStart && + (EndPos[-1] == '\n' || EndPos[-1] == '\r')) { + --EndPos; + + // Handle \n\r and \r\n: + if (EndPos != CurLexer->BufferStart && + (EndPos[-1] == '\n' || EndPos[-1] == '\r') && + EndPos[-1] != EndPos[0]) + --EndPos; + } + + Result.startToken(); + CurLexer->BufferPtr = EndPos; + CurLexer->FormTokenWithChars(Result, EndPos); + Result.setKind(tok::eof); + + // We're done with the #included file. + delete CurLexer; + CurLexer = 0; + + // This is the end of the top-level file. If the diag::pp_macro_not_used + // diagnostic is enabled, look for macros that have not been used. + if (Diags.getDiagnosticLevel(diag::pp_macro_not_used) != Diagnostic::Ignored){ + for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I = + Macros.begin(), E = Macros.end(); I != E; ++I) { + if (!I->second->isUsed()) + Diag(I->second->getDefinitionLoc(), diag::pp_macro_not_used); + } + } + return true; +} + +/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer +/// hits the end of its token stream. +bool Preprocessor::HandleEndOfTokenLexer(Token &Result) { + assert(CurTokenLexer && !CurLexer && + "Ending a macro when currently in a #include file!"); + + // Delete or cache the now-dead macro expander. 
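+ // (TokenLexerCache holds up to TokenLexerCacheSize retired TokenLexers;
+ // EnterMacro and EnterTokenStream re-Init() a cached one instead of
+ // allocating, so the common expansion path avoids a new/delete pair.)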
+ if (NumCachedTokenLexers == TokenLexerCacheSize) + delete CurTokenLexer; + else + TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer; + + // Handle this like a #include file being popped off the stack. + CurTokenLexer = 0; + return HandleEndOfFile(Result, true); +} + +/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the +/// lexer stack. This should only be used in situations where the current +/// state of the top-of-stack lexer is unknown. +void Preprocessor::RemoveTopOfLexerStack() { + assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load"); + + if (CurTokenLexer) { + // Delete or cache the now-dead macro expander. + if (NumCachedTokenLexers == TokenLexerCacheSize) + delete CurTokenLexer; + else + TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer; + } else { + delete CurLexer; + } + CurLexer = IncludeMacroStack.back().TheLexer; + CurDirLookup = IncludeMacroStack.back().TheDirLookup; + CurTokenLexer = IncludeMacroStack.back().TheTokenLexer; + IncludeMacroStack.pop_back(); +} + +/// HandleMicrosoftCommentPaste - When the macro expander pastes together a +/// comment (/##/) in microsoft mode, this method handles updating the current +/// state, returning the token on the next source line. +void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) { + assert(CurTokenLexer && !CurLexer && + "Pasted comment can only be formed from macro"); + + // We handle this by scanning for the closest real lexer, switching it to + // raw mode and preprocessor mode. This will cause it to return \n as an + // explicit EOM token. + Lexer *FoundLexer = 0; + bool LexerWasInPPMode = false; + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { + IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1); + if (ISI.TheLexer == 0) continue; // Scan for a real lexer. + + // Once we find a real lexer, mark it as raw mode (disabling macro + // expansions) and preprocessor mode (return EOM). We know that the lexer + // was *not* in raw mode before, because the macro that the comment came + // from was expanded. However, it could have already been in preprocessor + // mode (#if COMMENT) in which case we have to return it to that mode and + // return EOM. + FoundLexer = ISI.TheLexer; + FoundLexer->LexingRawMode = true; + LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective; + FoundLexer->ParsingPreprocessorDirective = true; + break; + } + + // Okay, we either found and switched over the lexer, or we didn't find a + // lexer. In either case, finish off the macro the comment came from, getting + // the next token. + if (!HandleEndOfTokenLexer(Tok)) Lex(Tok); + + // Discarding comments as long as we don't have EOF or EOM. This 'comments + // out' the rest of the line, including any tokens that came from other macros + // that were active, as in: + // #define submacro a COMMENT b + // submacro c + // which should lex to 'a' only: 'b' and 'c' should be removed. + while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof)) + Lex(Tok); + + // If we got an eom token, then we successfully found the end of the line. + if (Tok.is(tok::eom)) { + assert(FoundLexer && "Can't get end of line without an active lexer"); + // Restore the lexer back to normal mode instead of raw mode. + FoundLexer->LexingRawMode = false; + + // If the lexer was already in preprocessor mode, just return the EOM token + // to finish the preprocessor line. + if (LexerWasInPPMode) return; + + // Otherwise, switch out of PP mode and return the next lexed token. 
+ FoundLexer->ParsingPreprocessorDirective = false;
+ return Lex(Tok);
+ }
+
+ // If we got an EOF token, then we reached the end of the token stream but
+ // didn't find an explicit \n. This can only happen if there was no lexer
+ // active (an active lexer would return EOM at EOF if there was no \n in
+ // preprocessor directive mode), so just return EOF as our token.
+ assert(!FoundLexer && "Lexer should return EOM before EOF in PP mode");
+}
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
new file mode 100644
index 00000000000..8218d0ac06e
--- /dev/null
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -0,0 +1,523 @@
+//===--- PPMacroExpansion.cpp - Top level Macro Expansion -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top level handling of macro expansion for the
+// preprocessor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/Diagnostic.h"
+using namespace clang;
+
+/// setMacroInfo - Specify a macro for this identifier.
+///
+void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
+ if (MI == 0) {
+ if (II->hasMacroDefinition()) {
+ Macros.erase(II);
+ II->setHasMacroDefinition(false);
+ }
+ } else {
+ Macros[II] = MI;
+ II->setHasMacroDefinition(true);
+ }
+}
+
+/// RegisterBuiltinMacro - Register the specified identifier in the identifier
+/// table and mark it as a builtin macro to be expanded.
+IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) {
+ // Get the identifier.
+ IdentifierInfo *Id = getIdentifierInfo(Name);
+
+ // Mark it as being a macro that is builtin.
+ MacroInfo *MI = new MacroInfo(SourceLocation());
+ MI->setIsBuiltinMacro();
+ setMacroInfo(Id, MI);
+ return Id;
+}
+
+
+/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
+/// identifier table.
+void Preprocessor::RegisterBuiltinMacros() {
+ Ident__LINE__ = RegisterBuiltinMacro("__LINE__");
+ Ident__FILE__ = RegisterBuiltinMacro("__FILE__");
+ Ident__DATE__ = RegisterBuiltinMacro("__DATE__");
+ Ident__TIME__ = RegisterBuiltinMacro("__TIME__");
+ Ident_Pragma = RegisterBuiltinMacro("_Pragma");
+
+ // GCC Extensions.
+ Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__");
+ Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__");
+ Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__");
+}
+
+/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
+/// in its expansion, currently expands to that token literally.
+static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
+ const IdentifierInfo *MacroIdent,
+ Preprocessor &PP) {
+ IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
+
+ // If the token isn't an identifier, it's always literally expanded.
+ if (II == 0) return true;
+
+ // If the identifier is a macro, and if that macro is enabled, it may be
+ // expanded so it's not a trivial expansion.
+ if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() &&
+ // Fast expanding "#define X X" is ok, because X would be disabled.
+ II != MacroIdent)
+ return false;
+
+ // If this is an object-like macro invocation, it is safe to trivially expand
+ // it.
+ if (MI->isObjectLike()) return true;
+
+ // If this is a function-like macro invocation, it's safe to trivially expand
+ // as long as the identifier is not a macro argument.
+ for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
+ I != E; ++I)
+ if (*I == II)
+ return false; // Identifier is a macro argument.
+
+ return true;
+}
+
+
+/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
+/// lexed is a '('. If so, consume the token and return true, if not, this
+/// method should have no observable side-effect on the lexed tokens.
+bool Preprocessor::isNextPPTokenLParen() {
+ // Do some quick tests for rejection cases.
+ unsigned Val;
+ if (CurLexer)
+ Val = CurLexer->isNextPPTokenLParen();
+ else
+ Val = CurTokenLexer->isNextTokenLParen();
+
+ if (Val == 2) {
+ // We have run off the end. If it's a source file we don't
+ // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
+ // macro stack.
+ if (CurLexer)
+ return false;
+ for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
+ IncludeStackInfo &Entry = IncludeMacroStack[i-1];
+ if (Entry.TheLexer)
+ Val = Entry.TheLexer->isNextPPTokenLParen();
+ else
+ Val = Entry.TheTokenLexer->isNextTokenLParen();
+
+ if (Val != 2)
+ break;
+
+ // Ran off the end of a source file?
+ if (Entry.TheLexer)
+ return false;
+ }
+ }
+
+ // Okay, if we know that the token is a '(', lex it and return. Otherwise we
+ // have found something that isn't a '(' or we found the end of the
+ // translation unit. In either case, return false.
+ if (Val != 1)
+ return false;
+
+ Token Tok;
+ LexUnexpandedToken(Tok);
+ assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
+ return true;
+}
+
+/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
+/// expanded as a macro, handle it and return the next token as 'Identifier'.
+bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
+ MacroInfo *MI) {
+ // If this is a macro expansion in the "#if !defined(x)" line for the file,
+ // then the macro could expand to different things in other contexts, so we
+ // need to disable the optimization in this case.
+ if (CurLexer) CurLexer->MIOpt.ExpandedMacro();
+
+ // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
+ if (MI->isBuiltinMacro()) {
+ ExpandBuiltinMacro(Identifier);
+ return false;
+ }
+
+ /// Args - If this is a function-like macro expansion, this contains,
+ /// for each macro argument, the list of tokens that were provided to the
+ /// invocation.
+ MacroArgs *Args = 0;
+
+ // If this is a function-like macro, read the arguments.
+ if (MI->isFunctionLike()) {
+ // C99 6.10.3p10: If the preprocessing token immediately after the macro
+ // name isn't a '(', this macro should not be expanded. Otherwise, consume
+ // it.
+ if (!isNextPPTokenLParen())
+ return true;
+
+ // Remember that we are now parsing the arguments to a macro invocation.
+ // Preprocessor directives used inside macro arguments are not portable, and
+ // this enables the warning.
+ InMacroArgs = true;
+ Args = ReadFunctionLikeMacroArgs(Identifier, MI);
+
+ // Finished parsing args.
+ InMacroArgs = false;
+
+ // If there was an error parsing the arguments, bail out.
+ if (Args == 0) return false;
+
+ ++NumFnMacroExpanded;
+ } else {
+ ++NumMacroExpanded;
+ }
+
+ // Notice that this macro has been used.
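+ // (This feeds the end-of-main-file check in HandleEndOfFile, which walks
+ // the macro table and emits pp_macro_not_used for any definition whose
+ // isUsed() flag was never set.)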
+ MI->setIsUsed(true);
+
+ // If we started lexing a macro, enter the macro expansion body.
+
+ // If this macro expands to no tokens, don't bother to push it onto the
+ // expansion stack, only to take it right back off.
+ if (MI->getNumTokens() == 0) {
+ // No need for arg info.
+ if (Args) Args->destroy();
+
+ // Ignore this macro use, just return the next token in the current
+ // buffer.
+ bool HadLeadingSpace = Identifier.hasLeadingSpace();
+ bool IsAtStartOfLine = Identifier.isAtStartOfLine();
+
+ Lex(Identifier);
+
+ // If the identifier isn't on some OTHER line, inherit the leading
+ // whitespace/first-on-a-line property of this token. This handles
+ // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is
+ // empty.
+ if (!Identifier.isAtStartOfLine()) {
+ if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
+ if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
+ }
+ ++NumFastMacroExpanded;
+ return false;
+
+ } else if (MI->getNumTokens() == 1 &&
+ isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
+ *this)){
+ // Otherwise, if this macro expands into a single trivially-expanded
+ // token: expand it now. This handles common cases like
+ // "#define VAL 42".
+
+ // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
+ // identifier to the expanded token.
+ bool isAtStartOfLine = Identifier.isAtStartOfLine();
+ bool hasLeadingSpace = Identifier.hasLeadingSpace();
+
+ // Remember where the token is instantiated.
+ SourceLocation InstantiateLoc = Identifier.getLocation();
+
+ // Replace the result token.
+ Identifier = MI->getReplacementToken(0);
+
+ // Restore the StartOfLine/LeadingSpace markers.
+ Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
+ Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
+
+ // Update the token's location to include both its logical and physical
+ // locations.
+ SourceLocation Loc =
+ SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc);
+ Identifier.setLocation(Loc);
+
+ // If this is #define X X, we must mark the result as unexpandable.
+ if (IdentifierInfo *NewII = Identifier.getIdentifierInfo())
+ if (getMacroInfo(NewII) == MI)
+ Identifier.setFlag(Token::DisableExpand);
+
+ // The resulting token cannot be macro expanded further, so we're done.
+ ++NumFastMacroExpanded;
+ return false;
+ }
+
+ // Start expanding the macro.
+ EnterMacro(Identifier, Args);
+
+ // Now that the macro is at the top of the include stack, ask the
+ // preprocessor to read the next token from it.
+ Lex(Identifier);
+ return false;
+}
+
+/// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
+/// invoked to read all of the actual arguments specified for the macro
+/// invocation. This returns null on error.
+MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
+ MacroInfo *MI) {
+ // The number of fixed arguments to parse.
+ unsigned NumFixedArgsLeft = MI->getNumArgs();
+ bool isVariadic = MI->isVariadic();
+
+ // Outer loop, while there are more arguments, keep reading them.
+ Token Tok;
+ Tok.setKind(tok::comma);
+ --NumFixedArgsLeft; // Start reading the first arg.
+
+ // ArgTokens - Build up a list of tokens that make up each argument. Each
+ // argument is separated by an EOF token. Use a SmallVector so we can avoid
+ // heap allocations in the common case.
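+ // (Layout example: for "#define F(x, y)" invoked as "F(a, (b, c))", the
+ // flat list is: a <eof> ( b , c ) <eof>. The comma inside the parens does
+ // not split arguments because of the paren-depth tracking below.)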
+ llvm::SmallVector<Token, 64> ArgTokens;
+
+ unsigned NumActuals = 0;
+ while (Tok.is(tok::comma)) {
+ // C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note
+ // that we already consumed the first one.
+ unsigned NumParens = 0;
+
+ while (1) {
+ // Read arguments as unexpanded tokens. This avoids issues, e.g., where
+ // an argument value in a macro could expand to ',' or '(' or ')'.
+ LexUnexpandedToken(Tok);
+
+ if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n"
+ Diag(MacroName, diag::err_unterm_macro_invoc);
+ // Do not lose the EOF/EOM. Return it to the client.
+ MacroName = Tok;
+ return 0;
+ } else if (Tok.is(tok::r_paren)) {
+ // If we found the ) token, the macro arg list is done.
+ if (NumParens-- == 0)
+ break;
+ } else if (Tok.is(tok::l_paren)) {
+ ++NumParens;
+ } else if (Tok.is(tok::comma) && NumParens == 0) {
+ // Comma ends this argument if there are more fixed arguments expected.
+ if (NumFixedArgsLeft)
+ break;
+
+ // If this is not a variadic macro, too many args were specified.
+ if (!isVariadic) {
+ // Emit the diagnostic at the macro name in case there is a missing ).
+ // Emitting it at the , could be far away from the macro name.
+ Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
+ return 0;
+ }
+ // Otherwise, continue to add the tokens to this variable argument.
+ } else if (Tok.is(tok::comment) && !KeepMacroComments) {
+ // If this is a comment token in the argument list and we're just in
+ // -C mode (not -CC mode), discard the comment.
+ continue;
+ } else if (Tok.is(tok::identifier)) {
+ // Reading macro arguments can cause macros that we are currently
+ // expanding from to be popped off the expansion stack. Doing so causes
+ // them to be reenabled for expansion. Here we record whether any
+ // identifiers we lex as macro arguments correspond to disabled macros.
+ // If so, we mark the token as noexpand. This is a subtle aspect of
+ // C99 6.10.3.4p2.
+ if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
+ if (!MI->isEnabled())
+ Tok.setFlag(Token::DisableExpand);
+ }
+
+ ArgTokens.push_back(Tok);
+ }
+
+ // Empty arguments are standard in C99 and supported as an extension in
+ // other modes.
+ if (ArgTokens.empty() && !Features.C99)
+ Diag(Tok, diag::ext_empty_fnmacro_arg);
+
+ // Add a marker EOF token to the end of the token list for this argument.
+ Token EOFTok;
+ EOFTok.startToken();
+ EOFTok.setKind(tok::eof);
+ EOFTok.setLocation(Tok.getLocation());
+ EOFTok.setLength(0);
+ ArgTokens.push_back(EOFTok);
+ ++NumActuals;
+ --NumFixedArgsLeft;
+ }
+
+ // Okay, we found the r_paren. Check to see if we parsed too few
+ // arguments.
+ unsigned MinArgsExpected = MI->getNumArgs();
+
+ // See MacroArgs instance var for description of this.
+ bool isVarargsElided = false;
+
+ if (NumActuals < MinArgsExpected) {
+ // There are several cases where too few arguments is ok, handle them now.
+ if (NumActuals+1 == MinArgsExpected && MI->isVariadic()) {
+ // Varargs where the named vararg parameter is missing: ok as extension.
+ // #define A(x, ...)
+ // A("blah")
+ Diag(Tok, diag::ext_missing_varargs_arg);
+
+ // Remember this occurred if this is a C99 macro invocation with at least
+ // one actual argument.
+ isVarargsElided = MI->isC99Varargs() && MI->getNumArgs() > 1;
+ } else if (MI->getNumArgs() == 1) {
+ // #define A(x)
+ // A()
+ // is ok because it is an empty argument.
+
+ // Empty arguments are standard in C99 and supported as an extension in
+ // other modes.
+ if (ArgTokens.empty() && !Features.C99) + Diag(Tok, diag::ext_empty_fnmacro_arg); + } else { + // Otherwise, emit the error. + Diag(Tok, diag::err_too_few_args_in_macro_invoc); + return 0; + } + + // Add a marker EOF token to the end of the token list for this argument. + SourceLocation EndLoc = Tok.getLocation(); + Tok.startToken(); + Tok.setKind(tok::eof); + Tok.setLocation(EndLoc); + Tok.setLength(0); + ArgTokens.push_back(Tok); + } + + return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided); +} + +/// ComputeDATE_TIME - Compute the current time, enter it into the specified +/// scratch buffer, then return DATELoc/TIMELoc locations with the position of +/// the identifier tokens inserted. +static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, + Preprocessor &PP) { + time_t TT = time(0); + struct tm *TM = localtime(&TT); + + static const char * const Months[] = { + "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" + }; + + char TmpBuffer[100]; + sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, + TM->tm_year+1900); + DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); + + sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec); + TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); +} + +/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded +/// as a builtin macro, handle it and return the next token as 'Tok'. +void Preprocessor::ExpandBuiltinMacro(Token &Tok) { + // Figure out which token this is. + IdentifierInfo *II = Tok.getIdentifierInfo(); + assert(II && "Can't be a macro without id info!"); + + // If this is an _Pragma directive, expand it, invoke the pragma handler, then + // lex the token after it. + if (II == Ident_Pragma) + return Handle_Pragma(Tok); + + ++NumBuiltinMacroExpanded; + + char TmpBuffer[100]; + + // Set up the return result. + Tok.setIdentifierInfo(0); + Tok.clearFlag(Token::NeedsCleaning); + + if (II == Ident__LINE__) { + // __LINE__ expands to a simple numeric value. + sprintf(TmpBuffer, "%u", SourceMgr.getLogicalLineNumber(Tok.getLocation())); + unsigned Length = strlen(TmpBuffer); + Tok.setKind(tok::numeric_constant); + Tok.setLength(Length); + Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { + SourceLocation Loc = Tok.getLocation(); + if (II == Ident__BASE_FILE__) { + Diag(Tok, diag::ext_pp_base_file); + SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc); + while (NextLoc.isValid()) { + Loc = NextLoc; + NextLoc = SourceMgr.getIncludeLoc(Loc); + } + } + + // Escape this filename. 
Turn '\' -> '\\' '"' -> '\"'
+ std::string FN = SourceMgr.getSourceName(SourceMgr.getLogicalLoc(Loc));
+ FN = '"' + Lexer::Stringify(FN) + '"';
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(FN.size());
+ Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
+ } else if (II == Ident__DATE__) {
+ if (!DATELoc.isValid())
+ ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(strlen("\"Mmm dd yyyy\""));
+ Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation()));
+ } else if (II == Ident__TIME__) {
+ if (!TIMELoc.isValid())
+ ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(strlen("\"hh:mm:ss\""));
+ Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation()));
+ } else if (II == Ident__INCLUDE_LEVEL__) {
+ Diag(Tok, diag::ext_pp_include_level);
+
+ // Compute the include depth of this token.
+ unsigned Depth = 0;
+ SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation());
+ for (; Loc.isValid(); ++Depth)
+ Loc = SourceMgr.getIncludeLoc(Loc);
+
+ // __INCLUDE_LEVEL__ expands to a simple numeric value.
+ sprintf(TmpBuffer, "%u", Depth);
+ unsigned Length = strlen(TmpBuffer);
+ Tok.setKind(tok::numeric_constant);
+ Tok.setLength(Length);
+ Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
+ } else if (II == Ident__TIMESTAMP__) {
+ // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
+ // of the form "Ddd Mmm dd hh:mm:ss yyyy", which is returned by asctime.
+ Diag(Tok, diag::ext_pp_timestamp);
+
+ // Get the file that we are lexing out of. If we're currently lexing from
+ // a macro, dig into the include stack.
+ const FileEntry *CurFile = 0;
+ Lexer *TheLexer = getCurrentFileLexer();
+
+ if (TheLexer)
+ CurFile = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
+
+ // Format the file's last-modification time in asctime form, or use a
+ // placeholder if the file is unknown.
+ const char *Result;
+ if (CurFile) {
+ time_t TT = CurFile->getModificationTime();
+ struct tm *TM = localtime(&TT);
+ Result = asctime(TM);
+ } else {
+ Result = "??? ??? ?? ??:??:?? ????\n";
+ }
+ TmpBuffer[0] = '"';
+ strcpy(TmpBuffer+1, Result);
+ unsigned Len = strlen(TmpBuffer);
+ TmpBuffer[Len-1] = '"'; // Replace the newline with a quote.
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(Len);
+ Tok.setLocation(CreateString(TmpBuffer, Len, Tok.getLocation()));
+ } else {
+ assert(0 && "Unknown identifier!");
+ }
+}
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
new file mode 100644
index 00000000000..08ad1cf1d2f
--- /dev/null
+++ b/clang/lib/Lex/Pragma.cpp
@@ -0,0 +1,386 @@
+//===--- Pragma.cpp - Pragma registration and handling --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PragmaHandler/PragmaTable interfaces and implements
+// pragma related methods of the Preprocessor class.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Pragma.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; + +// Out-of-line destructor to provide a home for the class. +PragmaHandler::~PragmaHandler() { +} + +//===----------------------------------------------------------------------===// +// PragmaNamespace Implementation. +//===----------------------------------------------------------------------===// + + +PragmaNamespace::~PragmaNamespace() { + for (unsigned i = 0, e = Handlers.size(); i != e; ++i) + delete Handlers[i]; +} + +/// FindHandler - Check to see if there is already a handler for the +/// specified name. If not, return the handler for the null identifier if it +/// exists, otherwise return null. If IgnoreNull is true (the default) then +/// the null handler isn't returned on failure to match. +PragmaHandler *PragmaNamespace::FindHandler(const IdentifierInfo *Name, + bool IgnoreNull) const { + PragmaHandler *NullHandler = 0; + for (unsigned i = 0, e = Handlers.size(); i != e; ++i) { + if (Handlers[i]->getName() == Name) + return Handlers[i]; + + if (Handlers[i]->getName() == 0) + NullHandler = Handlers[i]; + } + return IgnoreNull ? 0 : NullHandler; +} + +void PragmaNamespace::HandlePragma(Preprocessor &PP, Token &Tok) { + // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro + // expand it, the user can have a STDC #define, that should not affect this. + PP.LexUnexpandedToken(Tok); + + // Get the handler for this token. If there is no handler, ignore the pragma. + PragmaHandler *Handler = FindHandler(Tok.getIdentifierInfo(), false); + if (Handler == 0) return; + + // Otherwise, pass it down. + Handler->HandlePragma(PP, Tok); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Pragma Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandlePragmaDirective - The "#pragma" directive has been parsed. Lex the +/// rest of the pragma, passing it to the registered pragma handlers. +void Preprocessor::HandlePragmaDirective() { + ++NumPragma; + + // Invoke the first level of pragma handlers which reads the namespace id. + Token Tok; + PragmaHandlers->HandlePragma(*this, Tok); + + // If the pragma handler didn't read the rest of the line, consume it now. + if (CurLexer->ParsingPreprocessorDirective) + DiscardUntilEndOfDirective(); +} + +/// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then +/// return the first token after the directive. The _Pragma token has just +/// been read into 'Tok'. +void Preprocessor::Handle_Pragma(Token &Tok) { + // Remember the pragma token location. + SourceLocation PragmaLoc = Tok.getLocation(); + + // Read the '('. + Lex(Tok); + if (Tok.isNot(tok::l_paren)) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // Read the '"..."'. + Lex(Tok); + if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // Remember the string. + std::string StrVal = getSpelling(Tok); + SourceLocation StrLoc = Tok.getLocation(); + + // Read the ')'. 
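+ // (Worked example: _Pragma("GCC dependency \"parse.y\"") is destringized
+ // below into the text ' GCC dependency "parse.y"' plus a newline, then
+ // re-lexed as an ordinary #pragma directive.)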
+ Lex(Tok); + if (Tok.isNot(tok::r_paren)) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1. + if (StrVal[0] == 'L') // Remove L prefix. + StrVal.erase(StrVal.begin()); + assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' && + "Invalid string token!"); + + // Remove the front quote, replacing it with a space, so that the pragma + // contents appear to have a space before them. + StrVal[0] = ' '; + + // Replace the terminating quote with a \n\0. + StrVal[StrVal.size()-1] = '\n'; + StrVal += '\0'; + + // Remove escaped quotes and escapes. + for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) { + if (StrVal[i] == '\\' && + (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) { + // \\ -> '\' and \" -> '"'. + StrVal.erase(StrVal.begin()+i); + --e; + } + } + + // Plop the string (including the newline and trailing null) into a buffer + // where we can lex it. + SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size(), StrLoc); + const char *StrData = SourceMgr.getCharacterData(TokLoc); + + // Make and enter a lexer object so that we lex and expand the tokens just + // like any others. + Lexer *TL = new Lexer(TokLoc, *this, + StrData, StrData+StrVal.size()-1 /* no null */); + + // Ensure that the lexer thinks it is inside a directive, so that end \n will + // return an EOM token. + TL->ParsingPreprocessorDirective = true; + + // This lexer really is for _Pragma. + TL->Is_PragmaLexer = true; + + EnterSourceFileWithLexer(TL, 0); + + // With everything set up, lex this as a #pragma directive. + HandlePragmaDirective(); + + // Finally, return whatever came after the pragma directive. + return Lex(Tok); +} + + + +/// HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'. +/// +void Preprocessor::HandlePragmaOnce(Token &OnceTok) { + if (isInPrimaryFile()) { + Diag(OnceTok, diag::pp_pragma_once_in_main_file); + return; + } + + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. + SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc(); + + // Mark the file as a once-only file now. + HeaderInfo.MarkFileIncludeOnce(SourceMgr.getFileEntryForLoc(FileLoc)); +} + +void Preprocessor::HandlePragmaMark() { + assert(CurLexer && "No current lexer?"); + CurLexer->ReadToEndOfLine(); +} + + +/// HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'. +/// +void Preprocessor::HandlePragmaPoison(Token &PoisonTok) { + Token Tok; + + while (1) { + // Read the next token to poison. While doing this, pretend that we are + // skipping while reading the identifier to poison. + // This avoids errors on code like: + // #pragma GCC poison X + // #pragma GCC poison X + if (CurLexer) CurLexer->LexingRawMode = true; + LexUnexpandedToken(Tok); + if (CurLexer) CurLexer->LexingRawMode = false; + + // If we reached the end of line, we're done. + if (Tok.is(tok::eom)) return; + + // Can only poison identifiers. + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_pp_invalid_poison); + return; + } + + // Look up the identifier info for the token. We disabled identifier lookup + // by saying we're skipping contents, so we need to do this manually. + IdentifierInfo *II = LookUpIdentifierInfo(Tok); + + // Already poisoned. + if (II->isPoisoned()) continue; + + // If this is a macro identifier, emit a warning. + if (II->hasMacroDefinition()) + Diag(Tok, diag::pp_poisoning_existing_macro); + + // Finally, poison it! 
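+ // (After "#pragma GCC poison printf", any later use of 'printf' is
+ // diagnosed when identifier lookup sees the poisoned bit.)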
+ II->setIsPoisoned();
+ }
+}
+
+/// HandlePragmaSystemHeader - Implement #pragma GCC system_header. We know
+/// that the whole directive has been parsed.
+void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
+ if (isInPrimaryFile()) {
+ Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file);
+ return;
+ }
+
+ // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc.
+ Lexer *TheLexer = getCurrentFileLexer();
+
+ // Mark the file as a system header.
+ const FileEntry *File = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
+ HeaderInfo.MarkFileSystemHeader(File);
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks)
+ Callbacks->FileChanged(TheLexer->getSourceLocation(TheLexer->BufferPtr),
+ PPCallbacks::SystemHeaderPragma,
+ DirectoryLookup::SystemHeaderDir);
+}
+
+/// HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah.
+///
+void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
+ Token FilenameTok;
+ CurLexer->LexIncludeFilename(FilenameTok);
+
+ // If the token kind is EOM, the error has already been diagnosed.
+ if (FilenameTok.is(tok::eom))
+ return;
+
+ // Reserve a buffer to get the spelling.
+ llvm::SmallVector<char, 128> FilenameBuffer;
+ FilenameBuffer.resize(FilenameTok.getLength());
+
+ const char *FilenameStart = &FilenameBuffer[0];
+ unsigned Len = getSpelling(FilenameTok, FilenameStart);
+ const char *FilenameEnd = FilenameStart+Len;
+ bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(),
+ FilenameStart, FilenameEnd);
+ // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+ // error.
+ if (FilenameStart == 0)
+ return;
+
+ // Search include directories for this file.
+ const DirectoryLookup *CurDir;
+ const FileEntry *File = LookupFile(FilenameStart, FilenameEnd,
+ isAngled, 0, CurDir);
+ if (File == 0)
+ return Diag(FilenameTok, diag::err_pp_file_not_found,
+ std::string(FilenameStart, FilenameEnd));
+
+ SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
+ const FileEntry *CurFile = SourceMgr.getFileEntryForLoc(FileLoc);
+
+ // If this file is older than the file it depends on, emit a diagnostic.
+ if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) {
+ // Lex tokens at the end of the message and include them in the message.
+ std::string Message;
+ Lex(DependencyTok);
+ while (DependencyTok.isNot(tok::eom)) {
+ Message += getSpelling(DependencyTok) + " ";
+ Lex(DependencyTok);
+ }
+
+ // Drop the trailing space, guarding against an empty message.
+ if (!Message.empty())
+ Message.erase(Message.end()-1);
+ Diag(FilenameTok, diag::pp_out_of_date_dependency, Message);
+ }
+}
+
+
+/// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
+/// If 'Namespace' is non-null, then it is a token required to exist on the
+/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
+void Preprocessor::AddPragmaHandler(const char *Namespace,
+ PragmaHandler *Handler) {
+ PragmaNamespace *InsertNS = PragmaHandlers;
+
+ // If this is specified to be in a namespace, step down into it.
+ if (Namespace) {
+ IdentifierInfo *NSID = getIdentifierInfo(Namespace);
+
+ // If there is already a pragma handler with the name of this namespace,
+ // we either have an error (directive with the same name as a namespace) or
+ // we already have the namespace to insert into.
+ if (PragmaHandler *Existing = PragmaHandlers->FindHandler(NSID)) {
+ InsertNS = Existing->getIfNamespace();
+ assert(InsertNS != 0 && "Cannot have a pragma namespace and pragma"
+ " handler with the same name!");
+ } else {
+ // Otherwise, this namespace doesn't exist yet, create and insert the
+ // handler for it.
+ InsertNS = new PragmaNamespace(NSID);
+ PragmaHandlers->AddPragma(InsertNS);
+ }
+ }
+
+ // Check to make sure we don't already have a pragma for this identifier.
+ assert(!InsertNS->FindHandler(Handler->getName()) &&
+ "Pragma handler already exists for this identifier!");
+ InsertNS->AddPragma(Handler);
+}
+
+namespace {
+/// PragmaOnceHandler - "#pragma once" marks the file as atomically included.
+struct PragmaOnceHandler : public PragmaHandler {
+ PragmaOnceHandler(const IdentifierInfo *OnceID) : PragmaHandler(OnceID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &OnceTok) {
+ PP.CheckEndOfDirective("#pragma once");
+ PP.HandlePragmaOnce(OnceTok);
+ }
+};
+
+/// PragmaMarkHandler - "#pragma mark ..." is ignored by the compiler, and the
+/// rest of the line is not lexed.
+struct PragmaMarkHandler : public PragmaHandler {
+ PragmaMarkHandler(const IdentifierInfo *MarkID) : PragmaHandler(MarkID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &MarkTok) {
+ PP.HandlePragmaMark();
+ }
+};
+
+/// PragmaPoisonHandler - "#pragma poison x" marks x as not usable.
+struct PragmaPoisonHandler : public PragmaHandler {
+ PragmaPoisonHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &PoisonTok) {
+ PP.HandlePragmaPoison(PoisonTok);
+ }
+};
+
+/// PragmaSystemHeaderHandler - "#pragma system_header" marks the current file
+/// as a system header, which silences warnings in it.
+struct PragmaSystemHeaderHandler : public PragmaHandler {
+ PragmaSystemHeaderHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &SHToken) {
+ PP.HandlePragmaSystemHeader(SHToken);
+ PP.CheckEndOfDirective("#pragma");
+ }
+};
+struct PragmaDependencyHandler : public PragmaHandler {
+ PragmaDependencyHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+ virtual void HandlePragma(Preprocessor &PP, Token &DepToken) {
+ PP.HandlePragmaDependency(DepToken);
+ }
+};
+} // end anonymous namespace
+
+
+/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
+/// #pragma GCC poison/system_header/dependency and #pragma once.
+void Preprocessor::RegisterBuiltinPragmas() {
+ AddPragmaHandler(0, new PragmaOnceHandler(getIdentifierInfo("once")));
+ AddPragmaHandler(0, new PragmaMarkHandler(getIdentifierInfo("mark")));
+ AddPragmaHandler("GCC", new PragmaPoisonHandler(getIdentifierInfo("poison")));
+ AddPragmaHandler("GCC", new PragmaSystemHeaderHandler(
+ getIdentifierInfo("system_header")));
+ AddPragmaHandler("GCC", new PragmaDependencyHandler(
+ getIdentifierInfo("dependency")));
+}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
new file mode 100644
index 00000000000..86156a07728
--- /dev/null
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -0,0 +1,560 @@
+//===--- Preprocessor.cpp - C Language Family Preprocessor Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Preprocessor interface.
+// +//===----------------------------------------------------------------------===// +// +// Options to support: +// -H - Print the name of each header file used. +// -d[MDNI] - Dump various things. +// -fworking-directory - #line's with preprocessor's working dir. +// -fpreprocessed +// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD +// -W* +// -w +// +// Messages to emit: +// "Multiple include guards may be useful for:\n" +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Pragma.h" +#include "clang/Lex/ScratchBuffer.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Streams.h" +#include <ctime> +using namespace clang; + +//===----------------------------------------------------------------------===// + +Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, + TargetInfo &target, SourceManager &SM, + HeaderSearch &Headers) + : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()), + SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts), + CurLexer(0), CurDirLookup(0), CurTokenLexer(0), Callbacks(0) { + ScratchBuf = new ScratchBuffer(SourceMgr); + + // Clear stats. + NumDirectives = NumDefined = NumUndefined = NumPragma = 0; + NumIf = NumElse = NumEndif = 0; + NumEnteredSourceFiles = 0; + NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; + NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; + MaxIncludeStackDepth = 0; + NumSkipped = 0; + + // Default to discarding comments. + KeepComments = false; + KeepMacroComments = false; + + // Macro expansion is enabled. + DisableMacroExpansion = false; + InMacroArgs = false; + NumCachedTokenLexers = 0; + + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. + // This gets unpoisoned where it is allowed. + (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + + Predefines = 0; + + // Initialize the pragma handlers. + PragmaHandlers = new PragmaNamespace(0); + RegisterBuiltinPragmas(); + + // Initialize builtin macros like __LINE__ and friends. + RegisterBuiltinMacros(); +} + +Preprocessor::~Preprocessor() { + // Free any active lexers. + delete CurLexer; + + while (!IncludeMacroStack.empty()) { + delete IncludeMacroStack.back().TheLexer; + delete IncludeMacroStack.back().TheTokenLexer; + IncludeMacroStack.pop_back(); + } + + // Free any macro definitions. + for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I = + Macros.begin(), E = Macros.end(); I != E; ++I) { + // Free the macro definition. + delete I->second; + I->second = 0; + I->first->setHasMacroDefinition(false); + } + + // Free any cached macro expanders. + for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) + delete TokenLexerCache[i]; + + // Release pragma information. + delete PragmaHandlers; + + // Delete the scratch buffer info. + delete ScratchBuf; + + delete Callbacks; +} + +/// Diag - Forwarding function for diagnostics. This emits a diagnostic at +/// the specified Token's location, translating the token's start +/// position in the current buffer into a SourcePosition object for rendering. 
+void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID) {
+ Diags.Report(getFullLoc(Loc), DiagID);
+}
+
+void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID,
+ const std::string &Msg) {
+ Diags.Report(getFullLoc(Loc), DiagID, &Msg, 1);
+}
+
+void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
+ llvm::cerr << tok::getTokenName(Tok.getKind()) << " '"
+ << getSpelling(Tok) << "'";
+
+ if (!DumpFlags) return;
+
+ llvm::cerr << "\t";
+ if (Tok.isAtStartOfLine())
+ llvm::cerr << " [StartOfLine]";
+ if (Tok.hasLeadingSpace())
+ llvm::cerr << " [LeadingSpace]";
+ if (Tok.isExpandDisabled())
+ llvm::cerr << " [ExpandDisabled]";
+ if (Tok.needsCleaning()) {
+ const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
+ llvm::cerr << " [UnClean='" << std::string(Start, Start+Tok.getLength())
+ << "']";
+ }
+
+ llvm::cerr << "\tLoc=<";
+ DumpLocation(Tok.getLocation());
+ llvm::cerr << ">";
+}
+
+void Preprocessor::DumpLocation(SourceLocation Loc) const {
+ SourceLocation LogLoc = SourceMgr.getLogicalLoc(Loc);
+ llvm::cerr << SourceMgr.getSourceName(LogLoc) << ':'
+ << SourceMgr.getLineNumber(LogLoc) << ':'
+ << SourceMgr.getColumnNumber(LogLoc);
+
+ SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(Loc);
+ if (PhysLoc != LogLoc) {
+ llvm::cerr << " <PhysLoc=";
+ DumpLocation(PhysLoc);
+ llvm::cerr << ">";
+ }
+}
+
+void Preprocessor::DumpMacro(const MacroInfo &MI) const {
+ llvm::cerr << "MACRO: ";
+ for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
+ DumpToken(MI.getReplacementToken(i));
+ llvm::cerr << " ";
+ }
+ llvm::cerr << "\n";
+}
+
+void Preprocessor::PrintStats() {
+ llvm::cerr << "\n*** Preprocessor Stats:\n";
+ llvm::cerr << NumDirectives << " directives found:\n";
+ llvm::cerr << " " << NumDefined << " #define.\n";
+ llvm::cerr << " " << NumUndefined << " #undef.\n";
+ llvm::cerr << " #include/#include_next/#import:\n";
+ llvm::cerr << " " << NumEnteredSourceFiles << " source files entered.\n";
+ llvm::cerr << " " << MaxIncludeStackDepth << " max include stack depth\n";
+ llvm::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n";
+ llvm::cerr << " " << NumElse << " #else/#elif.\n";
+ llvm::cerr << " " << NumEndif << " #endif.\n";
+ llvm::cerr << " " << NumPragma << " #pragma.\n";
+ llvm::cerr << NumSkipped << " #if/#ifndef/#ifdef regions skipped\n";
+
+ llvm::cerr << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
+ << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
+ << NumFastMacroExpanded << " on the fast path.\n";
+ llvm::cerr << (NumFastTokenPaste+NumTokenPaste)
+ << " token paste (##) operations performed, "
+ << NumFastTokenPaste << " on the fast path.\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+
+/// getSpelling() - Return the 'spelling' of this token. The spelling of a
+/// token is the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding. In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs,
+/// UCNs, etc.
+std::string Preprocessor::getSpelling(const Token &Tok) const {
+ assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+ // If this token contains nothing interesting, return it directly.
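+ // (Example: an identifier written with an escaped newline, "fo\<newline>o",
+ // lexes with NeedsCleaning set; the slow path below rebuilds "foo".)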
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + if (!Tok.needsCleaning()) + return std::string(TokStart, TokStart+Tok.getLength()); + + std::string Result; + Result.reserve(Tok.getLength()); + + // Otherwise, hard case, relex the characters into the string. + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); + Ptr += CharSize; + } + assert(Result.size() != unsigned(Tok.getLength()) && + "NeedsCleaning flag set on something that didn't need cleaning!"); + return Result; +} + +/// getSpelling - This method is used to get the spelling of a token into a +/// preallocated buffer, instead of as an std::string. The caller is required +/// to allocate enough space for the token, which is guaranteed to be at least +/// Tok.getLength() bytes long. The actual length of the token is returned. +/// +/// Note that this method may do two possible things: it may either fill in +/// the buffer specified with characters, or it may *change the input pointer* +/// to point to a constant buffer with the data already in it (avoiding a +/// copy). The caller is not allowed to modify the returned buffer pointer +/// if an internal buffer is returned. +unsigned Preprocessor::getSpelling(const Token &Tok, + const char *&Buffer) const { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token is an identifier, just return the string from the identifier + // table, which is very quick. + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + Buffer = II->getName(); + + // Return the length of the token. If the token needed cleaning, don't + // include the size of the newlines or trigraphs in it. + if (!Tok.needsCleaning()) + return Tok.getLength(); + else + return strlen(Buffer); + } + + // Otherwise, compute the start of the token in the input lexer buffer. + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + + // If this token contains nothing interesting, return it directly. + if (!Tok.needsCleaning()) { + Buffer = TokStart; + return Tok.getLength(); + } + // Otherwise, hard case, relex the characters into the string. + char *OutBuf = const_cast<char*>(Buffer); + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); + Ptr += CharSize; + } + assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + "NeedsCleaning flag set on something that didn't need cleaning!"); + + return OutBuf-Buffer; +} + + +/// CreateString - Plop the specified string into a scratch buffer and return a +/// location for it. If specified, the source location provides a source +/// location for the token. +SourceLocation Preprocessor:: +CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { + if (SLoc.isValid()) + return ScratchBuf->getToken(Buf, Len, SLoc); + return ScratchBuf->getToken(Buf, Len); +} + + +/// AdvanceToTokenCharacter - Given a location that specifies the start of a +/// token, return a new location that specifies a character within the token. +SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, + unsigned CharNo) { + // If they request the first char of the token, we're trivially done. If this + // is a macro expansion, it doesn't make sense to point to a character within + // the instantiation point (the name). 
We could point to the source
+  // character, but without also pointing to instantiation info, this is
+  // confusing.
+  if (CharNo == 0 || TokStart.isMacroID()) return TokStart;
+
+  // Figure out how many physical characters away the specified logical
+  // character is.  This needs to take into consideration newlines and
+  // trigraphs.
+  const char *TokPtr = SourceMgr.getCharacterData(TokStart);
+  unsigned PhysOffset = 0;
+
+  // The usual case is that tokens don't contain anything interesting.  Skip
+  // over the uninteresting characters.  If a token only consists of simple
+  // chars, this method is extremely fast.
+  while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr))
+    ++TokPtr, --CharNo, ++PhysOffset;
+
+  // If we have a character that may be a trigraph or escaped newline, create a
+  // lexer to parse it correctly.
+  if (CharNo != 0) {
+    // Create a lexer starting at this token position.
+    Lexer TheLexer(TokStart, *this, TokPtr);
+    Token Tok;
+    // Skip over the remaining characters.
+    const char *TokStartPtr = TokPtr;
+    for (; CharNo; --CharNo)
+      TheLexer.getAndAdvanceChar(TokPtr, Tok);
+
+    PhysOffset += TokPtr-TokStartPtr;
+  }
+
+  return TokStart.getFileLocWithOffset(PhysOffset);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Initialization Methods
+//===----------------------------------------------------------------------===//
+
+// Append a #define line to Buf for Macro.  Macro should be of the form "XXX",
+// in which case we emit "#define XXX 1", or "XXX=Y z W", in which case we
+// emit "#define XXX Y z W".  To get a #define with no value, use "XXX=".
+static void DefineBuiltinMacro(std::vector<char> &Buf, const char *Macro,
+                               const char *Command = "#define ") {
+  Buf.insert(Buf.end(), Command, Command+strlen(Command));
+  if (const char *Equal = strchr(Macro, '=')) {
+    // Turn the = into ' '.
+    Buf.insert(Buf.end(), Macro, Equal);
+    Buf.push_back(' ');
+    Buf.insert(Buf.end(), Equal+1, Equal+strlen(Equal));
+  } else {
+    // Push "macroname 1".
+    Buf.insert(Buf.end(), Macro, Macro+strlen(Macro));
+    Buf.push_back(' ');
+    Buf.push_back('1');
+  }
+  Buf.push_back('\n');
+}
+
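A standalone sketch of the text DefineBuiltinMacro emits; Define here is a hypothetical test harness mirroring the logic above, not part of the patch:

#include <cstdio>
#include <cstring>
#include <vector>

// Mirrors DefineBuiltinMacro: "XXX" -> "#define XXX 1\n",
// "XXX=Y z W" -> "#define XXX Y z W\n", "XXX=" -> "#define XXX \n".
static void Define(std::vector<char> &Buf, const char *Macro) {
  const char *Cmd = "#define ";
  Buf.insert(Buf.end(), Cmd, Cmd + strlen(Cmd));
  if (const char *Eq = strchr(Macro, '=')) {
    Buf.insert(Buf.end(), Macro, Eq);      // macro name
    Buf.push_back(' ');
    Buf.insert(Buf.end(), Eq + 1, Eq + strlen(Eq));  // value text
  } else {
    Buf.insert(Buf.end(), Macro, Macro + strlen(Macro));
    Buf.push_back(' ');
    Buf.push_back('1');                    // default value is 1
  }
  Buf.push_back('\n');
}

int main() {
  std::vector<char> Buf;
  Define(Buf, "__STDC__");                 // #define __STDC__ 1
  Define(Buf, "__STDC_VERSION__=199901L"); // #define __STDC_VERSION__ 199901L
  fwrite(&Buf[0], 1, Buf.size(), stdout);
}
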
+static void InitializePredefinedMacros(Preprocessor &PP,
+                                       std::vector<char> &Buf) {
+  // FIXME: Implement magic like cpp_init_builtins for things like __STDC__
+  // and __DATE__ etc.
+#if 0
+  /* __STDC__ has the value 1 under normal circumstances.
+     However, if (a) we are in a system header, (b) the option
+     stdc_0_in_system_headers is true (set by target config), and
+     (c) we are not in strictly conforming mode, then it has the
+     value 0.  (b) and (c) are already checked in cpp_init_builtins.  */
+  //case BT_STDC:
+    if (cpp_in_system_header (pfile))
+      number = 0;
+    else
+      number = 1;
+    break;
+#endif
+  // These should all be defined in the preprocessor according to the
+  // current language configuration.
+  DefineBuiltinMacro(Buf, "__STDC__=1");
+  //DefineBuiltinMacro(Buf, "__ASSEMBLER__=1");
+  if (PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus)
+    DefineBuiltinMacro(Buf, "__STDC_VERSION__=199901L");
+  else if (0) // STDC94 ?
+    DefineBuiltinMacro(Buf, "__STDC_VERSION__=199409L");
+
+  DefineBuiltinMacro(Buf, "__STDC_HOSTED__=1");
+  if (PP.getLangOptions().ObjC1)
+    DefineBuiltinMacro(Buf, "__OBJC__=1");
+  if (PP.getLangOptions().ObjC2)
+    DefineBuiltinMacro(Buf, "__OBJC2__=1");
+
+  // Add the __builtin_va_list typedef.
+  {
+    const char *VAList = PP.getTargetInfo().getVAListDeclaration();
+    Buf.insert(Buf.end(), VAList, VAList+strlen(VAList));
+    Buf.push_back('\n');
+  }
+
+  // Get the target #defines.
+  PP.getTargetInfo().getTargetDefines(Buf);
+
+  // Compiler-set macros.
+  DefineBuiltinMacro(Buf, "__APPLE_CC__=5250");
+  DefineBuiltinMacro(Buf, "__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__=1050");
+  DefineBuiltinMacro(Buf, "__GNUC_MINOR__=0");
+  DefineBuiltinMacro(Buf, "__GNUC_PATCHLEVEL__=1");
+  DefineBuiltinMacro(Buf, "__GNUC__=4");
+  DefineBuiltinMacro(Buf, "__GXX_ABI_VERSION=1002");
+  DefineBuiltinMacro(Buf, "__VERSION__=\"4.0.1 (Apple Computer, Inc. "
+                     "build 5250)\"");
+
+  // Build configuration options.
+  DefineBuiltinMacro(Buf, "__DYNAMIC__=1");
+  DefineBuiltinMacro(Buf, "__FINITE_MATH_ONLY__=0");
+  DefineBuiltinMacro(Buf, "__NO_INLINE__=1");
+  DefineBuiltinMacro(Buf, "__PIC__=1");
+
+  if (PP.getLangOptions().CPlusPlus) {
+    DefineBuiltinMacro(Buf, "__DEPRECATED=1");
+    DefineBuiltinMacro(Buf, "__EXCEPTIONS=1");
+    DefineBuiltinMacro(Buf, "__GNUG__=4");
+    DefineBuiltinMacro(Buf, "__GXX_WEAK__=1");
+    DefineBuiltinMacro(Buf, "__cplusplus=1");
+    DefineBuiltinMacro(Buf, "__private_extern__=extern");
+  }
+  if (PP.getLangOptions().Microsoft) {
+    DefineBuiltinMacro(Buf, "__stdcall=");
+    DefineBuiltinMacro(Buf, "__cdecl=");
+    DefineBuiltinMacro(Buf, "_cdecl=");
+    DefineBuiltinMacro(Buf, "__ptr64=");
+    DefineBuiltinMacro(Buf, "__w64=");
+    DefineBuiltinMacro(Buf, "__forceinline=");
+    DefineBuiltinMacro(Buf, "__int8=char");
+    DefineBuiltinMacro(Buf, "__int16=short");
+    DefineBuiltinMacro(Buf, "__int32=int");
+    DefineBuiltinMacro(Buf, "__int64=long long");
+    DefineBuiltinMacro(Buf, "__declspec(X)=");
+  }
+  // FIXME: Should emit a #line directive here.
+}
+
+
+/// EnterMainSourceFile - Enter the main source file into the preprocessor,
+/// which implicitly adds the builtin defines and such.
+void Preprocessor::EnterMainSourceFile() {
+  unsigned MainFileID = SourceMgr.getMainFileID();
+
+  // Enter the main file source buffer.
+  EnterSourceFile(MainFileID, 0);
+
+  // Tell the header info that the main file was entered.  If the file is
+  // later #imported, it won't be re-entered.
+  if (const FileEntry *FE =
+        SourceMgr.getFileEntryForLoc(SourceLocation::getFileLoc(MainFileID, 0)))
+    HeaderInfo.IncrementIncludeCount(FE);
+
+  std::vector<char> PrologFile;
+  PrologFile.reserve(4080);
+
+  // Install things like __POWERPC__, __GNUC__, etc into the macro table.
+  InitializePredefinedMacros(*this, PrologFile);
+
+  // Add on the predefines from the driver.
+  PrologFile.insert(PrologFile.end(), Predefines, Predefines+strlen(Predefines));
+
+  // Memory buffer must end with a null byte!
+  PrologFile.push_back(0);
+
+  // Now that we have emitted the predefined macros, #includes, etc into
+  // PrologFile, preprocess it to populate the initial preprocessor state.
+  llvm::MemoryBuffer *SB =
+    llvm::MemoryBuffer::getMemBufferCopy(&PrologFile.front(), &PrologFile.back(),
+                                         "<predefines>");
+  assert(SB && "Cannot fail to create predefined source buffer");
+  unsigned FileID = SourceMgr.createFileIDForMemBuffer(SB);
+  assert(FileID && "Could not create FileID for predefines?");
+
+  // Start parsing the predefines.
+  EnterSourceFile(FileID, 0);
+}
+
+
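For concreteness, the generated "<predefines>" buffer is ordinary preprocessor text. On a C99 target it would begin roughly as follows (abridged and illustrative; the va_list line comes from getVAListDeclaration and is target-specific):

#define __STDC__ 1
#define __STDC_VERSION__ 199901L
#define __STDC_HOSTED__ 1
typedef char *__builtin_va_list;    /* target-dependent; assumption here */
#define __GNUC__ 4
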
+//===----------------------------------------------------------------------===//
+// Lexer Event Handling.
+//===----------------------------------------------------------------------===//
+
+/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
+/// identifier information for the token and install it into the token.
+IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
+                                                   const char *BufPtr) {
+  assert(Identifier.is(tok::identifier) && "Not an identifier!");
+  assert(Identifier.getIdentifierInfo() == 0 &&
+         "IdentifierInfo already exists!");
+
+  // Look up this token, see if it is a macro, or if it is a language keyword.
+  IdentifierInfo *II;
+  if (BufPtr && !Identifier.needsCleaning()) {
+    // No cleaning needed, just use the characters from the lexed buffer.
+    II = getIdentifierInfo(BufPtr, BufPtr+Identifier.getLength());
+  } else {
+    // Cleaning needed, allocate a buffer, clean into it, then use the buffer.
+    llvm::SmallVector<char, 64> IdentifierBuffer;
+    IdentifierBuffer.resize(Identifier.getLength());
+    const char *TmpBuf = &IdentifierBuffer[0];
+    unsigned Size = getSpelling(Identifier, TmpBuf);
+    II = getIdentifierInfo(TmpBuf, TmpBuf+Size);
+  }
+  Identifier.setIdentifierInfo(II);
+  return II;
+}
+
+
+/// HandleIdentifier - This callback is invoked when the lexer reads an
+/// identifier.  This callback looks up the identifier in the map and/or
+/// potentially macro expands it or turns it into a named token (like 'for').
+void Preprocessor::HandleIdentifier(Token &Identifier) {
+  assert(Identifier.getIdentifierInfo() &&
+         "Can't handle identifiers without identifier info!");
+
+  IdentifierInfo &II = *Identifier.getIdentifierInfo();
+
+  // If this identifier was poisoned, and if it was not produced from a macro
+  // expansion, emit an error.
+  if (II.isPoisoned() && CurLexer) {
+    if (&II != Ident__VA_ARGS__)   // We warn about __VA_ARGS__ with poisoning.
+      Diag(Identifier, diag::err_pp_used_poisoned_id);
+    else
+      Diag(Identifier, diag::ext_pp_bad_vaargs_use);
+  }
+
+  // If this is a macro to be expanded, do it.
+  if (MacroInfo *MI = getMacroInfo(&II)) {
+    if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) {
+      if (MI->isEnabled()) {
+        if (!HandleMacroExpandedIdentifier(Identifier, MI))
+          return;
+      } else {
+        // C99 6.10.3.4p2 says that a disabled macro may never again be
+        // expanded, even if it's in a context where it could be expanded in
+        // the future.
+        Identifier.setFlag(Token::DisableExpand);
+      }
+    }
+  }
+
+  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
+  // then we act as if it is the actual operator and not the textual
+  // representation of it.
+  if (II.isCPlusPlusOperatorKeyword())
+    Identifier.setIdentifierInfo(0);
+
+  // Change the kind of this identifier to the appropriate token kind, e.g.
+  // turning "for" into a keyword.
+  Identifier.setKind(II.getTokenID());
+
+  // If this is an extension token, diagnose its use.
+  // FIXME: Tried (unsuccessfully) to shut this up when compiling with gnu99.
+  // For now, I'm just commenting it out (while I work on attributes).
+  if (II.isExtensionToken() && Features.C99)
+    Diag(Identifier, diag::ext_token_used);
+}
+
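The C++ 2.11p2 handling in HandleIdentifier is observable at the source level: alternative tokens behave as the operators they name. An illustrative snippet, not part of the patch:

#include <cstdio>

int main() {
  bool a = true, b = false;
  // 'and' and 'not' are alternative representations of && and !.
  // HandleIdentifier clears their IdentifierInfo so later phases treat
  // them as the operators themselves, not as identifiers.
  if (a and not b)
    std::printf("alternative operator tokens work\n");
  return 0;
}
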
diff --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp
new file mode 100644
index 00000000000..99fbdf75654
--- /dev/null
+++ b/clang/lib/Lex/ScratchBuffer.cpp
@@ -0,0 +1,72 @@
+//===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScratchBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstring>
+using namespace clang;
+
+// ScratchBufSize - The size of each chunk of scratch memory.  Slightly less
+// than a page, almost certainly enough for anything. :)
+static const unsigned ScratchBufSize = 4060;
+
+ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) {
+  // Set BytesUsed so that the first call to getToken will require an alloc.
+  BytesUsed = ScratchBufSize;
+  FileID = 0;
+}
+
+/// getToken - Splat the specified text into a temporary MemoryBuffer and
+/// return a SourceLocation that refers to the token.  This is just like the
+/// method below, but returns a location that indicates the physloc of the
+/// token.
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
+  if (BytesUsed+Len > ScratchBufSize)
+    AllocScratchBuffer(Len);
+
+  // Copy the token data into the buffer.
+  memcpy(CurBuffer+BytesUsed, Buf, Len);
+
+  // Remember that we used these bytes.
+  BytesUsed += Len;
+
+  assert(BytesUsed-Len < (1 << SourceLocation::FilePosBits) &&
+         "Out of range file position!");
+
+  return SourceLocation::getFileLoc(FileID, BytesUsed-Len);
+}
+
+
+/// getToken - Splat the specified text into a temporary MemoryBuffer and
+/// return a SourceLocation that refers to the token.  The SourceLoc value
+/// gives a virtual location that the token will appear to be from.
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+                                       SourceLocation SourceLoc) {
+  // Map the physloc to the specified sourceloc.
+  return SourceMgr.getInstantiationLoc(getToken(Buf, Len), SourceLoc);
+}
+
+void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
+  // Only pay attention to the requested length if it is larger than our
+  // default page size.  If it is, we allocate an entire chunk for it.  This
+  // is to support gigantic tokens, which almost certainly won't happen. :)
+  if (RequestLen < ScratchBufSize)
+    RequestLen = ScratchBufSize;
+
+  llvm::MemoryBuffer *Buf =
+    llvm::MemoryBuffer::getNewMemBuffer(RequestLen, "<scratch space>");
+  FileID = SourceMgr.createFileIDForMemBuffer(Buf);
+  CurBuffer = const_cast<char*>(Buf->getBufferStart());
+  BytesUsed = 0;
+}
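ScratchBuffer is essentially a bump allocator over page-sized chunks. A freestanding sketch of the same discipline, with hypothetical names and no SourceManager bookkeeping:

#include <cstring>

class BumpScratch {
  static const unsigned ChunkSize = 4060;  // slightly under a page
  char *Cur;
  unsigned Used;
public:
  BumpScratch() : Cur(0), Used(ChunkSize) {}  // force alloc on first use
  const char *copy(const char *Buf, unsigned Len) {
    if (Used + Len > ChunkSize) {            // out of room: grab a new chunk
      unsigned Size = Len < ChunkSize ? ChunkSize : Len;  // oversize tokens
      Cur = new char[Size];                  // old chunks intentionally leak:
      Used = 0;                              // ScratchBuffer hands them to the
    }                                        // SourceManager for its lifetime
    std::memcpy(Cur + Used, Buf, Len);
    Used += Len;
    return Cur + Used - Len;
  }
};
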
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
new file mode 100644
index 00000000000..fc8cfd715c4
--- /dev/null
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -0,0 +1,488 @@
+//===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenLexer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/TokenLexer.h"
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace clang;
+
+
+/// Create a TokenLexer for the specified macro with the specified actual
+/// arguments.  Note that this ctor takes ownership of the ActualArgs pointer.
+void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) {
+  // If the client is reusing a TokenLexer, make sure to free any memory
+  // associated with it.
+  destroy();
+
+  Macro = PP.getMacroInfo(Tok.getIdentifierInfo());
+  ActualArgs = Actuals;
+  CurToken = 0;
+  InstantiateLoc = Tok.getLocation();
+  AtStartOfLine = Tok.isAtStartOfLine();
+  HasLeadingSpace = Tok.hasLeadingSpace();
+  Tokens = &*Macro->tokens_begin();
+  OwnsTokens = false;
+  DisableMacroExpansion = false;
+  NumTokens = Macro->tokens_end()-Macro->tokens_begin();
+
+  // If this is a function-like macro, expand the arguments and change
+  // Tokens to point to the expanded tokens.
+  if (Macro->isFunctionLike() && Macro->getNumArgs())
+    ExpandFunctionArguments();
+
+  // Mark the macro as currently disabled, so that it is not recursively
+  // expanded.  The macro must be disabled only after argument pre-expansion
+  // of function-like macro arguments occurs.
+  Macro->DisableMacro();
+}
+
+
+
+/// Create a TokenLexer for the specified token stream.  If 'ownsTokens' is
+/// true, the TokenLexer takes ownership of the specified token vector and
+/// deletes it when done.
+void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
+                      bool disableMacroExpansion, bool ownsTokens) {
+  // If the client is reusing a TokenLexer, make sure to free any memory
+  // associated with it.
+  destroy();
+
+  Macro = 0;
+  ActualArgs = 0;
+  Tokens = TokArray;
+  OwnsTokens = ownsTokens;
+  DisableMacroExpansion = disableMacroExpansion;
+  NumTokens = NumToks;
+  CurToken = 0;
+  InstantiateLoc = SourceLocation();
+  AtStartOfLine = false;
+  HasLeadingSpace = false;
+
+  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
+  // returned unmodified.
+  if (NumToks != 0) {
+    AtStartOfLine   = TokArray[0].isAtStartOfLine();
+    HasLeadingSpace = TokArray[0].hasLeadingSpace();
+  }
+}
+
+
+void TokenLexer::destroy() {
+  // If this was a function-like macro that actually uses its arguments,
+  // delete the expanded tokens.
+  if (OwnsTokens) {
+    delete [] Tokens;
+    Tokens = 0;
+  }
+
+  // TokenLexer owns its actual arguments.
+  if (ActualArgs) ActualArgs->destroy();
+}
+
+/// Expand the arguments of a function-like macro so that we can quickly
+/// return preexpanded tokens from Tokens.
+void TokenLexer::ExpandFunctionArguments() {
+  llvm::SmallVector<Token, 128> ResultToks;
+
+  // Loop through 'Tokens', expanding them into ResultToks.  Keep
+  // track of whether we change anything.  If not, no need to keep them.  If
+  // so, we install the newly expanded sequence as the new 'Tokens' list.
+  bool MadeChange = false;
+
+  // NextTokGetsSpace - When this is true, the next token appended to the
+  // output list will get a leading space, regardless of whether it had one to
+  // begin with or not.  This is used for placemarker support.
+  bool NextTokGetsSpace = false;
+
+  for (unsigned i = 0, e = NumTokens; i != e; ++i) {
+    // If we found the stringify operator, get the argument stringified.  The
+    // preprocessor already verified that the following token is a macro name
+    // when the #define was parsed.
+    const Token &CurTok = Tokens[i];
+    if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) {
+      int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
+      assert(ArgNo != -1 && "Token following # is not an argument?");
+
+      Token Res;
+      if (CurTok.is(tok::hash))  // Stringify
+        Res = ActualArgs->getStringifiedArgument(ArgNo, PP);
+      else {
+        // 'charify': don't bother caching these.
+        Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
+                                           PP, true);
+      }
+
+      // The stringified/charified string leading space flag gets set to match
+      // the #/#@ operator.
+      if (CurTok.hasLeadingSpace() || NextTokGetsSpace)
+        Res.setFlag(Token::LeadingSpace);
+
+      ResultToks.push_back(Res);
+      MadeChange = true;
+      ++i;  // Skip arg name.
+      NextTokGetsSpace = false;
+      continue;
+    }
+
+    // Otherwise, if this is not an argument token, just add the token to the
+    // output buffer.
+    IdentifierInfo *II = CurTok.getIdentifierInfo();
+    int ArgNo = II ? Macro->getArgumentNum(II) : -1;
+    if (ArgNo == -1) {
+      // This isn't an argument, just add it.
+      ResultToks.push_back(CurTok);
+
+      if (NextTokGetsSpace) {
+        ResultToks.back().setFlag(Token::LeadingSpace);
+        NextTokGetsSpace = false;
+      }
+      continue;
+    }
+
+    // An argument is expanded in some way; the result differs from the input.
+    MadeChange = true;
+
+    // Otherwise, this is a use of the argument.  Find out if there is a paste
+    // (##) operator before or after the argument.
+    bool PasteBefore =
+      !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
+    bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);
+
+    // If it is not the LHS/RHS of a ## operator, we must pre-expand the
+    // argument and substitute the expanded tokens into the result.  This is
+    // C99 6.10.3.1p1.
+    if (!PasteBefore && !PasteAfter) {
+      const Token *ResultArgToks;
+
+      // Only preexpand the argument if it could possibly need it.  This
+      // avoids some work in common cases.
+      const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
+      if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
+        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
+      else
+        ResultArgToks = ArgTok;  // Use non-preexpanded tokens.
+
+      // If the arg token expanded into anything, append it.
+      if (ResultArgToks->isNot(tok::eof)) {
+        unsigned FirstResult = ResultToks.size();
+        unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
+        ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
+
+        // If any tokens were substituted from the argument, the whitespace
+        // before the first token should match the whitespace of the arg
+        // identifier.
+        ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
+                                             CurTok.hasLeadingSpace() ||
+                                             NextTokGetsSpace);
+        NextTokGetsSpace = false;
+      } else {
+        // If this is an empty argument, and if there was whitespace before
+        // the formal token, make sure the next token gets whitespace before
+        // it.
+        NextTokGetsSpace = CurTok.hasLeadingSpace();
+      }
+      continue;
+    }
+
+    // Okay, we have a token that is either the LHS or RHS of a paste (##)
+    // argument.  It gets substituted as its non-pre-expanded tokens.
+    const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
+    unsigned NumToks = MacroArgs::getArgLength(ArgToks);
+    if (NumToks) {  // Not an empty argument?
+      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
+      // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error
+      // when the expander tries to paste ',' with the first token of the
+      // __VA_ARGS__ expansion.
+      if (PasteBefore && ResultToks.size() >= 2 &&
+          ResultToks[ResultToks.size()-2].is(tok::comma) &&
+          (unsigned)ArgNo == Macro->getNumArgs()-1 &&
+          Macro->isVariadic()) {
+        // Remove the paste operator, report use of the extension.
+        PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
+        ResultToks.pop_back();
+      }
+
+      ResultToks.append(ArgToks, ArgToks+NumToks);
+
+      // If the next token was supposed to get leading whitespace, ensure it
+      // has it now.
+      if (NextTokGetsSpace) {
+        ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace);
+        NextTokGetsSpace = false;
+      }
+      continue;
+    }
+
+    // If an empty argument is on the LHS or RHS of a paste, the standard (C99
+    // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur.  We
+    // implement this by eating ## operators when a LHS or RHS expands to
+    // empty.
+    NextTokGetsSpace |= CurTok.hasLeadingSpace();
+    if (PasteAfter) {
+      // Discard the argument token and skip (don't copy to the expansion
+      // buffer) the paste operator after it.
+      NextTokGetsSpace |= Tokens[i+1].hasLeadingSpace();
+      ++i;
+      continue;
+    }
+
+    // If this is on the RHS of a paste operator, we've already copied the
+    // paste operator to the ResultToks list.  Remove it.
+    assert(PasteBefore && ResultToks.back().is(tok::hashhash));
+    NextTokGetsSpace |= ResultToks.back().hasLeadingSpace();
+    ResultToks.pop_back();
+
+    // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
+    // and if the macro had at least one real argument, and if the token
+    // before the ## was a comma, remove the comma.
+    if ((unsigned)ArgNo == Macro->getNumArgs()-1 &&  // is __VA_ARGS__
+        ActualArgs->isVarargsElidedUse() &&          // Argument elided.
+        !ResultToks.empty() && ResultToks.back().is(tok::comma)) {
+      // Never add a space, even if the comma, ##, or arg had a space.
+      NextTokGetsSpace = false;
+      // Remove the paste operator, report use of the extension.
+      PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
+      ResultToks.pop_back();
+    }
+    continue;
+  }
+
+  // If anything changed, install this as the new Tokens list.
+  if (MadeChange) {
+    // This is deleted in the dtor.
+    NumTokens = ResultToks.size();
+    Token *Res = new Token[ResultToks.size()];
+    if (NumTokens)
+      memcpy(Res, &ResultToks[0], NumTokens*sizeof(Token));
+    Tokens = Res;
+    OwnsTokens = true;
+  }
+}
+
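At the source level, the '#' stringify handling and the GNU ", ## __VA_ARGS__" comma elision implemented above look like this (illustrative; calling a variadic macro with no variadic arguments is the GNU extension the code reports with ext_paste_comma):

#include <cstdio>

#define STR(x) #x                 // '#' stringifies the unexpanded argument
#define WARN(fmt, ...) std::fprintf(stderr, fmt, ##__VA_ARGS__)

int main() {
  std::puts(STR(a + b));   // prints: a + b
  WARN("no args\n");       // GNU extension: ', ##' drops the comma when
                           // __VA_ARGS__ is empty, avoiding a paste error
  WARN("%d args\n", 1);    // comma kept when arguments are present
}
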
+/// Lex - Lex and return a token from this macro stream.
+///
+void TokenLexer::Lex(Token &Tok) {
+  // Lexing off the end of the macro, pop this macro off the expansion stack.
+  if (isAtEnd()) {
+    // If this is a macro (not a token stream), mark the macro enabled now
+    // that it is no longer being expanded.
+    if (Macro) Macro->EnableMacro();
+
+    // Pop this context off the preprocessor's lexer stack and get the next
+    // token.  This will delete "this", so remember the PP instance var.
+    Preprocessor &PPCache = PP;
+    if (PP.HandleEndOfTokenLexer(Tok))
+      return;
+
+    // HandleEndOfTokenLexer may not return a token.  If it doesn't, lex
+    // whatever is next.
+    return PPCache.Lex(Tok);
+  }
+
+  // If this is the first token of the expanded result, we inherit spacing
+  // properties later.
+  bool isFirstToken = CurToken == 0;
+
+  // Get the next token to return.
+  Tok = Tokens[CurToken++];
+
+  // If this token is followed by a token paste (##) operator, paste the
+  // tokens!
+  if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash))
+    if (PasteTokens(Tok)) {
+      // When handling the Microsoft /##/ extension, the final token is
+      // returned by PasteTokens, not the pasted token.
+      return;
+    }
+
+  // The token's current location indicates where the token was lexed from.
+  // We need this information to compute the spelling of the token, but any
+  // diagnostics for the expanded token should appear as if they came from
+  // InstantiationLoc.  Pull this information together into a new
+  // SourceLocation that captures all of this.
+  if (InstantiateLoc.isValid()) {   // Don't do this for token streams.
+    SourceManager &SrcMgr = PP.getSourceManager();
+    Tok.setLocation(SrcMgr.getInstantiationLoc(Tok.getLocation(),
+                                               InstantiateLoc));
+  }
+
+  // If this is the first token, set the lexical properties of the token to
+  // match the lexical properties of the macro identifier.
+  if (isFirstToken) {
+    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+  }
+
+  // Handle recursive expansion!
+  if (Tok.getIdentifierInfo() && !DisableMacroExpansion)
+    return PP.HandleIdentifier(Tok);
+
+  // Otherwise, return a normal token.
+}
+
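Lex re-enables the macro once its expansion is exhausted; together with the DisableMacro call in Init, this yields the C99 6.10.3.4p2 non-recursion rule. An illustrative input, not part of the patch:

#include <cstdio>

void trace(const char *s) { std::printf("trace: %s\n", s); }

// Inside the expansion of 'trace', the name 'trace' is marked
// non-expandable (Token::DisableExpand), so it binds to the function.
#define trace(s) (std::printf("calling %s\n", #s), trace(s))

int main() {
  trace("hi");  // expands exactly once; the inner 'trace' calls the function
}
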
+/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
+/// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
+/// is another ## after it, chomp it iteratively.  Return the result as Tok.
+/// If this returns true, the caller should immediately return the token.
+bool TokenLexer::PasteTokens(Token &Tok) {
+  llvm::SmallVector<char, 128> Buffer;
+  do {
+    // Consume the ## operator.
+    SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
+    ++CurToken;
+    assert(!isAtEnd() && "No token on the RHS of a paste operator!");
+
+    // Get the RHS token.
+    const Token &RHS = Tokens[CurToken];
+
+    bool isInvalid = false;
+
+    // Allocate space for the result token.  This is guaranteed to be enough
+    // for the two tokens and a null terminator.
+    Buffer.resize(Tok.getLength() + RHS.getLength() + 1);
+
+    // Get the spelling of the LHS token in Buffer.
+    const char *BufPtr = &Buffer[0];
+    unsigned LHSLen = PP.getSpelling(Tok, BufPtr);
+    if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
+      memcpy(&Buffer[0], BufPtr, LHSLen);
+
+    BufPtr = &Buffer[LHSLen];
+    unsigned RHSLen = PP.getSpelling(RHS, BufPtr);
+    if (BufPtr != &Buffer[LHSLen])   // Really, we want the chars in Buffer!
+      memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
+
+    // Add null terminator.
+    Buffer[LHSLen+RHSLen] = '\0';
+
+    // Trim excess space.
+    Buffer.resize(LHSLen+RHSLen+1);
+
+    // Plop the pasted result (including the trailing null) into a scratch
+    // buffer where we can lex it.
+    SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());
+
+    // Lex the resultant pasted token into Result.
+    Token Result;
+
+    // Avoid testing /*, as the lexer would think it is the start of a comment
+    // and emit an error that it is unterminated.
+    if (Tok.is(tok::slash) && RHS.is(tok::star)) {
+      isInvalid = true;
+    } else if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) {
+      // Common paste case: identifier+identifier = identifier.  Avoid
+      // creating a lexer and other overhead.
+      PP.IncrementPasteCounter(true);
+      Result.startToken();
+      Result.setKind(tok::identifier);
+      Result.setLocation(ResultTokLoc);
+      Result.setLength(LHSLen+RHSLen);
+    } else {
+      PP.IncrementPasteCounter(false);
+
+      // Make a lexer to lex this string from.
+      SourceManager &SourceMgr = PP.getSourceManager();
+      const char *ResultStrData = SourceMgr.getCharacterData(ResultTokLoc);
+
+      // Make a lexer object so that we lex and expand the paste result.
+      Lexer *TL = new Lexer(ResultTokLoc, PP, ResultStrData,
+                            ResultStrData+LHSLen+RHSLen /*don't include null*/);
+
+      // Lex a token in raw mode.  This way it won't look up identifiers
+      // automatically, lexing off the end will return an eof token, and
+      // warnings are disabled.  This returns true if the result token is the
+      // entire buffer.
+      bool IsComplete = TL->LexRawToken(Result);
+
+      // If we got an EOF token, we didn't form even ONE token.  For example,
+      // we did "/ ## /" to get "//".
+      IsComplete &= Result.isNot(tok::eof);
+      isInvalid = !IsComplete;
+
+      // We're now done with the temporary lexer.
+      delete TL;
+    }
+
+    // If pasting the two tokens didn't form a full new token, this is an
+    // error.  This occurs with "x ## +" and other stuff.  Return with Tok
+    // unmodified and with RHS as the next token to lex.
+    if (isInvalid) {
+      // Test for the Microsoft extension of /##/ turning into // here on the
+      // error path.
+      if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) &&
+          RHS.is(tok::slash)) {
+        HandleMicrosoftCommentPaste(Tok);
+        return true;
+      } else {
+        // TODO: If not in assembler language mode.
+        PP.Diag(PasteOpLoc, diag::err_pp_bad_paste,
+                std::string(Buffer.begin(), Buffer.end()-1));
+        return false;
+      }
+    }
+
+    // Turn ## into 'unknown' to avoid # ## # from looking like a paste
+    // operator.
+    if (Result.is(tok::hashhash))
+      Result.setKind(tok::unknown);
+    // FIXME: Turn __VA_ARGS__ into "not a token"?
+
+    // Transfer properties of the LHS over to the Result.
+    Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
+    Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());
+
+    // Finally, replace LHS with the result, consume the RHS, and iterate.
+    ++CurToken;
+    Tok = Result;
+  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
+
+  // Now that we got the result token, it will be subject to expansion.  Since
+  // token pasting re-lexes the result token in raw mode, identifier
+  // information isn't looked up.  As such, if the result is an identifier,
+  // look up id info.
+  if (Tok.is(tok::identifier)) {
+    // Look up the identifier info for the token.  We disabled identifier
+    // lookup by saying we're skipping contents, so we need to do this
+    // manually.
+    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+  }
+  return false;
+}
+
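The identifier fast path and the err_pp_bad_paste path above correspond to source like this (illustrative, not part of the patch):

#include <cstdio>

#define CAT(a, b) a##b   // '##' pastes two tokens into one

int main() {
  int xy = 42;
  std::printf("%d\n", CAT(x, y));  // pastes 'x' and 'y' into identifier 'xy'
  // CAT(x, +) would be ill-formed: "x+" is not a single token, which is
  // exactly the err_pp_bad_paste case diagnosed above.
}
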
+/// isNextTokenLParen - If the next token lexed will pop this macro off the
+/// expansion stack, return 2.  If the next unexpanded token is a '(', return
+/// 1, otherwise return 0.
+unsigned TokenLexer::isNextTokenLParen() const {
+  // Out of tokens?
+  if (isAtEnd())
+    return 2;
+  return Tokens[CurToken].is(tok::l_paren);
+}
+
+
+/// HandleMicrosoftCommentPaste - In Microsoft compatibility mode, /##/ pastes
+/// together to form a comment that comments out everything in the current
+/// macro, other active macros, and anything left on the current physical
+/// source line of the instantiated buffer.  Handle this by returning the
+/// first token on the next line.
+void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {
+  // We 'comment out' the rest of this macro by just ignoring the rest of the
+  // tokens that have not been lexed yet, if any.
+
+  // Since this must be a macro, mark the macro enabled now that it is no
+  // longer being expanded.
+  assert(Macro && "Token streams can't paste comments");
+  Macro->EnableMacro();
+
+  PP.HandleMicrosoftCommentPaste(Tok);
+}
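The extension handled here is visible at the source level (illustrative only; accepted when LangOptions.Microsoft is set, rejected by conforming preprocessors):

// Microsoft mode: '/##/' pastes into '//', turning the rest of the macro
// and the rest of the line into a comment.
#define COMMENT /##/ everything after the paste is swallowed
int x = 1; COMMENT and this tail of the line is ignored too
int y = 2;  // lexing resumes at the first token of the next line
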