summary refs log tree commit diff stats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r-- clang/include/clang/Basic/SourceManager.h 4
-rw-r--r-- clang/lib/Basic/SourceManager.cpp 38
-rw-r--r-- clang/tools/clang-format/ClangFormat.cpp 27
-rw-r--r-- clang/unittests/Basic/SourceManagerTest.cpp 41
4 files changed, 70 insertions, 40 deletions
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 3185ca0f4a2..ec1b0bcf989 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -226,6 +226,10 @@ namespace SrcMgr {
bool shouldFreeBuffer() const {
return (Buffer.getInt() & DoNotFreeFlag) == 0;
}
+
+ // If BufStr starts with an invalid (unsupported, i.e. non-UTF-8) BOM,
+ // returns the name of that BOM's encoding; otherwise, returns nullptr.
+ static const char *getInvalidBOM(StringRef BufStr);
};
// Assert that the \c ContentCache objects will always be 8-byte aligned so
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 58b95289eaf..5f457d6f9e3 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -95,6 +95,29 @@ void ContentCache::replaceBuffer(const llvm::MemoryBuffer *B, bool DoNotFree) {
Buffer.setInt((B && DoNotFree) ? DoNotFreeFlag : 0);
}
+const char *ContentCache::getInvalidBOM(StringRef BufStr) {
+ // Match the start of BufStr against the UTF Byte Order Marks (BOMs) of the
+ // encodings we do not support; only UTF-8, with or without a BOM, is
+ // accepted right now. See http://en.wikipedia.org/wiki/Byte_order_mark for
+ // more information.
+ //
+ // The UTF-32 marks are listed ahead of the UTF-16 ones because the
+ // UTF-16 (LE) mark is a prefix of the UTF-32 (LE) mark.
+ return llvm::StringSwitch<const char *>(BufStr)
+ .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+ "UTF-32 (BE)")
+ .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+ "UTF-32 (LE)")
+ .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+ .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+ .StartsWith("\x2B\x2F\x76", "UTF-7")
+ .StartsWith("\xF7\x64\x4C", "UTF-1")
+ .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
+ .StartsWith("\x0E\xFE\xFF", "SCSU")
+ .StartsWith("\xFB\xEE\x28", "BOCU-1")
+ .StartsWith("\x84\x31\x95\x33", "GB-18030")
+ .Default(nullptr);
+}
+
const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
FileManager &FM,
SourceLocation Loc,
@@ -190,20 +213,7 @@ const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
// (BOM). We only support UTF-8 with and without a BOM right now. See
// http://en.wikipedia.org/wiki/Byte_order_mark for more information.
StringRef BufStr = Buffer.getPointer()->getBuffer();
- const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
- .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
- "UTF-32 (BE)")
- .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
- "UTF-32 (LE)")
- .StartsWith("\xFE\xFF", "UTF-16 (BE)")
- .StartsWith("\xFF\xFE", "UTF-16 (LE)")
- .StartsWith("\x2B\x2F\x76", "UTF-7")
- .StartsWith("\xF7\x64\x4C", "UTF-1")
- .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
- .StartsWith("\x0E\xFE\xFF", "SCSU")
- .StartsWith("\xFB\xEE\x28", "BOCU-1")
- .StartsWith("\x84\x31\x95\x33", "GB-18030")
- .Default(nullptr);
+ const char *InvalidBOM = getInvalidBOM(BufStr);
if (InvalidBOM) {
Diag.Report(Loc, diag::err_unsupported_bom)
diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index a10541d88f0..cbbb52bd0aa 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -289,31 +289,6 @@ static void outputReplacementsXML(const Replacements &Replaces) {
}
}
-// If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
-// nullptr.
-static const char *getInValidBOM(StringRef BufStr) {
- // Check to see if the buffer has a UTF Byte Order Mark (BOM).
- // We only support UTF-8 with and without a BOM right now. See
- // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
- // for more information.
- const char *InvalidBOM =
- llvm::StringSwitch<const char *>(BufStr)
- .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
- "UTF-32 (BE)")
- .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
- "UTF-32 (LE)")
- .StartsWith("\xFE\xFF", "UTF-16 (BE)")
- .StartsWith("\xFF\xFE", "UTF-16 (LE)")
- .StartsWith("\x2B\x2F\x76", "UTF-7")
- .StartsWith("\xF7\x64\x4C", "UTF-1")
- .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
- .StartsWith("\x0E\xFE\xFF", "SCSU")
- .StartsWith("\xFB\xEE\x28", "BOCU-1")
- .StartsWith("\x84\x31\x95\x33", "GB-18030")
- .Default(nullptr);
- return InvalidBOM;
-}
-
static bool
emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName,
const std::unique_ptr<llvm::MemoryBuffer> &Code) {
@@ -412,7 +387,7 @@ static bool format(StringRef FileName) {
StringRef BufStr = Code->getBuffer();
- const char *InvalidBOM = getInValidBOM(BufStr);
+ const char *InvalidBOM = SrcMgr::ContentCache::getInvalidBOM(BufStr);
if (InvalidBOM) {
errs() << "error: encoding with unsupported byte order mark \""
diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp
index bc7031e1fd2..465f7a06f71 100644
--- a/clang/unittests/Basic/SourceManagerTest.cpp
+++ b/clang/unittests/Basic/SourceManagerTest.cpp
@@ -200,6 +200,47 @@ TEST_F(SourceManagerTest, locationPrintTest) {
"</mainFile.cpp:1:1, /test-header.h:1:1>");
}
+TEST_F(SourceManagerTest, getInvalidBOM) {
+ // Buffers that do not start with an unsupported BOM must yield nullptr.
+ ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM(""), nullptr);
+ // A plain "\x00\x00\x00" literal converts to an empty StringRef (the length
+ // of a const char * is taken with strlen), which would just repeat the
+ // case above; withInnerNUL is needed to really pass three NUL bytes.
+ ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM(
+ llvm::StringLiteral::withInnerNUL("\x00\x00\x00")),
+ nullptr);
+ ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\xFF\xFF\xFF"), nullptr);
+ ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("#include <iostream>"),
+ nullptr);
+
+ // Every unsupported BOM must be reported by the name of its encoding, even
+ // when ordinary source text follows it.
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\xFE\xFF#include <iostream>")),
+ "UTF-16 (BE)");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\xFF\xFE#include <iostream>")),
+ "UTF-16 (LE)");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\x2B\x2F\x76#include <iostream>")),
+ "UTF-7");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\xF7\x64\x4C#include <iostream>")),
+ "UTF-1");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\xDD\x73\x66\x73#include <iostream>")),
+ "UTF-EBCDIC");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\x0E\xFE\xFF#include <iostream>")),
+ "SCSU");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\xFB\xEE\x28#include <iostream>")),
+ "BOCU-1");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ "\x84\x31\x95\x33#include <iostream>")),
+ "GB-18030");
+ // The UTF-32 marks contain NUL bytes, so these inputs also have to be built
+ // with withInnerNUL to keep the bytes after the first NUL.
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ llvm::StringLiteral::withInnerNUL(
+ "\x00\x00\xFE\xFF#include <iostream>"))),
+ "UTF-32 (BE)");
+ ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+ llvm::StringLiteral::withInnerNUL(
+ "\xFF\xFE\x00\x00#include <iostream>"))),
+ "UTF-32 (LE)");
+}
+
#if defined(LLVM_ON_UNIX)
TEST_F(SourceManagerTest, getMacroArgExpandedLocation) {
OpenPOWER on IntegriCloud