summaryrefslogtreecommitdiffstats
path: root/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
diff options
context:
space:
mode:
authorDmitri Gribenko <gribozavr@gmail.com>2013-01-30 14:29:28 +0000
committerDmitri Gribenko <gribozavr@gmail.com>2013-01-30 14:29:28 +0000
commit28800da1b352a43175aaa8d224d7fe6895b014be (patch)
tree5949959451f50794d4331e29f5315d2a7b9d34fb /clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
parent32832e61767f11d87f67fafa085f6860a9b3733f (diff)
downloadbcm5719-llvm-28800da1b352a43175aaa8d224d7fe6895b014be.tar.gz
bcm5719-llvm-28800da1b352a43175aaa8d224d7fe6895b014be.zip
Comment parsing: resolve more named character references
This reimplements r173850 with a better approach: (1) use a TableGen-generated matcher instead of doing a linear search; (2) avoid allocations for new strings by converting code points to string iterals with TableGen. llvm-svn: 173931
Diffstat (limited to 'clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp')
-rw-r--r--clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp83
1 files changed, 83 insertions, 0 deletions
diff --git a/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
new file mode 100644
index 00000000000..3afe2b73f0a
--- /dev/null
+++ b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
@@ -0,0 +1,83 @@
+//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits an fficient function to translate HTML named
+// character references to UTF-8 sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringMatcher.h"
+#include <vector>
+
+using namespace llvm;
+
+/// \brief Convert a code point to the corresponding UTF-8 sequence represented
+/// as a C string literal.
+///
+/// \returns true on success.
+static bool translateCodePointToUTF8(unsigned CodePoint,
+ SmallVectorImpl<char> &CLiteral) {
+ char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+ char *TranslatedPtr = Translated;
+ if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
+ return false;
+
+ StringRef UTF8(Translated, TranslatedPtr - Translated);
+
+ raw_svector_ostream OS(CLiteral);
+ OS << "\"";
+ for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
+ OS << "\\x";
+ OS.write_hex(static_cast<unsigned char>(UTF8[i]));
+ }
+ OS << "\"";
+
+ return true;
+}
+
+namespace clang {
+void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
+ raw_ostream &OS) {
+ std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
+ std::vector<StringMatcher::StringPair> NameToUTF8;
+ SmallString<32> CLiteral;
+ for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
+ I != E; ++I) {
+ Record &Tag = **I;
+ std::string Spelling = Tag.getValueAsString("Spelling");
+ uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
+ CLiteral.clear();
+ CLiteral.append("return ");
+ if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
+ SrcMgr.PrintMessage(Tag.getLoc().front(),
+ SourceMgr::DK_Error,
+ Twine("invalid code point"));
+ continue;
+ }
+ CLiteral.append(";");
+
+ StringMatcher::StringPair Match(Spelling, CLiteral.str());
+ NameToUTF8.push_back(Match);
+ }
+
+ OS << "// This file is generated by TableGen. Do not edit.\n\n";
+
+ OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
+ " StringRef Name) {\n";
+ StringMatcher("Name", NameToUTF8, OS).Emit();
+ OS << " return StringRef();\n"
+ << "}\n\n";
+}
+
+} // end namespace clang
+
OpenPOWER on IntegriCloud