summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-rc/ResourceFileWriter.cpp
diff options
context:
space:
mode:
authorMartin Storsjo <martin@martin.st>2018-05-02 19:43:44 +0000
committerMartin Storsjo <martin@martin.st>2018-05-02 19:43:44 +0000
commitd1d046aa32395d8e9c459e877a03e716d47aca2c (patch)
treec2e7f98b2c476189bb52c52992ac4475a908c52d /llvm/tools/llvm-rc/ResourceFileWriter.cpp
parent4d64306a92a93a8e4f20c0c83e164c223b26b848 (diff)
downloadbcm5719-llvm-d1d046aa32395d8e9c459e877a03e716d47aca2c.tar.gz
bcm5719-llvm-d1d046aa32395d8e9c459e877a03e716d47aca2c.zip
[llvm-rc] Add rudimentary support for codepages
Only support UTF-8 (since LLVM contains UTF-8 parsing support already, and the code even does that already) and Windows-1252 (where most code points have the same value in Unicode). Keep the existing default as only allowing ASCII input. Using the option type JoinedOrSeparate, since the real rc.exe handles options in this form, even if llvm-rc uses Separate for other similar existing options. Rename the struct SearchParams to WriterParams since it's now used for more than just include paths. Add a missing getResourceTypeName method to the BundleResource class, to fix error printing from within STRINGTABLE resources (used in tests). Differential Revision: https://reviews.llvm.org/D46238 llvm-svn: 331391
Diffstat (limited to 'llvm/tools/llvm-rc/ResourceFileWriter.cpp')
-rw-r--r--llvm/tools/llvm-rc/ResourceFileWriter.cpp76
1 files changed, 60 insertions, 16 deletions
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
index f141dc7e356..dadb7d691f7 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
@@ -110,6 +110,18 @@ static bool stripQuotes(StringRef &Str, bool &IsLongString) {
return true;
}
+static UTF16 cp1252ToUnicode(unsigned char C) { // Map one Windows-1252 byte to a UTF-16 code unit.
+ static const UTF16 Map80[] = { // Replacements for bytes 0x80..0x9F, indexed by (C - 0x80).
+ 0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
+ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
+ 0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+ 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178,
+ }; // Slots 0x81, 0x8d, 0x8f, 0x90 and 0x9d hold their own byte value.
+ if (C >= 0x80 && C <= 0x9F) // Only this range is looked up in the table.
+ return Map80[C - 0x80];
+ return C; // Every other byte is returned with its value unchanged.
+}
+
// Describes a way to handle '\0' characters when processing the string.
// rc.exe tool sometimes behaves in a weird way in postprocessing.
// If the string to be output is equivalent to a C-string (e.g. in MENU
@@ -132,10 +144,26 @@ enum class NullHandlingMethod {
// * Replace the escape sequences with their processed version.
// For identifiers, this is no-op.
static Error processString(StringRef Str, NullHandlingMethod NullHandler,
- bool &IsLongString, SmallVectorImpl<UTF16> &Result) {
+ bool &IsLongString, SmallVectorImpl<UTF16> &Result,
+ int CodePage) {
bool IsString = stripQuotes(Str, IsLongString);
SmallVector<UTF16, 128> Chars;
- convertUTF8ToUTF16String(Str, Chars);
+
+ // Convert the input bytes according to the chosen codepage.
+ if (CodePage == CpUtf8) {
+ convertUTF8ToUTF16String(Str, Chars);
+ } else if (CodePage == CpWin1252) {
+ for (char C : Str)
+ Chars.push_back(cp1252ToUnicode((unsigned char)C));
+ } else {
+ // For other, unknown codepages, only allow plain ASCII input.
+ for (char C : Str) {
+ if ((unsigned char)C > 0x7F)
+ return createError("Non-ASCII 8-bit codepoint (" + Twine(C) +
+ ") can't be interpreted in the current codepage");
+ Chars.push_back((unsigned char)C);
+ }
+ }
if (!IsString) {
// It's an identifier if it's not a string. Make all characters uppercase.
@@ -157,21 +185,35 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
if (Char > 0xFF)
return createError("Non-8-bit codepoint (" + Twine(Char) +
") can't occur in a user-defined narrow string");
+ }
+ }
+ Result.push_back(Char);
+ return Error::success();
+ };
+ auto AddEscapedChar = [AddRes, IsLongString, CodePage](UTF16 Char) -> Error {
+ if (!IsLongString) {
+ // Escaped chars in narrow strings have to be interpreted according to
+ // the chosen code page.
+ if (Char > 0xFF)
+ return createError("Non-8-bit escaped char (" + Twine(Char) +
+ ") can't occur in narrow string");
+ if (CodePage == CpUtf8) {
+ if (Char >= 0x80)
+ return createError("Unable to interpret single byte (" + Twine(Char) +
+ ") as UTF-8");
+ } else if (CodePage == CpWin1252) {
+ Char = cp1252ToUnicode(Char);
} else {
- // In case of narrow non-user strings, Windows RC converts
- // [0x80, 0xFF] chars according to the current codepage.
- // There is no 'codepage' concept settled in every supported platform,
- // so we should reject such inputs.
- if (Char > 0x7F && Char <= 0xFF)
+ // Unknown/unsupported codepage, only allow ASCII input.
+ if (Char > 0x7F)
return createError("Non-ASCII 8-bit codepoint (" + Twine(Char) +
") can't "
"occur in a non-Unicode string");
}
}
- Result.push_back(Char);
- return Error::success();
+ return AddRes(Char);
};
while (Pos < Chars.size()) {
@@ -223,7 +265,7 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
--RemainingChars;
}
- RETURN_IF_ERROR(AddRes(ReadInt));
+ RETURN_IF_ERROR(AddEscapedChar(ReadInt));
continue;
}
@@ -240,7 +282,7 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
++Pos;
}
- RETURN_IF_ERROR(AddRes(ReadInt));
+ RETURN_IF_ERROR(AddEscapedChar(ReadInt));
continue;
}
@@ -328,7 +370,8 @@ Error ResourceFileWriter::writeCString(StringRef Str, bool WriteTerminator) {
SmallVector<UTF16, 128> ProcessedString;
bool IsLongString;
RETURN_IF_ERROR(processString(Str, NullHandlingMethod::CutAtNull,
- IsLongString, ProcessedString));
+ IsLongString, ProcessedString,
+ Params.CodePage));
for (auto Ch : ProcessedString)
writeInt<uint16_t>(Ch);
if (WriteTerminator)
@@ -1142,6 +1185,7 @@ public:
static bool classof(const RCResource *Res) {
return Res->getKind() == RkStringTableBundle;
}
+ Twine getResourceTypeName() const override { return "STRINGTABLE"; }
};
Error ResourceFileWriter::visitStringTableBundle(const RCResource *Res) {
@@ -1168,7 +1212,7 @@ Error ResourceFileWriter::writeStringTableBundleBody(const RCResource *Base) {
SmallVector<UTF16, 128> Data;
RETURN_IF_ERROR(processString(Res->Bundle.Data[ID].getValueOr(StringRef()),
NullHandlingMethod::CutAtDoubleNull,
- IsLongString, Data));
+ IsLongString, Data, Params.CodePage));
if (AppendNull && Res->Bundle.Data[ID])
Data.push_back('\0');
RETURN_IF_ERROR(
@@ -1215,9 +1259,9 @@ Error ResourceFileWriter::writeUserDefinedBody(const RCResource *Base) {
SmallVector<UTF16, 128> ProcessedString;
bool IsLongString;
- RETURN_IF_ERROR(processString(Elem.getString(),
- NullHandlingMethod::UserResource,
- IsLongString, ProcessedString));
+ RETURN_IF_ERROR(
+ processString(Elem.getString(), NullHandlingMethod::UserResource,
+ IsLongString, ProcessedString, Params.CodePage));
for (auto Ch : ProcessedString) {
if (IsLongString) {
OpenPOWER on IntegriCloud