summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-rc/ResourceFileWriter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-rc/ResourceFileWriter.cpp')
-rw-r--r--llvm/tools/llvm-rc/ResourceFileWriter.cpp76
1 files changed, 60 insertions, 16 deletions
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
index f141dc7e356..dadb7d691f7 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
@@ -110,6 +110,18 @@ static bool stripQuotes(StringRef &Str, bool &IsLongString) {
return true;
}
+static UTF16 cp1252ToUnicode(unsigned char C) {
+ static const UTF16 Map80[] = {
+ 0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
+ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
+ 0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+ 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178,
+ };
+ if (C >= 0x80 && C <= 0x9F)
+ return Map80[C - 0x80];
+ return C;
+}
+
// Describes a way to handle '\0' characters when processing the string.
// rc.exe tool sometimes behaves in a weird way in postprocessing.
// If the string to be output is equivalent to a C-string (e.g. in MENU
@@ -132,10 +144,26 @@ enum class NullHandlingMethod {
// * Replace the escape sequences with their processed version.
// For identifiers, this is no-op.
static Error processString(StringRef Str, NullHandlingMethod NullHandler,
- bool &IsLongString, SmallVectorImpl<UTF16> &Result) {
+ bool &IsLongString, SmallVectorImpl<UTF16> &Result,
+ int CodePage) {
bool IsString = stripQuotes(Str, IsLongString);
SmallVector<UTF16, 128> Chars;
- convertUTF8ToUTF16String(Str, Chars);
+
+ // Convert the input bytes according to the chosen codepage.
+ if (CodePage == CpUtf8) {
+ convertUTF8ToUTF16String(Str, Chars);
+ } else if (CodePage == CpWin1252) {
+ for (char C : Str)
+ Chars.push_back(cp1252ToUnicode((unsigned char)C));
+ } else {
+ // For other, unknown codepages, only allow plain ASCII input.
+ for (char C : Str) {
+ if ((unsigned char)C > 0x7F)
+ return createError("Non-ASCII 8-bit codepoint (" + Twine(C) +
+ ") can't be interpreted in the current codepage");
+ Chars.push_back((unsigned char)C);
+ }
+ }
if (!IsString) {
// It's an identifier if it's not a string. Make all characters uppercase.
@@ -157,21 +185,35 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
if (Char > 0xFF)
return createError("Non-8-bit codepoint (" + Twine(Char) +
") can't occur in a user-defined narrow string");
+ }
+ }
+ Result.push_back(Char);
+ return Error::success();
+ };
+ auto AddEscapedChar = [AddRes, IsLongString, CodePage](UTF16 Char) -> Error {
+ if (!IsLongString) {
+ // Escaped chars in narrow strings have to be interpreted according to
+ // the chosen code page.
+ if (Char > 0xFF)
+ return createError("Non-8-bit escaped char (" + Twine(Char) +
+ ") can't occur in narrow string");
+ if (CodePage == CpUtf8) {
+ if (Char >= 0x80)
+ return createError("Unable to interpret single byte (" + Twine(Char) +
+ ") as UTF-8");
+ } else if (CodePage == CpWin1252) {
+ Char = cp1252ToUnicode(Char);
} else {
- // In case of narrow non-user strings, Windows RC converts
- // [0x80, 0xFF] chars according to the current codepage.
- // There is no 'codepage' concept settled in every supported platform,
- // so we should reject such inputs.
- if (Char > 0x7F && Char <= 0xFF)
+ // Unknown/unsupported codepage, only allow ASCII input.
+ if (Char > 0x7F)
return createError("Non-ASCII 8-bit codepoint (" + Twine(Char) +
") can't "
"occur in a non-Unicode string");
}
}
- Result.push_back(Char);
- return Error::success();
+ return AddRes(Char);
};
while (Pos < Chars.size()) {
@@ -223,7 +265,7 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
--RemainingChars;
}
- RETURN_IF_ERROR(AddRes(ReadInt));
+ RETURN_IF_ERROR(AddEscapedChar(ReadInt));
continue;
}
@@ -240,7 +282,7 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
++Pos;
}
- RETURN_IF_ERROR(AddRes(ReadInt));
+ RETURN_IF_ERROR(AddEscapedChar(ReadInt));
continue;
}
@@ -328,7 +370,8 @@ Error ResourceFileWriter::writeCString(StringRef Str, bool WriteTerminator) {
SmallVector<UTF16, 128> ProcessedString;
bool IsLongString;
RETURN_IF_ERROR(processString(Str, NullHandlingMethod::CutAtNull,
- IsLongString, ProcessedString));
+ IsLongString, ProcessedString,
+ Params.CodePage));
for (auto Ch : ProcessedString)
writeInt<uint16_t>(Ch);
if (WriteTerminator)
@@ -1142,6 +1185,7 @@ public:
static bool classof(const RCResource *Res) {
return Res->getKind() == RkStringTableBundle;
}
+ Twine getResourceTypeName() const override { return "STRINGTABLE"; }
};
Error ResourceFileWriter::visitStringTableBundle(const RCResource *Res) {
@@ -1168,7 +1212,7 @@ Error ResourceFileWriter::writeStringTableBundleBody(const RCResource *Base) {
SmallVector<UTF16, 128> Data;
RETURN_IF_ERROR(processString(Res->Bundle.Data[ID].getValueOr(StringRef()),
NullHandlingMethod::CutAtDoubleNull,
- IsLongString, Data));
+ IsLongString, Data, Params.CodePage));
if (AppendNull && Res->Bundle.Data[ID])
Data.push_back('\0');
RETURN_IF_ERROR(
@@ -1215,9 +1259,9 @@ Error ResourceFileWriter::writeUserDefinedBody(const RCResource *Base) {
SmallVector<UTF16, 128> ProcessedString;
bool IsLongString;
- RETURN_IF_ERROR(processString(Elem.getString(),
- NullHandlingMethod::UserResource,
- IsLongString, ProcessedString));
+ RETURN_IF_ERROR(
+ processString(Elem.getString(), NullHandlingMethod::UserResource,
+ IsLongString, ProcessedString, Params.CodePage));
for (auto Ch : ProcessedString) {
if (IsLongString) {
OpenPOWER on IntegriCloud