summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-rc/ResourceFileWriter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-rc/ResourceFileWriter.cpp')
-rw-r--r--llvm/tools/llvm-rc/ResourceFileWriter.cpp159
1 files changed, 155 insertions, 4 deletions
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
index 1d23fb966f6..8b234ae96d6 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
@@ -122,24 +122,175 @@ enum class NullHandlingMethod {
CutAtDoubleNull // Terminate string on '\0\0'; strip final '\0'.
};
-// Parses an identifier or string and returns a processed version of it.
-// For now, it only strips the string boundaries, but TODO:
+// Parses an identifier or string and returns a processed version of it:
+// * String the string boundary quotes.
// * Squash "" to a single ".
// * Replace the escape sequences with their processed version.
// For identifiers, this is no-op.
static Error processString(StringRef Str, NullHandlingMethod NullHandler,
bool &IsLongString, SmallVectorImpl<UTF16> &Result) {
bool IsString = stripQuotes(Str, IsLongString);
- convertUTF8ToUTF16String(Str, Result);
+ SmallVector<UTF16, 128> Chars;
+ convertUTF8ToUTF16String(Str, Chars);
if (!IsString) {
// It's an identifier if it's not a string. Make all characters uppercase.
- for (UTF16 &Ch : Result) {
+ for (UTF16 &Ch : Chars) {
assert(Ch <= 0x7F && "We didn't allow identifiers to be non-ASCII");
Ch = toupper(Ch);
}
+ Result.swap(Chars);
return Error::success();
}
+ Result.reserve(Chars.size());
+ size_t Pos = 0;
+
+ auto AddRes = [&Result, NullHandler, IsLongString](UTF16 Char) -> Error {
+ if (!IsLongString) {
+ if (NullHandler == NullHandlingMethod::UserResource) {
+ // Narrow strings in user-defined resources are *not* output in
+ // UTF-16 format.
+ if (Char > 0xFF)
+ return createError("Non-8-bit codepoint (" + Twine(Char) +
+ ") can't occur in a user-defined narrow string");
+
+ } else {
+ // In case of narrow non-user strings, Windows RC converts
+ // [0x80, 0xFF] chars according to the current codepage.
+ // There is no 'codepage' concept settled in every supported platform,
+ // so we should reject such inputs.
+ if (Char > 0x7F && Char <= 0xFF)
+ return createError("Non-ASCII 8-bit codepoint (" + Twine(Char) +
+ ") can't "
+ "occur in a non-Unicode string");
+ }
+ }
+
+ Result.push_back(Char);
+ return Error::success();
+ };
+
+ while (Pos < Chars.size()) {
+ UTF16 CurChar = Chars[Pos];
+ ++Pos;
+
+ // Strip double "".
+ if (CurChar == '"') {
+ if (Pos == Chars.size() || Chars[Pos] != '"')
+ return createError("Expected \"\"");
+ ++Pos;
+ RETURN_IF_ERROR(AddRes('"'));
+ continue;
+ }
+
+ if (CurChar == '\\') {
+ UTF16 TypeChar = Chars[Pos];
+ ++Pos;
+
+ if (TypeChar == 'x' || TypeChar == 'X') {
+ // Read a hex number. Max number of characters to read differs between
+ // narrow and wide strings.
+ UTF16 ReadInt = 0;
+ size_t RemainingChars = IsLongString ? 4 : 2;
+ // We don't want to read non-ASCII hex digits. std:: functions past
+ // 0xFF invoke UB.
+ //
+ // FIXME: actually, Microsoft version probably doesn't check this
+ // condition and uses their Unicode version of 'isxdigit'. However,
+ // there are some hex-digit Unicode character outside of ASCII, and
+ // some of these are actually accepted by rc.exe, the notable example
+ // being fullwidth forms (U+FF10..U+FF19 etc.) These can be written
+ // instead of ASCII digits in \x... escape sequence and get accepted.
+ // However, the resulting hexcodes seem totally unpredictable.
+ // We think it's infeasible to try to reproduce this behavior, nor to
+ // put effort in order to detect it.
+ while (RemainingChars && Pos < Chars.size() && Chars[Pos] < 0x80) {
+ if (!isxdigit(Chars[Pos]))
+ break;
+ char Digit = tolower(Chars[Pos]);
+ ++Pos;
+
+ ReadInt <<= 4;
+ if (isdigit(Digit))
+ ReadInt |= Digit - '0';
+ else
+ ReadInt |= Digit - 'a' + 10;
+
+ --RemainingChars;
+ }
+
+ RETURN_IF_ERROR(AddRes(ReadInt));
+ continue;
+ }
+
+ if (TypeChar >= '0' && TypeChar < '8') {
+ // Read an octal number. Note that we've already read the first digit.
+ UTF16 ReadInt = TypeChar - '0';
+ size_t RemainingChars = IsLongString ? 6 : 2;
+
+ while (RemainingChars && Pos < Chars.size() && Chars[Pos] >= '0' &&
+ Chars[Pos] < '8') {
+ ReadInt <<= 3;
+ ReadInt |= Chars[Pos] - '0';
+ --RemainingChars;
+ ++Pos;
+ }
+
+ RETURN_IF_ERROR(AddRes(ReadInt));
+
+ continue;
+ }
+
+ switch (TypeChar) {
+ case 'A':
+ case 'a':
+ // Windows '\a' translates into '\b' (Backspace).
+ RETURN_IF_ERROR(AddRes('\b'));
+ break;
+
+ case 'n': // Somehow, RC doesn't recognize '\N' and '\R'.
+ RETURN_IF_ERROR(AddRes('\n'));
+ break;
+
+ case 'r':
+ RETURN_IF_ERROR(AddRes('\r'));
+ break;
+
+ case 'T':
+ case 't':
+ RETURN_IF_ERROR(AddRes('\t'));
+ break;
+
+ case '\\':
+ RETURN_IF_ERROR(AddRes('\\'));
+ break;
+
+ case '"':
+ // RC accepts \" only if another " comes afterwards; then, \"" means
+ // a single ".
+ if (Pos == Chars.size() || Chars[Pos] != '"')
+ return createError("Expected \\\"\"");
+ ++Pos;
+ RETURN_IF_ERROR(AddRes('"'));
+ break;
+
+ default:
+ // If TypeChar means nothing, \ is should be output to stdout with
+ // following char. However, rc.exe consumes these characters when
+ // dealing with wide strings.
+ if (!IsLongString) {
+ RETURN_IF_ERROR(AddRes('\\'));
+ RETURN_IF_ERROR(AddRes(TypeChar));
+ }
+ break;
+ }
+
+ continue;
+ }
+
+ // If nothing interesting happens, just output the character.
+ RETURN_IF_ERROR(AddRes(CurChar));
+ }
switch (NullHandler) {
case NullHandlingMethod::CutAtNull:
OpenPOWER on IntegriCloud