summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/test/tools/llvm-rc/Inputs/cp1252.rc 4
-rw-r--r-- llvm/test/tools/llvm-rc/Inputs/utf8-escape-narrow.rc 5
-rw-r--r-- llvm/test/tools/llvm-rc/Inputs/utf8.rc 6
-rw-r--r-- llvm/test/tools/llvm-rc/codepage.test 44
-rw-r--r-- llvm/test/tools/llvm-rc/helpmsg.test 1
-rw-r--r-- llvm/tools/llvm-rc/Opts.td 4
-rw-r--r-- llvm/tools/llvm-rc/ResourceFileWriter.cpp 76
-rw-r--r-- llvm/tools/llvm-rc/ResourceFileWriter.h 16
-rw-r--r-- llvm/tools/llvm-rc/llvm-rc.cpp 18
9 files changed, 153 insertions, 21 deletions
diff --git a/llvm/test/tools/llvm-rc/Inputs/cp1252.rc b/llvm/test/tools/llvm-rc/Inputs/cp1252.rc
new file mode 100644
index 00000000000..f1dd948aead
--- /dev/null
+++ b/llvm/test/tools/llvm-rc/Inputs/cp1252.rc
@@ -0,0 +1,4 @@
+STRINGTABLE {
+ 1 "åäö © ƒ \xe5\xe4\366 \251 \x83"
+ 2 L"åäö © ƒ \xe5\xe4\366 \251 \x0192"
+}
diff --git a/llvm/test/tools/llvm-rc/Inputs/utf8-escape-narrow.rc b/llvm/test/tools/llvm-rc/Inputs/utf8-escape-narrow.rc
new file mode 100644
index 00000000000..311968c4d71
--- /dev/null
+++ b/llvm/test/tools/llvm-rc/Inputs/utf8-escape-narrow.rc
@@ -0,0 +1,5 @@
+STRINGTABLE {
+ // One can't pass UTF-8 sequences via multiple escaped chars - in narrow
+ // strings in UTF-8 mode, only ASCII chars can be entered via escapes.
+ 1 "åäö \xc3\xa5"
+}
diff --git a/llvm/test/tools/llvm-rc/Inputs/utf8.rc b/llvm/test/tools/llvm-rc/Inputs/utf8.rc
new file mode 100644
index 00000000000..20ef99116c9
--- /dev/null
+++ b/llvm/test/tools/llvm-rc/Inputs/utf8.rc
@@ -0,0 +1,6 @@
+STRINGTABLE {
+ // One can't pass UTF-8 sequences via multiple escaped chars - in narrow
+ // strings in UTF-8 mode, only ASCII chars can be entered via escapes.
+ 1 "åäö © \x61"
+ 2 L"åäö © \xe5\xe4\366 \251"
+}
diff --git a/llvm/test/tools/llvm-rc/codepage.test b/llvm/test/tools/llvm-rc/codepage.test
new file mode 100644
index 00000000000..ce17e0a6b2b
--- /dev/null
+++ b/llvm/test/tools/llvm-rc/codepage.test
@@ -0,0 +1,44 @@
+; RUN: llvm-rc /C 65001 /FO %t.utf8.res %p/Inputs/utf8.rc
+; RUN: llvm-readobj %t.utf8.res | FileCheck %s --check-prefix=UTF8
+
+; UTF8: Resource type (int): 6
+; UTF8-NEXT: Resource name (int): 1
+; UTF8-NEXT: Data version: 0
+; UTF8-NEXT: Memory flags: 0x1030
+; UTF8-NEXT: Language ID: 1033
+; UTF8-NEXT: Version (major): 0
+; UTF8-NEXT: Version (minor): 0
+; UTF8-NEXT: Characteristics: 0
+; UTF8-NEXT: Data size: 68
+; UTF8-NEXT: Data: (
+; UTF8-NEXT: 0000: 00000700 E500E400 F6002000 A9002000 |.......... ... .|
+; UTF8-NEXT: 0010: 61000B00 E500E400 F6002000 A9002000 |a......... ... .|
+; UTF8-NEXT: 0020: E500E400 F6002000 A9000000 00000000 |...... .........|
+; UTF8-NEXT: 0030: 00000000 00000000 00000000 00000000 |................|
+; UTF8-NEXT: 0040: 00000000 |....|
+; UTF8-NEXT: )
+
+; RUN: not llvm-rc /C 65001 /FO %t.utf8-escape-narrow.res %p/Inputs/utf8-escape-narrow.rc 2>&1 | FileCheck %s --check-prefix UTF8_ESCAPE
+; UTF8_ESCAPE: llvm-rc: Error in STRINGTABLE statement (ID 1):
+; UTF8_ESCAPE-NEXT: Unable to interpret single byte (195) as UTF-8
+
+; RUN: llvm-rc /C 1252 /FO %t.cp1252.res %p/Inputs/cp1252.rc
+; RUN: llvm-readobj %t.cp1252.res | FileCheck %s --check-prefix=CP1252
+
+; CP1252: Resource type (int): 6
+; CP1252-NEXT: Resource name (int): 1
+; CP1252-NEXT: Data version: 0
+; CP1252-NEXT: Memory flags: 0x1030
+; CP1252-NEXT: Language ID: 1033
+; CP1252-NEXT: Version (major): 0
+; CP1252-NEXT: Version (minor): 0
+; CP1252-NEXT: Characteristics: 0
+; CP1252-NEXT: Data size: 92
+; CP1252-NEXT: Data: (
+; CP1252-NEXT: 0000: 00000F00 E500E400 F6002000 A9002000 |.......... ... .|
+; CP1252-NEXT: 0010: 92012000 E500E400 F6002000 A9002000 |.. ....... ... .|
+; CP1252-NEXT: 0020: 92010F00 E500E400 F6002000 A9002000 |.......... ... .|
+; CP1252-NEXT: 0030: 92012000 E500E400 F6002000 A9002000 |.. ....... ... .|
+; CP1252-NEXT: 0040: 92010000 00000000 00000000 00000000 |................|
+; CP1252-NEXT: 0050: 00000000 00000000 00000000 |............|
+; CP1252-NEXT: )
diff --git a/llvm/test/tools/llvm-rc/helpmsg.test b/llvm/test/tools/llvm-rc/helpmsg.test
index 2c2814abc66..e91d6369c62 100644
--- a/llvm/test/tools/llvm-rc/helpmsg.test
+++ b/llvm/test/tools/llvm-rc/helpmsg.test
@@ -7,6 +7,7 @@
; CHECK-DAG: USAGE: rc [options] <inputs>
; CHECK-DAG: OPTIONS:
; CHECK-NEXT: /? Display this help and exit.
+; CHECK-NEXT: /C <value> Set the codepage used for input strings.
; CHECK-NEXT: /dry-run Don't compile the input; only try to parse it.
; CHECK-NEXT: /D <value> Define a symbol for the C preprocessor.
; CHECK-NEXT: /FO <value> Change the output file location.
diff --git a/llvm/tools/llvm-rc/Opts.td b/llvm/tools/llvm-rc/Opts.td
index 9792aa582cb..11f40f57103 100644
--- a/llvm/tools/llvm-rc/Opts.td
+++ b/llvm/tools/llvm-rc/Opts.td
@@ -35,6 +35,9 @@ def H : Flag<[ "/", "-" ], "H">,
def DRY_RUN : Flag<[ "/", "-" ], "dry-run">,
HelpText<"Don't compile the input; only try to parse it.">;
+def CODEPAGE : JoinedOrSeparate<[ "/", "-" ], "C">,
+ HelpText<"Set the codepage used for input strings.">;
+
// Unused switches (at least for now). These will stay unimplemented
// in an early stage of development and can be ignored. However, we need to
// parse them in order to preserve the compatibility with the original tool.
@@ -44,7 +47,6 @@ def R : Flag<[ "/", "-" ], "R">;
def SL : Flag<[ "/", "-" ], "SL">;
// (Codepages support.)
-def C : Flag<[ "/", "-" ], "C">;
def W : Flag<[ "/", "-" ], "W">;
// (Support of MUI and similar.)
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
index f141dc7e356..dadb7d691f7 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
@@ -110,6 +110,18 @@ static bool stripQuotes(StringRef &Str, bool &IsLongString) {
return true;
}
+static UTF16 cp1252ToUnicode(unsigned char C) {
+  // Bytes 0x80-0x9F are translated through the table below; every other
+  // byte maps to the code point with the same numeric value.
+  static const UTF16 HighBlock[] = {
+      0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
+      0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
+      0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+      0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178,
+  };
+  return (C < 0x80 || C > 0x9F) ? UTF16(C) : HighBlock[C - 0x80];
+}
+
// Describes a way to handle '\0' characters when processing the string.
// rc.exe tool sometimes behaves in a weird way in postprocessing.
// If the string to be output is equivalent to a C-string (e.g. in MENU
@@ -132,10 +144,26 @@ enum class NullHandlingMethod {
// * Replace the escape sequences with their processed version.
// For identifiers, this is no-op.
static Error processString(StringRef Str, NullHandlingMethod NullHandler,
- bool &IsLongString, SmallVectorImpl<UTF16> &Result) {
+ bool &IsLongString, SmallVectorImpl<UTF16> &Result,
+ int CodePage) {
bool IsString = stripQuotes(Str, IsLongString);
SmallVector<UTF16, 128> Chars;
- convertUTF8ToUTF16String(Str, Chars);
+
+ // Convert the input bytes according to the chosen codepage.
+ if (CodePage == CpUtf8) {
+ convertUTF8ToUTF16String(Str, Chars);
+ } else if (CodePage == CpWin1252) {
+ for (char C : Str)
+ Chars.push_back(cp1252ToUnicode((unsigned char)C));
+ } else {
+ // For other, unknown codepages, only allow plain ASCII input.
+ for (char C : Str) {
+ if ((unsigned char)C > 0x7F)
+ return createError("Non-ASCII 8-bit codepoint (" + Twine(C) +
+ ") can't be interpreted in the current codepage");
+ Chars.push_back((unsigned char)C);
+ }
+ }
if (!IsString) {
// It's an identifier if it's not a string. Make all characters uppercase.
@@ -157,21 +185,35 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
if (Char > 0xFF)
return createError("Non-8-bit codepoint (" + Twine(Char) +
") can't occur in a user-defined narrow string");
+ }
+ }
+ Result.push_back(Char);
+ return Error::success();
+ };
+ auto AddEscapedChar = [AddRes, IsLongString, CodePage](UTF16 Char) -> Error {
+ if (!IsLongString) {
+ // Escaped chars in narrow strings have to be interpreted according to
+ // the chosen code page.
+ if (Char > 0xFF)
+ return createError("Non-8-bit escaped char (" + Twine(Char) +
+ ") can't occur in narrow string");
+ if (CodePage == CpUtf8) {
+ if (Char >= 0x80)
+ return createError("Unable to interpret single byte (" + Twine(Char) +
+ ") as UTF-8");
+ } else if (CodePage == CpWin1252) {
+ Char = cp1252ToUnicode(Char);
} else {
- // In case of narrow non-user strings, Windows RC converts
- // [0x80, 0xFF] chars according to the current codepage.
- // There is no 'codepage' concept settled in every supported platform,
- // so we should reject such inputs.
- if (Char > 0x7F && Char <= 0xFF)
+ // Unknown/unsupported codepage, only allow ASCII input.
+ if (Char > 0x7F)
return createError("Non-ASCII 8-bit codepoint (" + Twine(Char) +
") can't "
"occur in a non-Unicode string");
}
}
- Result.push_back(Char);
- return Error::success();
+ return AddRes(Char);
};
while (Pos < Chars.size()) {
@@ -223,7 +265,7 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
--RemainingChars;
}
- RETURN_IF_ERROR(AddRes(ReadInt));
+ RETURN_IF_ERROR(AddEscapedChar(ReadInt));
continue;
}
@@ -240,7 +282,7 @@ static Error processString(StringRef Str, NullHandlingMethod NullHandler,
++Pos;
}
- RETURN_IF_ERROR(AddRes(ReadInt));
+ RETURN_IF_ERROR(AddEscapedChar(ReadInt));
continue;
}
@@ -328,7 +370,8 @@ Error ResourceFileWriter::writeCString(StringRef Str, bool WriteTerminator) {
SmallVector<UTF16, 128> ProcessedString;
bool IsLongString;
RETURN_IF_ERROR(processString(Str, NullHandlingMethod::CutAtNull,
- IsLongString, ProcessedString));
+ IsLongString, ProcessedString,
+ Params.CodePage));
for (auto Ch : ProcessedString)
writeInt<uint16_t>(Ch);
if (WriteTerminator)
@@ -1142,6 +1185,7 @@ public:
static bool classof(const RCResource *Res) {
return Res->getKind() == RkStringTableBundle;
}
+ Twine getResourceTypeName() const override { return "STRINGTABLE"; }
};
Error ResourceFileWriter::visitStringTableBundle(const RCResource *Res) {
@@ -1168,7 +1212,7 @@ Error ResourceFileWriter::writeStringTableBundleBody(const RCResource *Base) {
SmallVector<UTF16, 128> Data;
RETURN_IF_ERROR(processString(Res->Bundle.Data[ID].getValueOr(StringRef()),
NullHandlingMethod::CutAtDoubleNull,
- IsLongString, Data));
+ IsLongString, Data, Params.CodePage));
if (AppendNull && Res->Bundle.Data[ID])
Data.push_back('\0');
RETURN_IF_ERROR(
@@ -1215,9 +1259,9 @@ Error ResourceFileWriter::writeUserDefinedBody(const RCResource *Base) {
SmallVector<UTF16, 128> ProcessedString;
bool IsLongString;
- RETURN_IF_ERROR(processString(Elem.getString(),
- NullHandlingMethod::UserResource,
- IsLongString, ProcessedString));
+ RETURN_IF_ERROR(
+ processString(Elem.getString(), NullHandlingMethod::UserResource,
+ IsLongString, ProcessedString, Params.CodePage));
for (auto Ch : ProcessedString) {
if (IsLongString) {
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.h b/llvm/tools/llvm-rc/ResourceFileWriter.h
index dcdebbf47fd..aef3bfa3c71 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.h
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.h
@@ -25,15 +25,25 @@ class MemoryBuffer;
namespace rc {
-struct SearchParams {
+enum CodePage {
+ CpAcp = 0, // The currently active codepage. LLVM has no notion of
+ // which codepage that actually is, so this mode only
+ // accepts ASCII.
+ CpWin1252 = 1252, // A codepage where most 8-bit values correspond to
+ // Unicode code points with the same value.
+ CpUtf8 = 65001, // UTF-8.
+};
+
+struct WriterParams {
std::vector<std::string> Include; // Additional folders to search for files.
std::vector<std::string> NoInclude; // Folders to exclude from file search.
StringRef InputFilePath; // The full path of the input file.
+ int CodePage = CpAcp; // Codepage (a CodePage value) for decoding input strings.
};
class ResourceFileWriter : public Visitor {
public:
- ResourceFileWriter(const SearchParams &Params,
+ ResourceFileWriter(const WriterParams &Params,
std::unique_ptr<raw_fd_ostream> Stream)
: Params(Params), FS(std::move(Stream)), IconCursorID(1) {
assert(FS && "Output stream needs to be provided to the serializator");
@@ -146,7 +156,7 @@ private:
Error writeVersionInfoBlock(const VersionInfoBlock &);
Error writeVersionInfoValue(const VersionInfoValue &);
- const SearchParams &Params;
+ const WriterParams &Params;
// Output stream handling.
std::unique_ptr<raw_fd_ostream> FS;
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 6491473de32..1f0f16f1685 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -129,13 +129,29 @@ int main(int Argc, const char **Argv) {
}
}
- SearchParams Params;
+ WriterParams Params;
SmallString<128> InputFile(InArgsInfo[0]);
llvm::sys::fs::make_absolute(InputFile);
Params.InputFilePath = InputFile;
Params.Include = InputArgs.getAllArgValues(OPT_INCLUDE);
Params.NoInclude = InputArgs.getAllArgValues(OPT_NOINCLUDE);
+ if (InputArgs.hasArg(OPT_CODEPAGE)) {
+ if (InputArgs.getLastArgValue(OPT_CODEPAGE)
+ .getAsInteger(10, Params.CodePage))
+ fatalError("Invalid code page: " +
+ InputArgs.getLastArgValue(OPT_CODEPAGE));
+ switch (Params.CodePage) {
+ case CpAcp:
+ case CpWin1252:
+ case CpUtf8:
+ break;
+ default:
+ fatalError(
+ "Unsupported code page, only 0, 1252 and 65001 are supported!");
+ }
+ }
+
std::unique_ptr<ResourceFileWriter> Visitor;
bool IsDryRun = InputArgs.hasArg(OPT_DRY_RUN);
OpenPOWER on IntegriCloud