summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Support/Program.h34
-rw-r--r--llvm/lib/Support/Unix/Program.inc18
-rw-r--r--llvm/lib/Support/Windows/Path.inc20
-rw-r--r--llvm/lib/Support/Windows/Program.inc46
-rw-r--r--llvm/lib/Support/Windows/WindowsSupport.h3
-rw-r--r--llvm/unittests/Support/ProgramTest.cpp50
6 files changed, 167 insertions, 4 deletions
diff --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h
index 51279a9b864..01165e8bf74 100644
--- a/llvm/include/llvm/Support/Program.h
+++ b/llvm/include/llvm/Support/Program.h
@@ -126,6 +126,40 @@ struct ProcessInfo {
/// argument length limits.
bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args);
+ /// File encoding options when writing contents that a non-UTF8 tool will
+ /// read (on Windows systems). For UNIX, we always use UTF-8.
+ enum WindowsEncodingMethod {
+ /// UTF-8 is the LLVM native encoding, being the same as "do not perform
+ /// encoding conversion".
+ WEM_UTF8,
+ WEM_CurrentCodePage,
+ WEM_UTF16
+ };
+
+ /// Saves the UTF8-encoded \p contents string into the file \p FileName
+ /// using a specific encoding.
+ ///
+ /// This write file function adds the possibility to choose which encoding
+ /// to use when writing a text file. On Windows, this is important when
+ /// writing files with internationalization support with an encoding that is
+ /// different from the one used in LLVM (UTF-8). We use this when writing
+ /// response files, since GCC tools on MinGW only understand legacy code
+ /// pages, and VisualStudio tools only understand UTF-16.
+ /// For UNIX, using different encodings is silently ignored, since all tools
+ /// work well with UTF-8.
+ /// This function assumes that you only use UTF-8 *text* data and will convert
+ /// it to your desired encoding before writing to the file.
+ ///
+ /// FIXME: We use EM_CurrentCodePage to write response files for GNU tools in
+ /// a MinGW/MinGW-w64 environment, which has serious flaws but currently is
+ /// our best shot to make gcc/ld understand international characters. This
+ /// should be changed as soon as binutils fix this to support UTF16 on mingw.
+ ///
+ /// \returns non-zero error_code if failed
+ std::error_code
+ writeFileWithEncoding(StringRef FileName, StringRef Contents,
+ WindowsEncodingMethod Encoding = WEM_UTF8);
+
/// This function waits for the process specified by \p PI to finish.
/// \returns A \see ProcessInfo struct with Pid set to:
/// \li The process id of the child process if the child process has changed
diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc
index 63d7ec22ebb..905c78f79bc 100644
--- a/llvm/lib/Support/Unix/Program.inc
+++ b/llvm/lib/Support/Unix/Program.inc
@@ -19,6 +19,7 @@
#include "Unix.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
#include <llvm/Config/config.h>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
@@ -440,6 +441,23 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
return std::error_code();
}
+std::error_code
+llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
+ WindowsEncodingMethod Encoding /*unused*/) {
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::F_Text);
+
+ if (EC)
+ return EC;
+
+ OS << Contents;
+
+ if (OS.has_error())
+ return std::make_error_code(std::errc::io_error);
+
+ return EC;
+}
+
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
static long ArgMax = sysconf(_SC_ARG_MAX);
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index b09c1989699..bff82141b34 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -919,11 +919,13 @@ std::error_code UTF8ToUTF16(llvm::StringRef utf8,
return std::error_code();
}
-std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
- llvm::SmallVectorImpl<char> &utf8) {
+static
+std::error_code UTF16ToCodePage(unsigned codepage, const wchar_t *utf16,
+ size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
if (utf16_len) {
// Get length.
- int len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.begin(),
+ int len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, utf8.begin(),
0, NULL, NULL);
if (len == 0)
@@ -933,7 +935,7 @@ std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
utf8.set_size(len);
// Now do the actual conversion.
- len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.data(),
+ len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, utf8.data(),
utf8.size(), NULL, NULL);
if (len == 0)
@@ -946,6 +948,16 @@ std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
return std::error_code();
}
+
+std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
+ return UTF16ToCodePage(CP_UTF8, utf16, utf16_len, utf8);
+}
+
+std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
+ return UTF16ToCodePage(CP_ACP, utf16, utf16_len, utf8);
+}
} // end namespace windows
} // end namespace sys
} // end namespace llvm
diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index 74a0066de8d..affaf03eb61 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "WindowsSupport.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <fcntl.h>
#include <io.h>
@@ -440,6 +442,50 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
return std::error_code();
}
+std::error_code
+llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
+ WindowsEncodingMethod Encoding) {
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::F_Text);
+ if (EC)
+ return EC;
+
+ if (Encoding == WEM_UTF8) {
+ OS << Contents;
+ } else if (Encoding == WEM_CurrentCodePage) {
+ SmallVector<wchar_t, 1> ArgsUTF16;
+ SmallVector<char, 1> ArgsCurCP;
+
+ if ((EC = windows::UTF8ToUTF16(Contents, ArgsUTF16)))
+ return EC;
+
+ if ((EC = windows::UTF16ToCurCP(
+ ArgsUTF16.data(), ArgsUTF16.size(), ArgsCurCP)))
+ return EC;
+
+ OS.write(ArgsCurCP.data(), ArgsCurCP.size());
+ } else if (Encoding == WEM_UTF16) {
+ SmallVector<wchar_t, 1> ArgsUTF16;
+
+ if ((EC = windows::UTF8ToUTF16(Contents, ArgsUTF16)))
+ return EC;
+
+ // Endianness guessing
+ char BOM[2];
+ uint16_t src = UNI_UTF16_BYTE_ORDER_MARK_NATIVE;
+ memcpy(BOM, &src, 2);
+ OS.write(BOM, 2);
+ OS.write((char *)ArgsUTF16.data(), ArgsUTF16.size() << 1);
+ } else {
+ llvm_unreachable("Unknown encoding");
+ }
+
+ if (OS.has_error())
+ return std::make_error_code(std::errc::io_error);
+
+ return EC;
+}
+
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
// The documented max length of the command line passed to CreateProcess.
static const size_t MaxCommandStringLength = 32768;
diff --git a/llvm/lib/Support/Windows/WindowsSupport.h b/llvm/lib/Support/Windows/WindowsSupport.h
index f68835b1a71..6d9c5fb24ff 100644
--- a/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/llvm/lib/Support/Windows/WindowsSupport.h
@@ -166,6 +166,9 @@ namespace windows {
std::error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16);
std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
SmallVectorImpl<char> &utf8);
+/// Convert from UTF16 to the current code page used in the system
+std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8);
} // end namespace windows
} // end namespace sys
} // end namespace llvm.
diff --git a/llvm/unittests/Support/ProgramTest.cpp b/llvm/unittests/Support/ProgramTest.cpp
index 4e7316fb3ac..c0e6e80e358 100644
--- a/llvm/unittests/Support/ProgramTest.cpp
+++ b/llvm/unittests/Support/ProgramTest.cpp
@@ -34,6 +34,16 @@ void sleep_for(unsigned int seconds) {
#error sleep_for is not implemented on your platform.
#endif
+#define ASSERT_NO_ERROR(x) \
+ if (std::error_code ASSERT_NO_ERROR_ec = x) { \
+ SmallString<128> MessageStorage; \
+ raw_svector_ostream Message(MessageStorage); \
+ Message << #x ": did not return errc::success.\n" \
+ << "error number: " << ASSERT_NO_ERROR_ec.value() << "\n" \
+ << "error message: " << ASSERT_NO_ERROR_ec.message() << "\n"; \
+ GTEST_FATAL_FAILURE_(MessageStorage.c_str()); \
+ } else { \
+ }
// From TestMain.cpp.
extern const char *TestMainArgv0;
@@ -220,4 +230,44 @@ TEST(ProgramTest, TestExecuteNegative) {
}
+#ifdef LLVM_ON_WIN32
+const char utf16le_text[] =
+ "\x6c\x00\x69\x00\x6e\x00\x67\x00\xfc\x00\x69\x00\xe7\x00\x61\x00";
+const char utf16be_text[] =
+ "\x00\x6c\x00\x69\x00\x6e\x00\x67\x00\xfc\x00\x69\x00\xe7\x00\x61";
+#endif
+const char utf8_text[] = "\x6c\x69\x6e\x67\xc3\xbc\x69\xc3\xa7\x61";
+
+TEST(ProgramTest, TestWriteWithSystemEncoding) {
+ SmallString<128> TestDirectory;
+ ASSERT_NO_ERROR(fs::createUniqueDirectory("program-test", TestDirectory));
+ errs() << "Test Directory: " << TestDirectory << '\n';
+ errs().flush();
+ SmallString<128> file_pathname(TestDirectory);
+ path::append(file_pathname, "international-file.txt");
+ // Only on Windows we should encode in UTF16. For other systems, use UTF8
+ ASSERT_NO_ERROR(sys::writeFileWithEncoding(file_pathname.c_str(), utf8_text,
+ sys::WEM_UTF16));
+ int fd = 0;
+ ASSERT_NO_ERROR(fs::openFileForRead(file_pathname.c_str(), fd));
+#if defined(LLVM_ON_WIN32)
+ char buf[18];
+ ASSERT_EQ(::read(fd, buf, 18), 18);
+ if (strncmp(buf, "\xfe\xff", 2) == 0) { // UTF16-BE
+ ASSERT_EQ(strncmp(&buf[2], utf16be_text, 16), 0);
+ } else if (strncmp(buf, "\xff\xfe", 2) == 0) { // UTF16-LE
+ ASSERT_EQ(strncmp(&buf[2], utf16le_text, 16), 0);
+ } else {
+ FAIL() << "Invalid BOM in UTF-16 file";
+ }
+#else
+ char buf[10];
+ ASSERT_EQ(::read(fd, buf, 10), 10);
+ ASSERT_EQ(strncmp(buf, utf8_text, 10), 0);
+#endif
+ ::close(fd);
+ ASSERT_NO_ERROR(fs::remove(file_pathname.str()));
+ ASSERT_NO_ERROR(fs::remove(TestDirectory.str()));
+}
+
} // end anonymous namespace
OpenPOWER on IntegriCloud