summary refs log tree commit diff stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/include/llvm/Support/raw_ostream.h 8
-rw-r--r-- llvm/lib/Support/Locale.cpp 13
-rw-r--r-- llvm/lib/Support/SourceMgr.cpp 14
-rw-r--r-- llvm/lib/Support/raw_ostream.cpp 84
4 files changed, 92 insertions, 27 deletions
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index b9ea9b5817f..458b0998e82 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -367,12 +367,16 @@ class raw_fd_ostream : public raw_pwrite_stream {
int FD;
bool ShouldClose;
+ bool SupportsSeeking;
+
+ /// True if this fd refers to a Windows console device. Mintty and other
+ /// terminal emulators are TTYs, but they are not consoles.
+ bool IsWindowsConsole = false;
+
std::error_code EC;
uint64_t pos;
- bool SupportsSeeking;
-
/// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t Size) override;
diff --git a/llvm/lib/Support/Locale.cpp b/llvm/lib/Support/Locale.cpp
index c4cfc5e8de0..1b3300b90f2 100644
--- a/llvm/lib/Support/Locale.cpp
+++ b/llvm/lib/Support/Locale.cpp
@@ -7,24 +7,11 @@ namespace sys {
namespace locale {
int columnWidth(StringRef Text) {
-#ifdef _WIN32
- return Text.size();
-#else
return llvm::sys::unicode::columnWidthUTF8(Text);
-#endif
}
bool isPrint(int UCS) {
-#ifdef _WIN32
- // Restrict characters that we'll try to print to the lower part of ASCII
- // except for the control characters (0x20 - 0x7E). In general one can not
- // reliably output code points U+0080 and higher using narrow character C/C++
- // output functions in Windows, because the meaning of the upper 128 codes is
- // determined by the active code page in the console.
- return ' ' <= UCS && UCS <= '~';
-#else
return llvm::sys::unicode::isPrintable(UCS);
-#endif
}
} // namespace locale
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index d8fde7fa899..e17e5ab0141 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -345,12 +345,18 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
static void printSourceLine(raw_ostream &S, StringRef LineContents) {
// Print out the source line one character at a time, so we can expand tabs.
for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
- if (LineContents[i] != '\t') {
- S << LineContents[i];
- ++OutCol;
- continue;
+ size_t NextTab = LineContents.find('\t', i);
+ // If there are no tabs left, print the rest of the line; we are done.
+ if (NextTab == StringRef::npos) {
+ S << LineContents.drop_front(i);
+ break;
}
+ // Otherwise, print from i to NextTab.
+ S << LineContents.slice(i, NextTab);
+ OutCol += NextTab - i;
+ i = NextTab;
+
// If we have a tab, emit at least one space, then round up to 8 columns.
do {
S << ' ';
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 1dae469958f..81df38f341c 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -60,6 +60,7 @@
#endif
#ifdef _WIN32
+#include "llvm/Support/ConvertUTF.h"
#include "Windows/WindowsSupport.h"
#endif
@@ -567,6 +568,12 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
if (FD <= STDERR_FILENO)
ShouldClose = false;
+#ifdef _WIN32
+ // Check if this is a console device. This is not equivalent to isatty.
+ IsWindowsConsole =
+ ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR;
+#endif
+
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
#ifdef _WIN32
@@ -609,10 +616,68 @@ raw_fd_ostream::~raw_fd_ostream() {
/*GenCrashDiag=*/false);
}
+#if defined(_WIN32)
+// The most reliable way to print Unicode in a Windows console is with
+// WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This
+// assumes that LLVM programs always print valid UTF-8 to the console. The data
+// might not be UTF-8 for two major reasons:
+// 1. The program is printing binary (-filetype=obj -o -), in which case it
+// would have been gibberish anyway.
+// 2. The program is printing text in a semi-ASCII-compatible code page like
+// Shift-JIS or CP1252.
+//
+// Most LLVM programs don't produce non-ASCII text unless they are quoting
+// user source input. A well-behaved LLVM program should either validate that
+// the input is UTF-8 or transcode from the local codepage to UTF-8 before
+// quoting it. If they don't, this may mess up the encoding, but this is still
+// probably the best compromise we can make.
+static bool write_console_impl(int FD, StringRef Data) {
+ SmallVector<wchar_t, 256> WideText;
+
+ // Fall back to ::write if it wasn't valid UTF-8.
+ if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText))
+ return false;
+
+ // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data
+ // that can be written to the console at a time.
+ size_t MaxWriteSize = WideText.size();
+ if (!RunningWindows8OrGreater())
+ MaxWriteSize = 32767;
+
+ size_t WCharsWritten = 0;
+ do {
+ size_t WCharsToWrite =
+ std::min(MaxWriteSize, WideText.size() - WCharsWritten);
+ DWORD ActuallyWritten;
+ bool Success =
+ ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten],
+ WCharsToWrite, &ActuallyWritten,
+ /*Reserved=*/nullptr);
+
+ // The most likely reason for WriteConsoleW to fail is that FD no longer
+ // points to a console. Fall back to ::write. If this isn't the first loop
+ // iteration, something is truly wrong.
+ if (!Success)
+ return false;
+
+ WCharsWritten += ActuallyWritten;
+ } while (WCharsWritten != WideText.size());
+ return true;
+}
+#endif
+
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
+#if defined(_WIN32)
+ // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16
+ // and using WriteConsoleW. If that fails, fall back to plain write().
+ if (IsWindowsConsole)
+ if (write_console_impl(FD, StringRef(Ptr, Size)))
+ return;
+#endif
+
// The maximum write size is limited to INT32_MAX. A write
// greater than SSIZE_MAX is implementation-defined in POSIX,
// and Windows _write requires 32 bit input.
@@ -622,12 +687,6 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
// It is observed that Linux returns EINVAL for a very large write (>2G).
// Make it a reasonably small value.
MaxWriteSize = 1024 * 1024 * 1024;
-#elif defined(_WIN32)
- // Writing a large size of output to Windows console returns ENOMEM. It seems
- // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
- // the latter has a size limit (66000 bytes or less, depending on heap usage).
- if (::_isatty(FD) && !RunningWindows8OrGreater())
- MaxWriteSize = 32767;
#endif
do {
@@ -696,8 +755,17 @@ void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
}
size_t raw_fd_ostream::preferred_buffer_size() const {
-#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
- // Windows and Minix have no st_blksize.
+#if defined(_WIN32)
+ // Disable buffering for console devices. Console output is re-encoded from
+ // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the
+ // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled
+ // below on most other OSs, so do the same thing on Windows and avoid that
+ // complexity.
+ if (IsWindowsConsole)
+ return 0;
+ return raw_ostream::preferred_buffer_size();
+#elif !defined(__minix)
+ // Minix has no st_blksize.
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
OpenPOWER on IntegriCloud