diff options
author | Alexander Kornienko <alexfh@google.com> | 2013-11-26 10:38:53 +0000 |
---|---|---|
committer | Alexander Kornienko <alexfh@google.com> | 2013-11-26 10:38:53 +0000 |
commit | 71d95d6e51641e8475cf4d1664582c1ca5537924 (patch) | |
tree | 0d4146d995b51c17822aedd0d6ed4fa8cbe11aad /clang | |
parent | b697b538dc07c72c729d1afd1603aa3708475b3b (diff) | |
download | bcm5719-llvm-71d95d6e51641e8475cf4d1664582c1ca5537924.tar.gz bcm5719-llvm-71d95d6e51641e8475cf4d1664582c1ca5537924.zip |
Fix crash in getStringSplit.
Summary:
getStringSplit used to crash when trying to split a long string
literal containing both printable and unprintable multi-byte UTF-8 characters.
Reviewers: djasper, klimek
Reviewed By: djasper
CC: cfe-commits, klimek
Differential Revision: http://llvm-reviews.chandlerc.com/D2268
llvm-svn: 195728
Diffstat (limited to 'clang')
-rw-r--r-- | clang/lib/Format/BreakableToken.cpp | 6 | ||||
-rw-r--r-- | clang/lib/Format/Encoding.h | 8 | ||||
-rw-r--r-- | clang/unittests/Format/FormatTest.cpp | 12 |
3 files changed, 19 insertions, 7 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index d720ce990b5..a08102a3b78 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -92,9 +92,7 @@ static BreakableToken::Split getStringSplit(StringRef Text, return BreakableToken::Split(StringRef::npos, 0); if (ColumnLimit <= UsedColumns) return BreakableToken::Split(StringRef::npos, 0); - unsigned MaxSplit = std::min<unsigned>( - ColumnLimit - UsedColumns, - encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1); + unsigned MaxSplit = ColumnLimit - UsedColumns; StringRef::size_type SpaceOffset = 0; StringRef::size_type SlashOffset = 0; StringRef::size_type WordStartOffset = 0; @@ -110,7 +108,7 @@ static BreakableToken::Split getStringSplit(StringRef Text, Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); } - if (Chars > MaxSplit) + if (Chars > MaxSplit || Text.size() == Advance) break; if (IsBlank(Text[0])) diff --git a/clang/lib/Format/Encoding.h b/clang/lib/Format/Encoding.h index 356334d5376..dba5174b97b 100644 --- a/clang/lib/Format/Encoding.h +++ b/clang/lib/Format/Encoding.h @@ -64,6 +64,10 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) { inline unsigned columnWidth(StringRef Text, Encoding Encoding) { if (Encoding == Encoding_UTF8) { int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); + // FIXME: Figure out the correct way to handle this in the presence of both + // printable and unprintable multi-byte UTF-8 characters. Falling back to + // returning the number of bytes may cause problems, as columnWidth suddenly + // becomes non-additive. 
if (ContentWidth >= 0) return ContentWidth; } @@ -81,9 +85,7 @@ inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, StringRef::size_type TabPos = Tail.find('\t'); if (TabPos == StringRef::npos) return TotalWidth + columnWidth(Tail, Encoding); - int Width = columnWidth(Tail.substr(0, TabPos), Encoding); - assert(Width >= 0); - TotalWidth += Width; + TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding); TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth; Tail = Tail.substr(TabPos + 1); } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index fc0e935037b..a7dce3b86f5 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6991,6 +6991,16 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) { } TEST_F(FormatTest, SplitsUTF8Strings) { + // Non-printable characters' width is currently considered to be the length in + // bytes in UTF8. The characters can be displayed in very different manner + // (zero-width, single width with a substitution glyph, expanded to their code + // (e.g. "<8d>"), so there's no single correct way to handle them. + EXPECT_EQ("\"aaaaÄ\"\n" + "\"\";", + format("\"aaaaÄ\";", getLLVMStyleWithColumns(10))); + EXPECT_EQ("\"aaaaaaaÄ\"\n" + "\"\";", + format("\"aaaaaaaÄ\";", getLLVMStyleWithColumns(10))); EXPECT_EQ( "\"Однажды, в \"\n" "\"студёную \"\n" @@ -7024,6 +7034,8 @@ TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) { } TEST_F(FormatTest, SplitsUTF8LineComments) { + EXPECT_EQ("// aaaaÄ", + format("// aaaaÄ", getLLVMStyleWithColumns(10))); EXPECT_EQ("// Я из лесу\n" "// вышел; был\n" "// сильный\n" |