diff options
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/Analysis/FormatString.cpp | 23 | ||||
| -rw-r--r-- | clang/lib/Analysis/FormatStringParsing.h | 8 | ||||
| -rw-r--r-- | clang/lib/Analysis/PrintfFormatString.cpp | 7 | ||||
| -rw-r--r-- | clang/lib/Analysis/ScanfFormatString.cpp | 11 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 43 |
5 files changed, 81 insertions, 11 deletions
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp index 1c42ec0e87c..badc71021a1 100644 --- a/clang/lib/Analysis/FormatString.cpp +++ b/clang/lib/Analysis/FormatString.cpp @@ -15,6 +15,7 @@ #include "FormatStringParsing.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/Support/ConvertUTF.h" using clang::analyze_format_string::ArgType; using clang::analyze_format_string::FormatStringHandler; @@ -260,6 +261,28 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, return true; } +bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( + const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) { + if (SpecifierBegin + 1 >= FmtStrEnd) + return false; + + const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1); + const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd); + const char FirstByte = *SB; + + // If the invalid specifier is a multibyte UTF-8 string, return the + // total length accordingly so that the conversion specifier can be + // properly updated to reflect a complete UTF-8 specifier. + unsigned NumBytes = getNumBytesForUTF8(FirstByte); + if (NumBytes == 1) + return false; + if (SB + NumBytes > SE) + return false; + + Len = NumBytes + 1; + return true; +} + //===----------------------------------------------------------------------===// // Methods on ArgType. //===----------------------------------------------------------------------===// diff --git a/clang/lib/Analysis/FormatStringParsing.h b/clang/lib/Analysis/FormatStringParsing.h index e1652964b8c..8463fcec5bf 100644 --- a/clang/lib/Analysis/FormatStringParsing.h +++ b/clang/lib/Analysis/FormatStringParsing.h @@ -46,7 +46,13 @@ bool ParseArgPosition(FormatStringHandler &H, /// FormatSpecifier& argument, and false otherwise. bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf = false); - + +/// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8 +/// string; check that it won't go further than \p FmtStrEnd and write +/// up the total size in \p Len. +bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, + const char *FmtStrEnd, unsigned &Len); + template <typename T> class SpecifierResult { T FS; const char *Start; diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp index f0976bce972..fb5df61c5ed 100644 --- a/clang/lib/Analysis/PrintfFormatString.cpp +++ b/clang/lib/Analysis/PrintfFormatString.cpp @@ -312,8 +312,13 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, argIndex++; if (k == ConversionSpecifier::InvalidSpecifier) { + unsigned Len = I - Start; + if (ParseUTF8InvalidSpecifier(Start, E, Len)) { + CS.setEndScanList(Start + Len); + FS.setConversionSpecifier(CS); + } // Assume the conversion takes one argument. - return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); + return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len); } return PrintfSpecifierResult(Start, FS); } diff --git a/clang/lib/Analysis/ScanfFormatString.cpp b/clang/lib/Analysis/ScanfFormatString.cpp index d484d8e828c..82b038864c2 100644 --- a/clang/lib/Analysis/ScanfFormatString.cpp +++ b/clang/lib/Analysis/ScanfFormatString.cpp @@ -79,7 +79,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target) { - + using namespace clang::analyze_format_string; using namespace clang::analyze_scanf; const char *I = Beg; const char *Start = nullptr; @@ -210,10 +210,15 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, // FIXME: '%' and '*' doesn't make sense. Issue a warning. // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. - + if (k == ScanfConversionSpecifier::InvalidSpecifier) { + unsigned Len = I - Beg; + if (ParseUTF8InvalidSpecifier(Beg, E, Len)) { + CS.setEndScanList(Beg + Len); + FS.setConversionSpecifier(CS); + } // Assume the conversion takes one argument. - return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); + return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); } return ScanfSpecifierResult(Start, FS); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index cc261e05963..062041e3771 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -36,6 +36,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Locale.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/raw_ostream.h" #include <limits> @@ -3976,12 +3978,41 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex, // gibberish when trying to match arguments. keepGoing = false; } - - EmitFormatDiagnostic(S.PDiag(diag::warn_format_invalid_conversion) - << StringRef(csStart, csLen), - Loc, /*IsStringLocation*/true, - getSpecifierRange(startSpec, specifierLen)); - + + StringRef Specifier(csStart, csLen); + + // If the specifier in non-printable, it could be the first byte of a UTF-8 + // sequence. In that case, print the UTF-8 code point. If not, print the byte + // hex value. + std::string CodePointStr; + if (!llvm::sys::locale::isPrint(*csStart)) { + UTF32 CodePoint; + const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart); + const UTF8 *E = + reinterpret_cast<const UTF8 *>(csStart + csLen); + ConversionResult Result = + llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion); + + if (Result != conversionOK) { + unsigned char FirstChar = *csStart; + CodePoint = (UTF32)FirstChar; + } + + llvm::raw_string_ostream OS(CodePointStr); + if (CodePoint < 256) + OS << "\\x" << llvm::format("%02x", CodePoint); + else if (CodePoint <= 0xFFFF) + OS << "\\u" << llvm::format("%04x", CodePoint); + else + OS << "\\U" << llvm::format("%08x", CodePoint); + OS.flush(); + Specifier = CodePointStr; + } + + EmitFormatDiagnostic( + S.PDiag(diag::warn_format_invalid_conversion) << Specifier, Loc, + /*IsStringLocation*/ true, getSpecifierRange(startSpec, specifierLen)); + return keepGoing; } |

