diff options
| author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2016-03-29 17:35:02 +0000 |
|---|---|---|
| committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2016-03-29 17:35:02 +0000 |
| commit | 0c18d03d9157090cb379219a8b91f3104869f358 (patch) | |
| tree | 4691c44c47d7b0f79928619e73b170efb9047c65 /clang/lib/Analysis | |
| parent | ac400900da8e0f756a73739a85151b01e42500ea (diff) | |
| download | bcm5719-llvm-0c18d03d9157090cb379219a8b91f3104869f358.tar.gz bcm5719-llvm-0c18d03d9157090cb379219a8b91f3104869f358.zip | |
[Sema] Handle UTF-8 invalid format string specifiers
Improve invalid format string specifier handling by printing out
invalid specifier characters with \x, \u and \U. Previously clang
would print garbage whenever the character was unprintable.
Example, before:
NSLog(@"%\u25B9"); => warning: invalid conversion specifier ' [-Wformat-invalid-specifier]
after:
NSLog(@"%\u25B9"); => warning: invalid conversion specifier '\u25b9' [-Wformat-invalid-specifier]
Differential Revision: http://reviews.llvm.org/D18296
rdar://problem/24672159
llvm-svn: 264752
Diffstat (limited to 'clang/lib/Analysis')
| -rw-r--r-- | clang/lib/Analysis/FormatString.cpp | 23 | ||||
| -rw-r--r-- | clang/lib/Analysis/FormatStringParsing.h | 8 | ||||
| -rw-r--r-- | clang/lib/Analysis/PrintfFormatString.cpp | 7 | ||||
| -rw-r--r-- | clang/lib/Analysis/ScanfFormatString.cpp | 11 |
4 files changed, 44 insertions, 5 deletions
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp index 1c42ec0e87c..badc71021a1 100644 --- a/clang/lib/Analysis/FormatString.cpp +++ b/clang/lib/Analysis/FormatString.cpp @@ -15,6 +15,7 @@ #include "FormatStringParsing.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/Support/ConvertUTF.h" using clang::analyze_format_string::ArgType; using clang::analyze_format_string::FormatStringHandler; @@ -260,6 +261,28 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, return true; } +bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( + const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) { + if (SpecifierBegin + 1 >= FmtStrEnd) + return false; + + const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1); + const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd); + const char FirstByte = *SB; + + // If the invalid specifier is a multibyte UTF-8 string, return the + // total length accordingly so that the conversion specifier can be + // properly updated to reflect a complete UTF-8 specifier. + unsigned NumBytes = getNumBytesForUTF8(FirstByte); + if (NumBytes == 1) + return false; + if (SB + NumBytes > SE) + return false; + + Len = NumBytes + 1; + return true; +} + //===----------------------------------------------------------------------===// // Methods on ArgType. //===----------------------------------------------------------------------===// diff --git a/clang/lib/Analysis/FormatStringParsing.h b/clang/lib/Analysis/FormatStringParsing.h index e1652964b8c..8463fcec5bf 100644 --- a/clang/lib/Analysis/FormatStringParsing.h +++ b/clang/lib/Analysis/FormatStringParsing.h @@ -46,7 +46,13 @@ bool ParseArgPosition(FormatStringHandler &H, /// FormatSpecifier& argument, and false otherwise. 
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf = false); - + +/// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8 +/// string; check that it won't go further than \p FmtStrEnd and write +/// up the total size in \p Len. +bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, + const char *FmtStrEnd, unsigned &Len); + template <typename T> class SpecifierResult { T FS; const char *Start; diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp index f0976bce972..fb5df61c5ed 100644 --- a/clang/lib/Analysis/PrintfFormatString.cpp +++ b/clang/lib/Analysis/PrintfFormatString.cpp @@ -312,8 +312,13 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, argIndex++; if (k == ConversionSpecifier::InvalidSpecifier) { + unsigned Len = I - Start; + if (ParseUTF8InvalidSpecifier(Start, E, Len)) { + CS.setEndScanList(Start + Len); + FS.setConversionSpecifier(CS); + } // Assume the conversion takes one argument. - return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); + return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len); } return PrintfSpecifierResult(Start, FS); } diff --git a/clang/lib/Analysis/ScanfFormatString.cpp b/clang/lib/Analysis/ScanfFormatString.cpp index d484d8e828c..82b038864c2 100644 --- a/clang/lib/Analysis/ScanfFormatString.cpp +++ b/clang/lib/Analysis/ScanfFormatString.cpp @@ -79,7 +79,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target) { - + using namespace clang::analyze_format_string; using namespace clang::analyze_scanf; const char *I = Beg; const char *Start = nullptr; @@ -210,10 +210,15 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, // FIXME: '%' and '*' doesn't make sense. Issue a warning. // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 
- + if (k == ScanfConversionSpecifier::InvalidSpecifier) { + unsigned Len = I - Beg; + if (ParseUTF8InvalidSpecifier(Beg, E, Len)) { + CS.setEndScanList(Beg + Len); + FS.setConversionSpecifier(CS); + } // Assume the conversion takes one argument. - return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); + return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); } return ScanfSpecifierResult(Start, FS); } |

