summaryrefslogtreecommitdiffstats
path: root/clang/lib/Analysis
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2016-03-29 17:35:02 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2016-03-29 17:35:02 +0000
commit0c18d03d9157090cb379219a8b91f3104869f358 (patch)
tree4691c44c47d7b0f79928619e73b170efb9047c65 /clang/lib/Analysis
parentac400900da8e0f756a73739a85151b01e42500ea (diff)
downloadbcm5719-llvm-0c18d03d9157090cb379219a8b91f3104869f358.tar.gz
bcm5719-llvm-0c18d03d9157090cb379219a8b91f3104869f358.zip
[Sema] Handle UTF-8 invalid format string specifiers
Improve invalid format string specifier handling by printing out invalid specifier characters with \x, \u and \U. Previously clang would print garbage whenever the character was unprintable. Example, before: NSLog(@"%\u25B9"); => warning: invalid conversion specifier ' [-Wformat-invalid-specifier] after: NSLog(@"%\u25B9"); => warning: invalid conversion specifier '\u25b9' [-Wformat-invalid-specifier] Differential Revision: http://reviews.llvm.org/D18296 rdar://problem/24672159 llvm-svn: 264752
Diffstat (limited to 'clang/lib/Analysis')
-rw-r--r--clang/lib/Analysis/FormatString.cpp23
-rw-r--r--clang/lib/Analysis/FormatStringParsing.h8
-rw-r--r--clang/lib/Analysis/PrintfFormatString.cpp7
-rw-r--r--clang/lib/Analysis/ScanfFormatString.cpp11
4 files changed, 44 insertions, 5 deletions
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp
index 1c42ec0e87c..badc71021a1 100644
--- a/clang/lib/Analysis/FormatString.cpp
+++ b/clang/lib/Analysis/FormatString.cpp
@@ -15,6 +15,7 @@
#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
@@ -260,6 +261,28 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
return true;
}
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
+ const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
+ if (SpecifierBegin + 1 >= FmtStrEnd)
+ return false;
+
+ const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
+ const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+ const char FirstByte = *SB;
+
+ // If the invalid specifier is a multibyte UTF-8 string, return the
+ // total length accordingly so that the conversion specifier can be
+ // properly updated to reflect a complete UTF-8 specifier.
+ unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+ if (NumBytes == 1)
+ return false;
+ if (SB + NumBytes > SE)
+ return false;
+
+ Len = NumBytes + 1;
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Methods on ArgType.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Analysis/FormatStringParsing.h b/clang/lib/Analysis/FormatStringParsing.h
index e1652964b8c..8463fcec5bf 100644
--- a/clang/lib/Analysis/FormatStringParsing.h
+++ b/clang/lib/Analysis/FormatStringParsing.h
@@ -46,7 +46,13 @@ bool ParseArgPosition(FormatStringHandler &H,
/// FormatSpecifier& argument, and false otherwise.
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
const LangOptions &LO, bool IsScanf = false);
-
+
+/// Returns true if the byte after \p SpecifierBegin starts a multibyte UTF-8
+/// sequence that does not extend past \p FmtStrEnd; on success, \p Len is set
+/// to the length in bytes from \p SpecifierBegin through that sequence.
+bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin,
+ const char *FmtStrEnd, unsigned &Len);
+
template <typename T> class SpecifierResult {
T FS;
const char *Start;
diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp
index f0976bce972..fb5df61c5ed 100644
--- a/clang/lib/Analysis/PrintfFormatString.cpp
+++ b/clang/lib/Analysis/PrintfFormatString.cpp
@@ -312,8 +312,13 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
argIndex++;
if (k == ConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Start;
+ if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
+ CS.setEndScanList(Start + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
+ return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
}
return PrintfSpecifierResult(Start, FS);
}
diff --git a/clang/lib/Analysis/ScanfFormatString.cpp b/clang/lib/Analysis/ScanfFormatString.cpp
index d484d8e828c..82b038864c2 100644
--- a/clang/lib/Analysis/ScanfFormatString.cpp
+++ b/clang/lib/Analysis/ScanfFormatString.cpp
@@ -79,7 +79,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
unsigned &argIndex,
const LangOptions &LO,
const TargetInfo &Target) {
-
+ using namespace clang::analyze_format_string;
using namespace clang::analyze_scanf;
const char *I = Beg;
const char *Start = nullptr;
@@ -210,10 +210,15 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
// FIXME: '%' and '*' doesn't make sense. Issue a warning.
// FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
-
+
if (k == ScanfConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Beg;
+ if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
+ CS.setEndScanList(Beg + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
+ return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
}
return ScanfSpecifierResult(Start, FS);
}
OpenPOWER on IntegriCloud