summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Analysis/FormatString.cpp23
-rw-r--r--clang/lib/Analysis/FormatStringParsing.h8
-rw-r--r--clang/lib/Analysis/PrintfFormatString.cpp7
-rw-r--r--clang/lib/Analysis/ScanfFormatString.cpp11
-rw-r--r--clang/lib/Sema/SemaChecking.cpp43
5 files changed, 81 insertions, 11 deletions
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp
index 1c42ec0e87c..badc71021a1 100644
--- a/clang/lib/Analysis/FormatString.cpp
+++ b/clang/lib/Analysis/FormatString.cpp
@@ -15,6 +15,7 @@
#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
@@ -260,6 +261,28 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
return true;
}
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
+ const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
+ if (SpecifierBegin + 1 >= FmtStrEnd) // No byte follows SpecifierBegin.
+ return false;
+
+ const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
+ const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+ const char FirstByte = *SB;
+
+ // Inspect the byte right after SpecifierBegin. If it starts a multibyte
+ // UTF-8 sequence that fits before FmtStrEnd, report the total length in Len
+ // so the caller can make the conversion specifier span the whole sequence.
+ unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+ if (NumBytes == 1)
+ return false;
+ if (SB + NumBytes > SE) // Sequence would run past the end; Len is untouched.
+ return false;
+
+ Len = NumBytes + 1; // +1 accounts for the byte at SpecifierBegin itself.
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Methods on ArgType.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Analysis/FormatStringParsing.h b/clang/lib/Analysis/FormatStringParsing.h
index e1652964b8c..8463fcec5bf 100644
--- a/clang/lib/Analysis/FormatStringParsing.h
+++ b/clang/lib/Analysis/FormatStringParsing.h
@@ -46,7 +46,13 @@ bool ParseArgPosition(FormatStringHandler &H,
/// FormatSpecifier& argument, and false otherwise.
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
const LangOptions &LO, bool IsScanf = false);
-
+
+/// Returns true if the invalid specifier at \p SpecifierBegin is a multibyte
+/// UTF-8 sequence that ends no later than \p FmtStrEnd; in that case the
+/// total size of the specifier is written to \p Len.
+bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin,
+ const char *FmtStrEnd, unsigned &Len);
+
template <typename T> class SpecifierResult {
T FS;
const char *Start;
diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp
index f0976bce972..fb5df61c5ed 100644
--- a/clang/lib/Analysis/PrintfFormatString.cpp
+++ b/clang/lib/Analysis/PrintfFormatString.cpp
@@ -312,8 +312,13 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
argIndex++;
if (k == ConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Start;
+ if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
+ CS.setEndScanList(Start + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
+ return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
}
return PrintfSpecifierResult(Start, FS);
}
diff --git a/clang/lib/Analysis/ScanfFormatString.cpp b/clang/lib/Analysis/ScanfFormatString.cpp
index d484d8e828c..82b038864c2 100644
--- a/clang/lib/Analysis/ScanfFormatString.cpp
+++ b/clang/lib/Analysis/ScanfFormatString.cpp
@@ -79,7 +79,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
unsigned &argIndex,
const LangOptions &LO,
const TargetInfo &Target) {
-
+ using namespace clang::analyze_format_string;
using namespace clang::analyze_scanf;
const char *I = Beg;
const char *Start = nullptr;
@@ -210,10 +210,15 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
// FIXME: '%' and '*' doesn't make sense. Issue a warning.
// FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
-
+
if (k == ScanfConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Beg;
+ if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
+ CS.setEndScanList(Beg + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
+ return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
}
return ScanfSpecifierResult(Start, FS);
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cc261e05963..062041e3771 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -36,6 +36,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Locale.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
@@ -3976,12 +3978,41 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
// gibberish when trying to match arguments.
keepGoing = false;
}
-
- EmitFormatDiagnostic(S.PDiag(diag::warn_format_invalid_conversion)
- << StringRef(csStart, csLen),
- Loc, /*IsStringLocation*/true,
- getSpecifierRange(startSpec, specifierLen));
-
+
+ StringRef Specifier(csStart, csLen);
+
+ // If the specifier is non-printable, it could be the first byte of a UTF-8
+ // sequence. In that case, print the UTF-8 code point. If not, print the byte
+ // hex value.
+ std::string CodePointStr;
+ if (!llvm::sys::locale::isPrint(*csStart)) {
+ UTF32 CodePoint;
+ const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
+ const UTF8 *E =
+ reinterpret_cast<const UTF8 *>(csStart + csLen);
+ ConversionResult Result =
+ llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
+
+ if (Result != conversionOK) {
+ unsigned char FirstChar = *csStart;
+ CodePoint = (UTF32)FirstChar;
+ }
+
+ llvm::raw_string_ostream OS(CodePointStr);
+ if (CodePoint < 256)
+ OS << "\\x" << llvm::format("%02x", CodePoint);
+ else if (CodePoint <= 0xFFFF)
+ OS << "\\u" << llvm::format("%04x", CodePoint);
+ else
+ OS << "\\U" << llvm::format("%08x", CodePoint);
+ OS.flush();
+ Specifier = CodePointStr;
+ }
+
+ EmitFormatDiagnostic(
+ S.PDiag(diag::warn_format_invalid_conversion) << Specifier, Loc,
+ /*IsStringLocation*/ true, getSpecifierRange(startSpec, specifierLen));
+
return keepGoing;
}
OpenPOWER on IntegriCloud