diff options
| author | Etienne Bergeron <etienneb@google.com> | 2016-04-07 16:16:36 +0000 |
|---|---|---|
| committer | Etienne Bergeron <etienneb@google.com> | 2016-04-07 16:16:36 +0000 |
| commit | a5fd19ba1e6a50c49ea8be34caa6b398f7f08546 (patch) | |
| tree | a10daf7ada942faf021f0dcfa378412b1f0c59b8 /clang-tools-extra | |
| parent | 3802c4af596d1e708a999ddb3e965e9b60820ad6 (diff) | |
| download | bcm5719-llvm-a5fd19ba1e6a50c49ea8be34caa6b398f7f08546.tar.gz bcm5719-llvm-a5fd19ba1e6a50c49ea8be34caa6b398f7f08546.zip | |
[clang-tidy] add new checker for string literal with NUL character.
Summary:
This patch adds support for detecting suspicious string
literals and their //incorrect// usage.
The following example shows an incorrect character escaping leading
to an embedded NUL character.
```
std::string str = "\0x42"; // Should be "\x42".
```
The patch also adds detection of a truncated literal when a literal
is passed to a string constructor.
Reviewers: hokein, alexfh
Subscribers: LegalizeAdulthood, bcraig, Eugene.Zelenko, bkramer, cfe-commits
Differential Revision: http://reviews.llvm.org/D18783
llvm-svn: 265691
Diffstat (limited to 'clang-tools-extra')
8 files changed, 252 insertions, 0 deletions
diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt index fdfa26e6d05..b5a5d53d4cf 100644 --- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt @@ -23,6 +23,7 @@ add_clang_library(clangTidyMiscModule SizeofContainerCheck.cpp StaticAssertCheck.cpp StringIntegerAssignmentCheck.cpp + StringLiteralWithEmbeddedNulCheck.cpp SuspiciousMissingCommaCheck.cpp SuspiciousSemicolonCheck.cpp SwappedArgumentsCheck.cpp diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp index 7dc8a217389..efbcde7245a 100644 --- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp @@ -31,6 +31,7 @@ #include "SizeofContainerCheck.h" #include "StaticAssertCheck.h" #include "StringIntegerAssignmentCheck.h" +#include "StringLiteralWithEmbeddedNulCheck.h" #include "SuspiciousMissingCommaCheck.h" #include "SuspiciousSemicolonCheck.h" #include "SwappedArgumentsCheck.h" @@ -89,6 +90,8 @@ public: "misc-static-assert"); CheckFactories.registerCheck<StringIntegerAssignmentCheck>( "misc-string-integer-assignment"); + CheckFactories.registerCheck<StringLiteralWithEmbeddedNulCheck>( + "misc-string-literal-with-embedded-nul"); CheckFactories.registerCheck<SuspiciousMissingCommaCheck>( "misc-suspicious-missing-comma"); CheckFactories.registerCheck<SuspiciousSemicolonCheck>( diff --git a/clang-tools-extra/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp b/clang-tools-extra/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp new file mode 100644 index 00000000000..335927b2ec7 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp @@ -0,0 +1,83 @@ +//===--- StringLiteralWithEmbeddedNulCheck.cpp - clang-tidy----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "StringLiteralWithEmbeddedNulCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace misc { + +AST_MATCHER(StringLiteral, containsNul) { + for (size_t i = 0; i < Node.getLength(); ++i) + if (Node.getCodeUnit(i) == '\0') + return true; + return false; +} + +void StringLiteralWithEmbeddedNulCheck::registerMatchers(MatchFinder *Finder) { + // Match a string that contains embedded NUL character. Extra-checks are + // applied in |check| to find incorectly escaped characters. + Finder->addMatcher(stringLiteral(containsNul()).bind("strlit"), this); + + // The remaining checks only apply to C++. + if (!getLangOpts().CPlusPlus) + return; + + const auto StrLitWithNul = + ignoringParenImpCasts(stringLiteral(containsNul()).bind("truncated")); + + // Match string constructor. + const auto StringConstructorExpr = expr(anyOf( + cxxConstructExpr(argumentCountIs(1), + hasDeclaration(cxxMethodDecl(hasName("basic_string")))), + // If present, the second argument is the alloc object which must not + // be present explicitly. + cxxConstructExpr(argumentCountIs(2), + hasDeclaration(cxxMethodDecl(hasName("basic_string"))), + hasArgument(1, cxxDefaultArgExpr())))); + + // Detect passing a suspicious string literal to a string constructor. + // example: std::string str = "abc\0def"; + Finder->addMatcher( + cxxConstructExpr(StringConstructorExpr, hasArgument(0, StrLitWithNul)), + this); + + // Detect passing a suspicious string literal through an overloaded operator. 
+ Finder->addMatcher(cxxOperatorCallExpr(hasAnyArgument(StrLitWithNul)), this); +} + +void StringLiteralWithEmbeddedNulCheck::check( + const MatchFinder::MatchResult &Result) { + if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) { + for (size_t Offset = 0, Length = SL->getLength(); Offset < Length; + ++Offset) { + // Find a sequence of character like "\0x12". + if (Offset + 3 < Length && SL->getCodeUnit(Offset) == '\0' && + SL->getCodeUnit(Offset + 1) == 'x' && + isDigit(SL->getCodeUnit(Offset + 2)) && + isDigit(SL->getCodeUnit(Offset + 3))) { + diag(SL->getLocStart(), "suspicious embedded NUL character"); + return; + } + } + } + + if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("truncated")) { + diag(SL->getLocStart(), + "truncated string literal with embedded NUL character"); + } +} + +} // namespace misc +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h b/clang-tools-extra/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h new file mode 100644 index 00000000000..e4a87fc280b --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h @@ -0,0 +1,35 @@ +//===--- StringLiteralWithEmbeddedNulCheck.h - clang-tidy--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H + +#include "../ClangTidy.h" + +namespace clang { +namespace tidy { +namespace misc { + +/// Find suspicious string literals with embedded NUL characters. 
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html +class StringLiteralWithEmbeddedNulCheck : public ClangTidyCheck { +public: + StringLiteralWithEmbeddedNulCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; +}; + +} // namespace misc +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 7b65a023cb6..20c156f4ed5 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -97,6 +97,12 @@ identified. The improvements since the 3.8 release include: Warns when there is a explicit redundant cast of a calculation result to a bigger type. +- New `misc-string-literal-with-embedded-nul + <http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html>`_ check + + Warns about suspicious NUL character in string literals which may lead to + truncation or invalid character escaping. 
+ - New `misc-suspicious-missing-comma <http://clang.llvm.org/extra/clang-tidy/checks/misc-suspicious-missing-comma.html>`_ check diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 28d4915f816..25e8851ac39 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -66,6 +66,7 @@ Clang-Tidy Checks misc-sizeof-container misc-static-assert misc-string-integer-assignment + misc-string-literal-with-embedded-nul misc-suspicious-missing-comma misc-suspicious-semicolon misc-swapped-arguments diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst b/clang-tools-extra/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst new file mode 100644 index 00000000000..3661218acef --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst @@ -0,0 +1,38 @@ +.. title:: clang-tidy - misc-string-literal-with-embedded-nul + +misc-string-literal-with-embedded-nul +===================================== + +Finds occurences of string literal with embedded NUL character and validates +their usage. + + +Invalid escaping +^^^^^^^^^^^^^^^^ + +Special characters can be escaped within a string literal by using their +hexadecimal encoding like ``\x42``. A common mistake is to escape them +like this ``\0x42`` where the ``\0`` stands for the NUL character. + +.. code:: c++ + + const char* Example[] = "Invalid character: \0x12 should be \x12"; + const char* Bytes[] = "\x03\0x02\0x01\0x00\0xFF\0xFF\0xFF"; + + +Truncated literal +^^^^^^^^^^^^^^^^^ + +String-like classes can manipulate strings with embedded NUL as they are +keeping track of the bytes and the length. This is not the case for a +``char*`` (NUL-terminated) string. + +A common mistake is to pass a string-literal with embedded NUL to a string +constructor expecting a NUL-terminated string. 
The bytes after the first NUL +character are truncated. + +.. code:: c++ + + std::string str("abc\0def"); // "def" is truncated + str += "\0"; // This statement is doing nothing + if (str == "\0abc") return; // This expression is always true diff --git a/clang-tools-extra/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp b/clang-tools-extra/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp new file mode 100644 index 00000000000..2605dd4eccb --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp @@ -0,0 +1,85 @@ +// RUN: %check_clang_tidy %s misc-string-literal-with-embedded-nul %t + +namespace std { +template <typename T> +class allocator {}; +template <typename T> +class char_traits {}; +template <typename C, typename T, typename A> +struct basic_string { + typedef basic_string<C, T, A> _Type; + basic_string(); + basic_string(const C *p, const A &a = A()); + + _Type& operator+=(const C* s); + _Type& operator=(const C* s); +}; + +typedef basic_string<char, std::char_traits<char>, std::allocator<char>> string; +typedef basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>> wstring; +} + +bool operator==(const std::string&, const char*); +bool operator==(const char*, const std::string&); + + +const char Valid[] = "This is valid \x12."; +const char Strange[] = "This is strange \0x12 and must be fixed"; +// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: suspicious embedded NUL character [misc-string-literal-with-embedded-nul] + +const char textA[] = "\0x01\0x02\0x03\0x04"; +// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious embedded NUL character +const wchar_t textW[] = L"\0x01\0x02\0x03\0x04"; +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: suspicious embedded NUL character + +const char A[] = "\0"; +const char B[] = "\0x"; +const char C[] = "\0x1"; +const char D[] = "\0x11"; +// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character + +const wchar_t E[] = L"\0"; +const 
wchar_t F[] = L"\0x"; +const wchar_t G[] = L"\0x1"; +const wchar_t H[] = L"\0x11"; +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: suspicious embedded NUL character + +const char I[] = "\000\000\000\000"; +const char J[] = "\0\0\0\0\0\0"; +const char K[] = ""; + +const char L[] = "\0x12" "\0x12" "\0x12" "\0x12"; +// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character + +void TestA() { + std::string str1 = "abc\0def"; + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal + std::string str2 = "\0"; + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal + std::string str3("\0"); + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal + std::string str4{"\x00\x01\x02\x03"}; + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal + + std::string str; + str += "abc\0def"; + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: truncated string literal + str = "abc\0def"; + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: truncated string literal + + if (str == "abc\0def") return; + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: truncated string literal + if ("abc\0def" == str) return; + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: truncated string literal +} + +void TestW() { + std::wstring str1 = L"abc\0def"; + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal + std::wstring str2 = L"\0"; + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal + std::wstring str3(L"\0"); + // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal + std::wstring str4{L"\x00\x01\x02\x03"}; + // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal +} |

