From 56faca015513475b13b2784fb82d05563fe72cec Mon Sep 17 00:00:00 2001
From: Etienne Bergeron
Date: Thu, 7 Apr 2016 16:16:36 +0000
Subject: [PATCH] [clang-tidy] add new checker for string literal with NUL
 character.

Summary:
This patch adds the support for detecting suspicious string
literals and their //incorrect// usage.

The following example shows an incorrect character escaping leading
to an embedded NUL character.
```
  std::string str = "\0x42";   // Should be "\x42".
```

The patch also adds detection of truncated literals when a literal
is passed to a string constructor.

Reviewers: hokein, alexfh

Subscribers: LegalizeAdulthood, bcraig, Eugene.Zelenko, bkramer, cfe-commits

Differential Revision: http://reviews.llvm.org/D18783

git-svn-id: https://llvm.org/svn/llvm-project/clang-tools-extra/trunk@265691 91177308-0d34-0410-b5e6-96231b3b80d8
---
 clang-tidy/misc/CMakeLists.txt                |  1 +
 clang-tidy/misc/MiscTidyModule.cpp            |  3 +
 .../StringLiteralWithEmbeddedNulCheck.cpp     | 83 ++++++++++++++++++
 .../misc/StringLiteralWithEmbeddedNulCheck.h  | 35 ++++++++
 docs/ReleaseNotes.rst                         |  6 ++
 docs/clang-tidy/checks/list.rst               |  1 +
 .../misc-string-literal-with-embedded-nul.rst | 38 +++++++++
 .../misc-string-literal-with-embedded-nul.cpp | 85 +++++++++++++++++++
 8 files changed, 252 insertions(+)
 create mode 100644 clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp
 create mode 100644 clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h
 create mode 100644 docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst
 create mode 100644 test/clang-tidy/misc-string-literal-with-embedded-nul.cpp

diff --git a/clang-tidy/misc/CMakeLists.txt b/clang-tidy/misc/CMakeLists.txt
index fdfa26e..b5a5d53 100644
--- a/clang-tidy/misc/CMakeLists.txt
+++ b/clang-tidy/misc/CMakeLists.txt
@@ -23,6 +23,7 @@ add_clang_library(clangTidyMiscModule
   SizeofContainerCheck.cpp
   StaticAssertCheck.cpp
   StringIntegerAssignmentCheck.cpp
+  StringLiteralWithEmbeddedNulCheck.cpp
   SuspiciousMissingCommaCheck.cpp
   SuspiciousSemicolonCheck.cpp
   SwappedArgumentsCheck.cpp
diff --git a/clang-tidy/misc/MiscTidyModule.cpp b/clang-tidy/misc/MiscTidyModule.cpp
index 7dc8a21..efbcde7 100644
--- a/clang-tidy/misc/MiscTidyModule.cpp
+++ b/clang-tidy/misc/MiscTidyModule.cpp
@@ -31,6 +31,7 @@
 #include "SizeofContainerCheck.h"
 #include "StaticAssertCheck.h"
 #include "StringIntegerAssignmentCheck.h"
+#include "StringLiteralWithEmbeddedNulCheck.h"
 #include "SuspiciousMissingCommaCheck.h"
 #include "SuspiciousSemicolonCheck.h"
 #include "SwappedArgumentsCheck.h"
@@ -89,6 +90,8 @@ public:
         "misc-static-assert");
     CheckFactories.registerCheck<StringIntegerAssignmentCheck>(
         "misc-string-integer-assignment");
+    CheckFactories.registerCheck<StringLiteralWithEmbeddedNulCheck>(
+        "misc-string-literal-with-embedded-nul");
     CheckFactories.registerCheck<SuspiciousMissingCommaCheck>(
         "misc-suspicious-missing-comma");
     CheckFactories.registerCheck<SuspiciousSemicolonCheck>(
diff --git a/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp b/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp
new file mode 100644
index 0000000..335927b
--- /dev/null
+++ b/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp
@@ -0,0 +1,83 @@
+//===--- StringLiteralWithEmbeddedNulCheck.cpp - clang-tidy----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StringLiteralWithEmbeddedNulCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+AST_MATCHER(StringLiteral, containsNul) {
+  for (size_t i = 0; i < Node.getLength(); ++i)
+    if (Node.getCodeUnit(i) == '\0')
+      return true;
+  return false;
+}
+
+void StringLiteralWithEmbeddedNulCheck::registerMatchers(MatchFinder *Finder) {
+  // Match every string literal that contains an embedded NUL character. The
+  // extra filtering for incorrectly escaped characters is done in |check|.
+  Finder->addMatcher(stringLiteral(containsNul()).bind("strlit"), this);
+
+  // The remaining checks only apply to C++.
+  if (!getLangOpts().CPlusPlus)
+    return;
+
+  const auto StrLitWithNul =
+      ignoringParenImpCasts(stringLiteral(containsNul()).bind("truncated"));
+
+  // Match string constructor.
+  const auto StringConstructorExpr = expr(anyOf(
+      cxxConstructExpr(argumentCountIs(1),
+                       hasDeclaration(cxxMethodDecl(hasName("basic_string")))),
+      // If present, the second argument is the alloc object which must not
+      // be present explicitly.
+      cxxConstructExpr(argumentCountIs(2),
+                       hasDeclaration(cxxMethodDecl(hasName("basic_string"))),
+                       hasArgument(1, cxxDefaultArgExpr()))));
+
+  // Detect passing a suspicious string literal to a string constructor.
+  // example: std::string str = "abc\0def";
+  Finder->addMatcher(
+      cxxConstructExpr(StringConstructorExpr, hasArgument(0, StrLitWithNul)),
+      this);
+
+  // Detect passing a suspicious string literal through an overloaded operator.
+  Finder->addMatcher(cxxOperatorCallExpr(hasAnyArgument(StrLitWithNul)), this);
+}
+
+void StringLiteralWithEmbeddedNulCheck::check(
+    const MatchFinder::MatchResult &Result) {
+  if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
+    for (size_t Offset = 0, Length = SL->getLength(); Offset < Length;
+         ++Offset) {
+      // Find a sequence like "\0x12": a NUL, an 'x' and two hex digits.
+      if (Offset + 3 < Length && SL->getCodeUnit(Offset) == '\0' &&
+          SL->getCodeUnit(Offset + 1) == 'x' &&
+          isHexDigit(SL->getCodeUnit(Offset + 2)) &&
+          isHexDigit(SL->getCodeUnit(Offset + 3))) {
+        diag(SL->getLocStart(), "suspicious embedded NUL character");
+        return;
+      }
+    }
+  }
+
+  if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("truncated")) {
+    diag(SL->getLocStart(),
+         "truncated string literal with embedded NUL character");
+  }
+}
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h b/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h
new file mode 100644
index 0000000..e4a87fc
--- /dev/null
+++ b/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h
@@ -0,0 +1,35 @@
+//===--- StringLiteralWithEmbeddedNulCheck.h - clang-tidy--------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
+
+#include "../ClangTidy.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+/// Find suspicious string literals with embedded NUL characters.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html
+class StringLiteralWithEmbeddedNulCheck : public ClangTidyCheck {
+public:
+  StringLiteralWithEmbeddedNulCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 7b65a02..20c156f 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -97,6 +97,12 @@ identified. The improvements since the 3.8 release include:
   Warns when there is a explicit redundant cast of a calculation result to a
   bigger type.
 
+- New `misc-string-literal-with-embedded-nul
+  <http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html>`_ check
+
+  Warns about suspicious NUL character in string literals which may lead to
+  truncation or invalid character escaping.
+
 - New `misc-suspicious-missing-comma
   <http://clang.llvm.org/extra/clang-tidy/checks/misc-suspicious-missing-comma.html>`_ check
 
diff --git a/docs/clang-tidy/checks/list.rst b/docs/clang-tidy/checks/list.rst
index 28d4915..25e8851 100644
--- a/docs/clang-tidy/checks/list.rst
+++ b/docs/clang-tidy/checks/list.rst
@@ -66,6 +66,7 @@ Clang-Tidy Checks
    misc-sizeof-container
    misc-static-assert
    misc-string-integer-assignment
+   misc-string-literal-with-embedded-nul
    misc-suspicious-missing-comma
    misc-suspicious-semicolon
    misc-swapped-arguments
diff --git a/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst b/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst
new file mode 100644
index 0000000..3661218
--- /dev/null
+++ b/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst
@@ -0,0 +1,38 @@
+.. title:: clang-tidy - misc-string-literal-with-embedded-nul
+
+misc-string-literal-with-embedded-nul
+=====================================
+
+Finds occurrences of string literals with embedded NUL characters and validates
+their usage.
+
+
+Invalid escaping
+^^^^^^^^^^^^^^^^
+
+Special characters can be escaped within a string literal by using their
+hexadecimal encoding like ``\x42``. A common mistake is to escape them
+like this ``\0x42`` where the ``\0`` stands for the NUL character.
+
+.. code:: c++
+
+  const char Example[] = "Invalid character: \0x12 should be \x12";
+  const char Bytes[] = "\x03\0x02\0x01\0x00\0xFF\0xFF\0xFF";
+
+
+Truncated literal
+^^^^^^^^^^^^^^^^^
+
+String-like classes can manipulate strings with embedded NUL as they are
+keeping track of the bytes and the length. This is not the case for a
+``char*`` (NUL-terminated) string.
+
+A common mistake is to pass a string-literal with embedded NUL to a string
+constructor expecting a NUL-terminated string. The bytes after the first NUL
+character are truncated.
+
+.. code:: c++
+
+  std::string str("abc\0def");  // "def" is truncated
+  str += "\0";                  // This statement is doing nothing
+  if (str == "\0abc") return;   // This compares str with an empty string
diff --git a/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp b/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp
new file mode 100644
index 0000000..2605dd4
--- /dev/null
+++ b/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp
@@ -0,0 +1,85 @@
+// RUN: %check_clang_tidy %s misc-string-literal-with-embedded-nul %t
+
+namespace std {
+template <typename T>
+class allocator {};
+template <typename T>
+class char_traits {};
+template <typename C, typename T, typename A>
+struct basic_string {
+  typedef basic_string<C, T, A> _Type;
+  basic_string();
+  basic_string(const C *p, const A &a = A());
+
+  _Type& operator+=(const C* s);
+  _Type& operator=(const C* s);
+};
+
+typedef basic_string<char, std::char_traits<char>, std::allocator<char>> string;
+typedef basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>> wstring;
+}
+
+bool operator==(const std::string&, const char*);
+bool operator==(const char*, const std::string&);
+
+
+const char Valid[] = "This is valid \x12.";
+const char Strange[] = "This is strange \0x12 and must be fixed";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: suspicious embedded NUL character [misc-string-literal-with-embedded-nul]
+
+const char textA[] = "\0x01\0x02\0x03\0x04";
+// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious embedded NUL character
+const wchar_t textW[] = L"\0x01\0x02\0x03\0x04";
+// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: suspicious embedded NUL character
+
+const char A[] = "\0";
+const char B[] = "\0x";
+const char C[] = "\0x1";
+const char D[] = "\0x11";
+// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character
+
+const wchar_t E[] = L"\0";
+const wchar_t F[] = L"\0x";
+const wchar_t G[] = L"\0x1";
+const wchar_t H[] = L"\0x11";
+// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: suspicious embedded NUL character
+
+const char I[] = "\000\000\000\000";
+const char J[] = "\0\0\0\0\0\0";
+const char K[] = "";
+
+const char L[] = "\0x12" "\0x12" "\0x12" "\0x12";
+// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character
+
+void TestA() {
+  std::string str1 = "abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal
+  std::string str2 = "\0";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal
+  std::string str3("\0");
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal
+  std::string str4{"\x00\x01\x02\x03"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal
+
+  std::string str;
+  str += "abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: truncated string literal
+  str = "abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: truncated string literal
+
+  if (str == "abc\0def") return;
+  // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: truncated string literal
+  if ("abc\0def" == str) return;
+  // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: truncated string literal
+}
+
+void TestW() {
+  std::wstring str1 = L"abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal
+  std::wstring str2 = L"\0";
+  // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal
+  std::wstring str3(L"\0");
+  // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal
+  std::wstring str4{L"\x00\x01\x02\x03"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal
+}