diff options
author | Raphael Isemann <teemperor@gmail.com> | 2019-07-10 21:04:01 +0000 |
---|---|---|
committer | Raphael Isemann <teemperor@gmail.com> | 2019-07-10 21:04:01 +0000 |
commit | 0171866672346f2a6e956ea97d5cc913f85b39db (patch) | |
tree | e3bbf3cf35741158516cfca7c4ac1efa4ef3dc51 | |
parent | 021ba49b31dd5cf3ff4ea5182bee36431be11bdb (diff) | |
download | bcm5719-llvm-0171866672346f2a6e956ea97d5cc913f85b39db.tar.gz bcm5719-llvm-0171866672346f2a6e956ea97d5cc913f85b39db.zip |
[lldb] Fix handling of dollar characters in expr command
llvm-svn: 365698
7 files changed, 132 insertions, 31 deletions
diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile b/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile new file mode 100644 index 00000000000..f5a47fcc46c --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile @@ -0,0 +1,3 @@ +LEVEL = ../../make +C_SOURCES := main.c +include $(LEVEL)/Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py b/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py new file mode 100644 index 00000000000..03424658f3e --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py @@ -0,0 +1,4 @@ +from lldbsuite.test import lldbinline +from lldbsuite.test import decorators + +lldbinline.MakeInlineTest(__file__, globals(), None) diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c b/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c new file mode 100644 index 00000000000..e5fec25b35b --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c @@ -0,0 +1,21 @@ +// Make sure we correctly handle $ in variable names. + +int main() { + // Some variables that might conflict with our variables below. + int __lldb_expr_result = 2; + int $$foo = 1; + int R0 = 2; + + // Some variables with dollar signs that should work (and shadow + // any built-in LLDB variables). + int $__lldb_expr_result = 11; + int $foo = 12; + int $R0 = 13; + int $0 = 14; + + //%self.expect("expr $__lldb_expr_result", substrs=['(int) $0 = 11']) + //%self.expect("expr $foo", substrs=['(int)', ' = 12']) + //%self.expect("expr $R0", substrs=['(int)', ' = 13']) + //%self.expect("expr int $foo = 123", error=True, substrs=["declaration conflicts"]) + return 0; //%self.expect("expr $0", substrs=['(int)', ' = 14']) +} diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile b/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile new file mode 100644 index 00000000000..3759b5f9255 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile @@ -0,0 +1,4 @@ +LEVEL = ../../make +CXX_SOURCES := main.cpp +CXX_FLAGS_EXTRA := -finput-charset=UTF-8 -fextended-identifiers +include $(LEVEL)/Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py b/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py new file mode 100644 index 00000000000..03424658f3e --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py @@ -0,0 +1,4 @@ +from lldbsuite.test import lldbinline +from lldbsuite.test import decorators + +lldbinline.MakeInlineTest(__file__, globals(), None) diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp b/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp new file mode 100644 index 00000000000..82e38b1ef4c --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp @@ -0,0 +1,17 @@ +// Make sure we correctly handle unicode in variable names. + +struct A { + // We need a member variable in the context that could shadow our local + // variable. If our optimization code fails to handle this, then we won't + // correctly inject our local variable so that it won't get shadowed. + int foob\u00E1r = 2; + int foo() { + int foob\u00E1r = 3; + return foob\u00E1r; //%self.expect("expr foobár", substrs=['(int)', ' = 3']) + } +}; + +int main() { + A a; + return a.foo(); +} diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp index 39ee8f73bc4..f513b1eea36 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp @@ -9,6 +9,8 @@ #include "ClangExpressionSourceCode.h" #include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" #include "llvm/ADT/StringRef.h" #include "Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h" @@ -164,44 +166,90 @@ static void AddMacros(const DebugMacros *dm, CompileUnit *comp_unit, } } -/// Checks if the expression body contains the given variable as a token. -/// \param body The expression body. -/// \param var The variable token we are looking for. -/// \return True iff the expression body containes the variable as a token. -static bool ExprBodyContainsVar(llvm::StringRef body, llvm::StringRef var) { - assert(var.find_if([](char c) { return !clang::isIdentifierBody(c); }) == - llvm::StringRef::npos && - "variable contains non-identifier chars?"); - - size_t start = 0; - // Iterate over all occurences of the variable string in our expression. - while ((start = body.find(var, start)) != llvm::StringRef::npos) { - // We found our variable name in the expression. Check that the token - // that contains our needle is equal to our variable and not just contains - // the character sequence by accident. - // Prevents situations where we for example inlcude the variable 'FOO' in an - // expression like 'FOObar + 1'. - bool has_characters_before = - start != 0 && clang::isIdentifierBody(body[start - 1]); - bool has_characters_after = - start + var.size() < body.size() && - clang::isIdentifierBody(body[start + var.size()]); - - // Our token just contained the variable name as a substring. Continue - // searching the rest of the expression. - if (has_characters_before || has_characters_after) { - ++start; +namespace { +/// Allows checking if a token is contained in a given expression. +class TokenVerifier { + /// The tokens we found in the expression. + llvm::StringSet<> m_tokens; + +public: + TokenVerifier(std::string body); + /// Returns true iff the given expression body contained a token with the + /// given content. + bool hasToken(llvm::StringRef token) const { + return m_tokens.find(token) != m_tokens.end(); + } +}; +} // namespace + +TokenVerifier::TokenVerifier(std::string body) { + using namespace clang; + + // We only care about tokens and not their original source locations. If we + // move the whole expression to only be in one line we can simplify the + // following code that extracts the token contents. + std::replace(body.begin(), body.end(), '\n', ' '); + std::replace(body.begin(), body.end(), '\r', ' '); + + FileSystemOptions file_opts; + FileManager file_mgr(file_opts, + FileSystem::Instance().GetVirtualFileSystem()); + + // Let's build the actual source code Clang needs and setup some utility + // objects. + llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs()); + llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts( + new DiagnosticOptions()); + DiagnosticsEngine diags(diag_ids, diags_opts); + clang::SourceManager SM(diags, file_mgr); + auto buf = llvm::MemoryBuffer::getMemBuffer(body); + + FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get()); + + // Let's just enable the latest ObjC and C++ which should get most tokens + // right. + LangOptions Opts; + Opts.ObjC = true; + Opts.DollarIdents = true; + Opts.CPlusPlus17 = true; + Opts.LineComment = true; + + Lexer lex(FID, buf.get(), SM, Opts); + + Token token; + bool exit = false; + while (!exit) { + // Returns true if this is the last token we get from the lexer. + exit = lex.LexFromRawLexer(token); + + // Extract the column number which we need to extract the token content. + // Our expression is just one line, so we don't need to handle any line + // numbers here. + bool invalid = false; + unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid); + if (invalid) continue; - } - return true; + // Column numbers start at 1, but indexes in our string start at 0. + --start; + + // Annotations don't have a length, so let's skip them. + if (token.isAnnotation()) + continue; + + // Extract the token string from our source code and store it. + std::string token_str = body.substr(start, token.getLength()); + if (token_str.empty()) + continue; + m_tokens.insert(token_str); } - return false; } static void AddLocalVariableDecls(const lldb::VariableListSP &var_list_sp, StreamString &stream, const std::string &expr, lldb::LanguageType wrapping_language) { + TokenVerifier tokens(expr); + for (size_t i = 0; i < var_list_sp->GetSize(); i++) { lldb::VariableSP var_sp = var_list_sp->GetVariableAtIndex(i); @@ -213,7 +261,7 @@ static void AddLocalVariableDecls(const lldb::VariableListSP &var_list_sp, if (!var_name || var_name == ".block_descriptor") continue; - if (!expr.empty() && !ExprBodyContainsVar(expr, var_name.GetStringRef())) + if (!expr.empty() && !tokens.hasToken(var_name.GetStringRef())) continue; if ((var_name == "self" || var_name == "_cmd") && |