diff options
author | Ben Hamilton <benhamilton@google.com> | 2018-01-17 17:33:08 +0000 |
---|---|---|
committer | Ben Hamilton <benhamilton@google.com> | 2018-01-17 17:33:08 +0000 |
commit | e2e3e67cf1a7c2fda14f76200d665662b1fdfaa5 (patch) | |
tree | 4cd7fc84d17d55b50d3d2522296f97bf5d75a994 /clang/lib/Format | |
parent | 68bc4bd6e519ee8d82ee7a3882821b4a165662e1 (diff) | |
download | bcm5719-llvm-e2e3e67cf1a7c2fda14f76200d665662b1fdfaa5.tar.gz bcm5719-llvm-e2e3e67cf1a7c2fda14f76200d665662b1fdfaa5.zip |
[Format] Improve ObjC header guessing heuristic
Summary:
This improves upon the previous Objective-C header guessing heuristic
from rC320479.
Now, we run the lexer on C++ header files and look for Objective-C
keywords and syntax. We also look for Foundation types.
Test Plan: make -j12 FormatTests && ./tools/clang/unittests/Format/FormatTests
Reviewers: jolesiak, krasimir
Reviewed By: jolesiak
Subscribers: klimek, cfe-commits
Differential Revision: https://reviews.llvm.org/D42135
llvm-svn: 322690
Diffstat (limited to 'clang/lib/Format')
-rw-r--r-- | clang/lib/Format/Format.cpp | 124 |
1 file changed, 116 insertions, 8 deletions
```diff
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 6ef38ad1692..896e055d870 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -32,6 +32,7 @@
 #include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Path.h"
@@ -40,6 +41,7 @@
 #include <algorithm>
 #include <memory>
 #include <string>
+#include <unordered_set>
 
 #define DEBUG_TYPE "format-formatter"
 
@@ -48,6 +50,16 @@ using clang::format::FormatStyle;
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat)
 
+namespace std {
+// Allow using StringRef in std::unordered_set.
+template <> struct hash<llvm::StringRef> {
+public:
+  size_t operator()(const llvm::StringRef &s) const {
+    return llvm::hash_value(s);
+  }
+};
+} // namespace std
+
 namespace llvm {
 namespace yaml {
 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
@@ -1400,6 +1412,101 @@ private:
   std::set<FormatToken *, FormatTokenLess> DeletedTokens;
 };
 
+class ObjCHeaderStyleGuesser : public TokenAnalyzer {
+public:
+  ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style), IsObjC(false) {}
+
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override {
+    assert(Style.Language == FormatStyle::LK_Cpp);
+    IsObjC = guessIsObjC(AnnotatedLines, Tokens.getKeywords());
+    tooling::Replacements Result;
+    return {Result, 0};
+  }
+
+  bool isObjC() { return IsObjC; }
+
+private:
+  static bool guessIsObjC(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                          const AdditionalKeywords &Keywords) {
+    static const std::unordered_set<StringRef> FoundationIdentifiers = {
+        "CGFloat",
+        "NSAffineTransform",
+        "NSArray",
+        "NSAttributedString",
+        "NSCache",
+        "NSCharacterSet",
+        "NSCountedSet",
+        "NSData",
+        "NSDataDetector",
+        "NSDecimal",
+        "NSDecimalNumber",
+        "NSDictionary",
+        "NSEdgeInsets",
+        "NSHashTable",
+        "NSIndexPath",
+        "NSIndexSet",
+        "NSInteger",
+        "NSLocale",
+        "NSMapTable",
+        "NSMutableArray",
+        "NSMutableAttributedString",
+        "NSMutableCharacterSet",
+        "NSMutableData",
+        "NSMutableDictionary",
+        "NSMutableIndexSet",
+        "NSMutableOrderedSet",
+        "NSMutableSet",
+        "NSMutableString",
+        "NSNumber",
+        "NSNumberFormatter",
+        "NSOrderedSet",
+        "NSPoint",
+        "NSPointerArray",
+        "NSRange",
+        "NSRect",
+        "NSRegularExpression",
+        "NSSet",
+        "NSSize",
+        "NSString",
+        "NSUInteger",
+        "NSURL",
+        "NSURLComponents",
+        "NSURLQueryItem",
+        "NSUUID",
+    };
+
+    for (auto &Line : AnnotatedLines) {
+      for (FormatToken *FormatTok = Line->First->Next; FormatTok;
+           FormatTok = FormatTok->Next) {
+        if ((FormatTok->Previous->is(tok::at) &&
+             (FormatTok->isObjCAtKeyword(tok::objc_interface) ||
+              FormatTok->isObjCAtKeyword(tok::objc_implementation) ||
+              FormatTok->isObjCAtKeyword(tok::objc_protocol) ||
+              FormatTok->isObjCAtKeyword(tok::objc_end) ||
+              FormatTok->isOneOf(tok::numeric_constant, tok::l_square,
+                                 tok::l_brace))) ||
+            (FormatTok->Tok.isAnyIdentifier() &&
+             FoundationIdentifiers.find(FormatTok->TokenText) !=
+                 FoundationIdentifiers.end()) ||
+            FormatTok->is(TT_ObjCStringLiteral) ||
+            FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
+                               TT_ObjCBlockLBrace, TT_ObjCBlockLParen,
+                               TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr,
+                               TT_ObjCMethodSpecifier, TT_ObjCProperty)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  bool IsObjC;
+};
+
 struct IncludeDirective {
   StringRef Filename;
   StringRef Text;
@@ -2185,14 +2292,15 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
   FormatStyle Style = getLLVMStyle();
   Style.Language = getLanguageByFileName(FileName);
 
-  // This is a very crude detection of whether a header contains ObjC code that
-  // should be improved over time and probably be done on tokens, not one the
-  // bare content of the file.
-  if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&
-      (Code.contains("\n- (") || Code.contains("\n+ (") ||
-       Code.contains("\n@end\n") || Code.contains("\n@end ") ||
-       Code.endswith("@end")))
-    Style.Language = FormatStyle::LK_ObjC;
+  if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h")) {
+    std::unique_ptr<Environment> Env =
+        Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+    ObjCHeaderStyleGuesser Guesser(*Env, Style);
+    Guesser.process();
+    if (Guesser.isObjC()) {
+      Style.Language = FormatStyle::LK_ObjC;
+    }
+  }
 
   FormatStyle FallbackStyle = getNoStyle();
   if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
```