summaryrefslogtreecommitdiffstats
path: root/clang/lib/Format
diff options
context:
space:
mode:
author Daniel Jasper <djasper@google.com> 2016-03-02 22:44:03 +0000
committer Daniel Jasper <djasper@google.com> 2016-03-02 22:44:03 +0000
commit abd1f574535fd57d2099daff486afb0ded29138a (patch)
tree 3f8f70a7022cbcbf9f2b0d0899258e38daa28d23 /clang/lib/Format
parent a0d7a2cd3f878f8fa9624006998558057bed9081 (diff)
download bcm5719-llvm-abd1f574535fd57d2099daff486afb0ded29138a.tar.gz
bcm5719-llvm-abd1f574535fd57d2099daff486afb0ded29138a.zip
clang-format: [JS] Optionally re-quote string literals.
Turns "foo" into 'foo' (or vice versa, depending on configuration). This makes it more convenient to follow the Google JavaScript style guide: https://google.github.io/styleguide/javascriptguide.xml?showone=Strings#Strings

This functionality is behind the option "JavaScriptQuotes", which can be:
* "Leave" (no re-quoting)
* "Single" (change to single quotes)
* "Double" (change to double quotes)

This also changes single quoted JavaScript string literals to be treated as tok::string_literal, not tok::char_constant, which fixes two unrelated tests.

Patch by Martin Probst. Thank you.

llvm-svn: 262534
Diffstat (limited to 'clang/lib/Format')
-rw-r--r-- clang/lib/Format/Format.cpp 98
-rw-r--r-- clang/lib/Format/TokenAnnotator.cpp 6
2 files changed, 96 insertions, 8 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 9d5c0bca335..c8f5ec9259d 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -71,6 +71,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
}
};
+template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
+ static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) { // pairs each enum value with its textual name
+ IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave); // no re-quoting
+ IO.enumCase(Value, "Single", FormatStyle::JSQS_Single); // re-quote to '...'
+ IO.enumCase(Value, "Double", FormatStyle::JSQS_Double); // re-quote to "..."
+ }
+};
+
template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
IO.enumCase(Value, "None", FormatStyle::SFS_None);
@@ -335,6 +343,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("Standard", Style.Standard);
IO.mapOptional("TabWidth", Style.TabWidth);
IO.mapOptional("UseTab", Style.UseTab);
+ IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
}
};
@@ -522,6 +531,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.SpacesBeforeTrailingComments = 1;
LLVMStyle.Standard = FormatStyle::LS_Cpp11;
LLVMStyle.UseTab = FormatStyle::UT_Never;
+ LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
LLVMStyle.ReflowComments = true;
LLVMStyle.SpacesInParentheses = false;
LLVMStyle.SpacesInSquareBrackets = false;
@@ -590,6 +600,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.CommentPragmas = "@(export|return|see|visibility) ";
GoogleStyle.MaxEmptyLinesToKeep = 3;
GoogleStyle.SpacesInContainerLiterals = false;
+ GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
} else if (Language == FormatStyle::LK_Proto) {
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
GoogleStyle.SpacesInContainerLiterals = false;
@@ -766,13 +777,13 @@ namespace {
class FormatTokenLexer {
public:
FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
- encoding::Encoding Encoding)
+ encoding::Encoding Encoding, tooling::Replacements &Replaces)
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
LessStashed(false), Column(0), TrailingWhitespace(0),
SourceMgr(SourceMgr), ID(ID), Style(Style),
IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
- Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
- MacroBlockBeginRegex(Style.MacroBlockBegin),
+ Encoding(Encoding), Replaces(Replaces), FirstInLineIndex(0),
+ FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
MacroBlockEndRegex(Style.MacroBlockEnd) {
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
getFormattingLangOpts(Style)));
@@ -791,6 +802,8 @@ public:
if (Style.Language == FormatStyle::LK_JavaScript)
tryParseJSRegexLiteral();
tryMergePreviousTokens();
+ if (Style.Language == FormatStyle::LK_JavaScript)
+ tryRequoteJSStringLiteral();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->Tok.isNot(tok::eof));
@@ -1061,6 +1074,75 @@ private:
return false;
}
+ // If the last token is a string literal that opens with the quote character
+ // Style.JavaScriptQuotes asks to replace, records replacements that swap both
+ // delimiters and fix up the escaping of quotes inside. JSQS_Leave: no-op.
+ void tryRequoteJSStringLiteral() {
+ if (Style.JavaScriptQuotes == FormatStyle::JSQS_Leave)
+ return;
+
+ FormatToken *FormatTok = Tokens.back();
+ StringRef Input = FormatTok->TokenText;
+ if (!FormatTok->isStringLiteral() ||
+ // NB: only literals opening with the quote being replaced are rewritten;
+ // this also avoids breaking `template strings`.
+ (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
+ !Input.startswith("\"")) ||
+ (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
+ !Input.startswith("\'")))
+ return;
+
+ // Swap the first and last character of the token for the target quote.
+ bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
+ SourceLocation Start = FormatTok->Tok.getLocation();
+ auto Replace = [&](SourceLocation Start, unsigned Length,
+ StringRef ReplacementText) {
+ Replaces.insert(
+ tooling::Replacement(SourceMgr, Start, Length, ReplacementText));
+ };
+ Replace(Start, 1, IsSingle ? "'" : "\"");
+ Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
+ IsSingle ? "'" : "\"");
+
+ // Fix up escapes between the delimiters, tracking the resulting width.
+ size_t ColumnWidth = FormatTok->TokenText.size();
+ bool Escaped = false;
+ for (size_t i = 1; i < Input.size() - 1; i++) {
+ switch (Input[i]) {
+ case '\\':
+ if (!Escaped && i + 1 < Input.size() &&
+ ((IsSingle && Input[i + 1] == '"') ||
+ (!IsSingle && Input[i + 1] == '\''))) {
+ // Drop this backslash: it escapes the old delimiter, now an ordinary char.
+ ColumnWidth--;
+ Replace(Start.getLocWithOffset(i), 1, "");
+ continue;
+ }
+ Escaped = !Escaped;
+ break;
+ case '\"':
+ case '\'':
+ if (!Escaped && IsSingle == (Input[i] == '\'')) {
+ // An unescaped occurrence of the new delimiter must gain a backslash.
+ Replace(Start.getLocWithOffset(i), 0, "\\");
+ ColumnWidth++;
+ }
+ Escaped = false;
+ break;
+ default:
+ Escaped = false;
+ break;
+ }
+ }
+
+ // Store the width adjusted above (+1 per added, -1 per removed backslash).
+ // NOTE(review): starts from TokenText.size() (chars), so check non-ASCII.
+ // FIXME(martinprobst): this might conflict with code breaking a long string
+ // literal (which clang-format doesn't do, yet). For that to work, this code
+ // would have to modify TokenText directly.
+ FormatTok->ColumnWidth = ColumnWidth;
+ }
+
bool tryMerge_TMacro() {
if (Tokens.size() < 4)
return false;
@@ -1359,6 +1441,7 @@ private:
IdentifierTable IdentTable;
AdditionalKeywords Keywords;
encoding::Encoding Encoding;
+ tooling::Replacements &Replaces;
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
// Index (in 'Tokens') of the last token that starts a new line.
unsigned FirstInLineIndex;
@@ -1382,10 +1465,15 @@ private:
Tok.IsUnterminatedLiteral = true;
} else if (Style.Language == FormatStyle::LK_JavaScript &&
Tok.TokenText == "''") {
- Tok.Tok.setKind(tok::char_constant);
+ Tok.Tok.setKind(tok::string_literal);
}
}
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.is(tok::char_constant)) {
+ Tok.Tok.setKind(tok::string_literal);
+ }
+
if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
Tok.TokenText == "/* clang-format on */")) {
FormattingDisabled = false;
@@ -1443,7 +1531,7 @@ public:
tooling::Replacements format(bool *IncompleteFormat) {
tooling::Replacements Result;
- FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
+ FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding, Result);
UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
*this);
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index bc8d5c7b6bf..f7cfe9fa583 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -410,7 +410,7 @@ private:
Style.Language != FormatStyle::LK_Cpp)) ||
Style.Language == FormatStyle::LK_Proto) &&
(Previous->Tok.getIdentifierInfo() ||
- Previous->is(tok::char_constant)))
+ Previous->is(tok::string_literal)))
Previous->Type = TT_SelectorName;
if (CurrentToken->is(tok::colon) ||
Style.Language == FormatStyle::LK_JavaScript)
@@ -2170,8 +2170,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
if (Style.Language == FormatStyle::LK_JavaScript) {
// FIXME: This might apply to other languages and token kinds.
- if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
- Left.Previous->is(tok::char_constant))
+ if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
+ Left.Previous->is(tok::string_literal))
return true;
if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
Left.Previous && Left.Previous->is(tok::equal) &&
OpenPOWER on IntegriCloud