diff options
| -rw-r--r-- | clang/lib/Format/Format.cpp | 72 | ||||
| -rw-r--r-- | clang/lib/Format/FormatToken.h | 11 | ||||
| -rw-r--r-- | clang/lib/Format/UnwrappedLineParser.cpp | 69 | ||||
| -rw-r--r-- | clang/lib/Format/UnwrappedLineParser.h | 11 | ||||
| -rw-r--r-- | clang/unittests/Format/FormatTest.cpp | 92 | 
5 files changed, 224 insertions, 31 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 95ae7b56dc4..b4eae19d47d 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1157,7 +1157,8 @@ public:                     encoding::Encoding Encoding)        : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),          TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style), -        IdentTable(getFormattingLangOpts()), Encoding(Encoding) { +        IdentTable(getFormattingLangOpts()), Encoding(Encoding), +        FirstInLineIndex(0) {      Lex.SetKeepWhitespaceMode(true);      for (const std::string& ForEachMacro : Style.ForEachMacros) @@ -1167,9 +1168,12 @@ public:    ArrayRef<FormatToken *> lex() {      assert(Tokens.empty()); +    assert(FirstInLineIndex == 0);      do {        Tokens.push_back(getNextToken());        tryMergePreviousTokens(); +      if (Tokens.back()->NewlinesBefore > 0) +        FirstInLineIndex = Tokens.size() - 1;      } while (Tokens.back()->Tok.isNot(tok::eof));      return Tokens;    } @@ -1180,6 +1184,8 @@ private:    void tryMergePreviousTokens() {      if (tryMerge_TMacro())        return; +    if (tryMergeConflictMarkers()) +      return;      if (Style.Language == FormatStyle::LK_JavaScript) {        static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; @@ -1254,6 +1260,68 @@ private:      return true;    } +  bool tryMergeConflictMarkers() { +    if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) +      return false; + +    // Conflict lines look like: +    // <marker> <text from the vcs> +    // For example: +    // >>>>>>> /file/in/file/system at revision 1234 +    // +    // We merge all tokens in a line that starts with a conflict marker +    // into a single token with a special token type that the unwrapped line +    // parser will use to correctly rebuild the underlying code. + +    FileID ID; +    // Get the position of the first token in the line. +    unsigned FirstInLineOffset; +    std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( +        Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); +    StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); +    // Calculate the offset of the start of the current line. +    auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); +    if (LineOffset == StringRef::npos) { +      LineOffset = 0; +    } else { +      ++LineOffset; +    } + +    auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); +    StringRef LineStart; +    if (FirstSpace == StringRef::npos) { +      LineStart = Buffer.substr(LineOffset); +    } else { +      LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); +    } + +    TokenType Type = TT_Unknown; +    if (LineStart == "<<<<<<<" || LineStart == ">>>>") { +      Type = TT_ConflictStart; +    } else if (LineStart == "|||||||" || LineStart == "=======" || +               LineStart == "====") { +      Type = TT_ConflictAlternative; +    } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { +      Type = TT_ConflictEnd; +    } + +    if (Type != TT_Unknown) { +      FormatToken *Next = Tokens.back(); + +      Tokens.resize(FirstInLineIndex + 1); +      // We do not need to build a complete token here, as we will skip it +      // during parsing anyway (as we must not touch whitespace around conflict +      // markers). +      Tokens.back()->Type = Type; +      Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); + +      Tokens.push_back(Next); +      return true; +    } + +    return false; +  } +    FormatToken *getNextToken() {      if (GreaterStashed) {        // Create a synthesized second '>' token. @@ -1401,6 +1469,8 @@ private:    IdentifierTable IdentTable;    encoding::Encoding Encoding;    llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; +  // Index (in 'Tokens') of the last token that starts a new line. +  unsigned FirstInLineIndex;    SmallVector<FormatToken *, 16> Tokens;    SmallVector<IdentifierInfo*, 8> ForEachMacros; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index a5aaa6f6de7..249eecc0ba1 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -33,19 +33,22 @@ enum TokenType {    TT_BlockComment,    TT_CastRParen,    TT_ConditionalExpr, +  TT_ConflictAlternative, +  TT_ConflictEnd, +  TT_ConflictStart,    TT_CtorInitializerColon,    TT_CtorInitializerComma,    TT_DesignatedInitializerPeriod,    TT_DictLiteral, -  TT_ImplicitStringLiteral, -  TT_InlineASMColon, -  TT_InheritanceColon,    TT_FunctionLBrace,    TT_FunctionTypeLParen, +  TT_ImplicitStringLiteral, +  TT_InheritanceColon, +  TT_InlineASMColon,    TT_LambdaLSquare,    TT_LineComment, -  TT_ObjCBlockLParen,    TT_ObjCBlockLBrace, +  TT_ObjCBlockLParen,    TT_ObjCDecl,    TT_ObjCForIn,    TT_ObjCMethodExpr, diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 004c8364830..d63b38bcd1a 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -465,14 +465,14 @@ void UnwrappedLineParser::parsePPDirective() {    }  } -void UnwrappedLineParser::pushPPConditional() { -  if (!PPStack.empty() && PPStack.back() == PP_Unreachable) +void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { +  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))      PPStack.push_back(PP_Unreachable);    else      PPStack.push_back(PP_Conditional);  } -void UnwrappedLineParser::parsePPIf(bool IfDef) { +void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {    ++PPBranchLevel;    assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());    if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { @@ -480,37 +480,22 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) {      PPLevelBranchCount.push_back(0);    }    PPChainBranchIndex.push(0); -  nextToken(); -  bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && -                         StringRef(FormatTok->Tok.getLiteralData(), -                                   FormatTok->Tok.getLength()) == "0") || -                        FormatTok->Tok.is(tok::kw_false); -  if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) { -    PPStack.push_back(PP_Unreachable); -  } else { -    pushPPConditional(); -  } -  parsePPUnknown(); +  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; +  conditionalCompilationCondition(Unreachable || Skip);  } -void UnwrappedLineParser::parsePPElse() { +void UnwrappedLineParser::conditionalCompilationAlternative() {    if (!PPStack.empty())      PPStack.pop_back();    assert(PPBranchLevel < (int)PPLevelBranchIndex.size());    if (!PPChainBranchIndex.empty())      ++PPChainBranchIndex.top(); -  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && -      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) { -    PPStack.push_back(PP_Unreachable); -  } else { -    pushPPConditional(); -  } -  parsePPUnknown(); +  conditionalCompilationCondition( +      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && +      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());  } -void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } - -void UnwrappedLineParser::parsePPEndIf() { +void UnwrappedLineParser::conditionalCompilationEnd() {    assert(PPBranchLevel < (int)PPLevelBranchIndex.size());    if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {      if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { @@ -524,6 +509,27 @@ void UnwrappedLineParser::parsePPEndIf() {      PPChainBranchIndex.pop();    if (!PPStack.empty())      PPStack.pop_back(); +} + +void UnwrappedLineParser::parsePPIf(bool IfDef) { +  nextToken(); +  bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && +                         StringRef(FormatTok->Tok.getLiteralData(), +                                   FormatTok->Tok.getLength()) == "0") || +                        FormatTok->Tok.is(tok::kw_false); +  conditionalCompilationStart(!IfDef && IsLiteralFalse); +  parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElse() { +  conditionalCompilationAlternative(); +  parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } + +void UnwrappedLineParser::parsePPEndIf() { +  conditionalCompilationEnd();    parsePPUnknown();  } @@ -1406,6 +1412,19 @@ void UnwrappedLineParser::readToken() {        flushComments(isOnNewLine(*FormatTok));        parsePPDirective();      } +    while (FormatTok->Type == TT_ConflictStart || +           FormatTok->Type == TT_ConflictEnd || +           FormatTok->Type == TT_ConflictAlternative) { +      if (FormatTok->Type == TT_ConflictStart) { +        conditionalCompilationStart(/*Unreachable=*/false); +      } else if (FormatTok->Type == TT_ConflictAlternative) { +        conditionalCompilationAlternative(); +      } else if(FormatTok->Type == TT_ConflictEnd) { +        conditionalCompilationEnd(); +      } +      FormatTok = Tokens->getNextToken(); +      FormatTok->MustBreakBefore = true; +    }      if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&          !Line->InPPDirective) { diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 6eaa415b6b8..8f0c5a3ef41 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -107,7 +107,16 @@ private:    void flushComments(bool NewlineBeforeNext);    void pushToken(FormatToken *Tok);    void calculateBraceTypes(); -  void pushPPConditional(); + +  // Marks a conditional compilation edge (for example, an '#if', '#ifdef', +  // '#else' or merge conflict marker). If 'Unreachable' is true, assumes +  // this branch either cannot be taken (for example '#if false'), or should +  // not be taken in this round. +  void conditionalCompilationCondition(bool Unreachable); +  void conditionalCompilationStart(bool Unreachable); +  void conditionalCompilationAlternative(); +  void conditionalCompilationEnd(); +    bool isOnNewLine(const FormatToken& FormatTok);    // FIXME: We are constantly running into bugs where Line.Level is incorrectly diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 382276abb60..fb33bd685be 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8402,5 +8402,97 @@ TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) {    EXPECT_EQ(code, format(code));  } +TEST_F(FormatTest, HandleConflictMarkers) { +  // Git/SVN conflict markers. +  EXPECT_EQ("int a;\n" +            "void f() {\n" +            "  callme(some(parameter1,\n" +            "<<<<<<< text by the vcs\n" +            "              parameter2),\n" +            "||||||| text by the vcs\n" +            "              parameter2),\n" +            "         parameter3,\n" +            "======= text by the vcs\n" +            "              parameter2, parameter3),\n" +            ">>>>>>> text by the vcs\n" +            "         otherparameter);\n", +            format("int a;\n" +                   "void f() {\n" +                   "  callme(some(parameter1,\n" +                   "<<<<<<< text by the vcs\n" +                   "  parameter2),\n" +                   "||||||| text by the vcs\n" +                   "  parameter2),\n" +                   "  parameter3,\n" +                   "======= text by the vcs\n" +                   "  parameter2,\n" +                   "  parameter3),\n" +                   ">>>>>>> text by the vcs\n" +                   "  otherparameter);\n")); + +  // Perforce markers. +  EXPECT_EQ("void f() {\n" +            "  function(\n" +            ">>>> text by the vcs\n" +            "      parameter,\n" +            "==== text by the vcs\n" +            "      parameter,\n" +            "==== text by the vcs\n" +            "      parameter,\n" +            "<<<< text by the vcs\n" +            "      parameter);\n", +            format("void f() {\n" +                   "  function(\n" +                   ">>>> text by the vcs\n" +                   "  parameter,\n" +                   "==== text by the vcs\n" +                   "  parameter,\n" +                   "==== text by the vcs\n" +                   "  parameter,\n" +                   "<<<< text by the vcs\n" +                   "  parameter);\n")); + +  EXPECT_EQ("<<<<<<<\n" +            "|||||||\n" +            "=======\n" +            ">>>>>>>", +            format("<<<<<<<\n" +                   "|||||||\n" +                   "=======\n" +                   ">>>>>>>")); + +  EXPECT_EQ("<<<<<<<\n" +            "|||||||\n" +            "int i;\n" +            "=======\n" +            ">>>>>>>", +            format("<<<<<<<\n" +                   "|||||||\n" +                   "int i;\n" +                   "=======\n" +                   ">>>>>>>")); + +  // FIXME: Handle parsing of macros around conflict markers correctly: +  EXPECT_EQ("#define Macro \\\n" +            "<<<<<<<\n" +            "Something \\\n" +            "|||||||\n" +            "Else \\\n" +            "=======\n" +            "Other \\\n" +            ">>>>>>>\n" +            "End int i;\n", +            format("#define Macro \\\n" +                   "<<<<<<<\n" +                   "  Something \\\n" +                   "|||||||\n" +                   "  Else \\\n" +                   "=======\n" +                   "  Other \\\n" +                   ">>>>>>>\n" +                   "  End\n" +                   "int i;\n")); +} +  } // end namespace tooling  } // end namespace clang  | 

