From 6ec880d9b58177fe3001dcd92ad6830333fd6793 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 27 Dec 2017 08:59:52 +0000 Subject: Improve performance TokenizeWindowsCommandLine Patcy by Takuto Ikuta. This patch reduces lld link time of chromium's blink_core.dll in component build. Total size of input argument in .directives become nearly 300MB in the build and calling many strchr and assert becomes bottleneck. On my desktop machine, 4 times stats of the link time are like below. Improved around 10%. This patch TotalSeconds : 13.4918885 TotalSeconds : 13.9474257 TotalSeconds : 13.4941082 TotalSeconds : 13.6077962 Avg : 13.63530465 master TotalSeconds : 15.6938531 TotalSeconds : 15.7022508 TotalSeconds : 15.9567202 TotalSeconds : 14.5851505 Avg : 15.48449365 Differential Revision: https://reviews.llvm.org/D41590 llvm-svn: 321479 --- llvm/lib/Support/CommandLine.cpp | 46 +++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'llvm/lib/Support/CommandLine.cpp') diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index b547a093270..4caf4a4fdce 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) { O->getNumOccurrencesFlag() == cl::OneOrMore; } -static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); } +static bool isWhitespace(char C) { + return C == ' ' || C == '\t' || C == '\r' || C == '\n'; +} static bool isQuote(char C) { return C == '\"' || C == '\''; } @@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, break; } + char C = Src[I]; + // Backslash escapes the next character. - if (I + 1 < E && Src[I] == '\\') { + if (I + 1 < E && C == '\\') { ++I; // Skip the escape. Token.push_back(Src[I]); continue; } // Consume a quoted string. - if (isQuote(Src[I])) { - char Quote = Src[I++]; - while (I != E && Src[I] != Quote) { + if (isQuote(C)) { + ++I; + while (I != E && Src[I] != C) { // Backslash escapes the next character. if (Src[I] == '\\' && I + 1 != E) ++I; @@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, } // End the token if this is whitespace. - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { if (!Token.empty()) NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); @@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, } // This is a normal character. Append it. - Token.push_back(Src[I]); + Token.push_back(C); } // Append the last token after hitting EOF with no whitespace. @@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, // end of the source string. enum { INIT, UNQUOTED, QUOTED } State = INIT; for (size_t I = 0, E = Src.size(); I != E; ++I) { + char C = Src[I]; + // INIT state indicates that the current input index is at the start of // the string or between tokens. if (State == INIT) { - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { // Mark the end of lines in response files - if (MarkEOLs && Src[I] == '\n') + if (MarkEOLs && C == '\n') NewArgv.push_back(nullptr); continue; } - if (Src[I] == '"') { + if (C == '"') { State = QUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); State = UNQUOTED; continue; } - Token.push_back(Src[I]); + Token.push_back(C); State = UNQUOTED; continue; } @@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, // quotes. if (State == UNQUOTED) { // Whitespace means the end of the token. - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); State = INIT; // Mark the end of lines in response files - if (MarkEOLs && Src[I] == '\n') + if (MarkEOLs && C == '\n') NewArgv.push_back(nullptr); continue; } - if (Src[I] == '"') { + if (C == '"') { State = QUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); continue; } - Token.push_back(Src[I]); + Token.push_back(C); continue; } // QUOTED state means that it's reading a token quoted by double quotes. if (State == QUOTED) { - if (Src[I] == '"') { + if (C == '"') { State = UNQUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); continue; } - Token.push_back(Src[I]); + Token.push_back(C); } } // Append the last token after hitting EOF with no whitespace. -- cgit v1.2.3