Improve performance TokenizeWindowsCommandLine

Patcy by Takuto Ikuta. This patch reduces lld link time of chromium's blink_core.dll in component build. Total size of input argument in .directives become nearly 300MB in the build and calling many strchr and assert becomes bottleneck. On my desktop machine, 4 times stats of the link time are like below. Improved around 10%. This patch TotalSeconds : 13.4918885 TotalSeconds : 13.9474257 TotalSeconds : 13.4941082 TotalSeconds : 13.6077962 Avg : 13.63530465 master TotalSeconds : 15.6938531 TotalSeconds : 15.7022508 TotalSeconds : 15.9567202 TotalSeconds : 14.5851505 Avg : 15.48449365 Differential Revision: https://reviews.llvm.org/D41590 llvm-svn: 321479
author: Rui Ueyama <ruiu@google.com> 2017-12-27 08:59:52 +0000
committer: Rui Ueyama <ruiu@google.com> 2017-12-27 08:59:52 +0000
commit: 6ec880d9b58177fe3001dcd92ad6830333fd6793 (patch)
tree: 784f7d3c42f7cc57fd72c2a75eedc9e7264e528b /llvm/lib/Support/CommandLine.cpp
parent: fb2fd20f508ead839d2fc43994cacd549f53c21c (diff)
download: bcm5719-llvm-6ec880d9b58177fe3001dcd92ad6830333fd6793.tar.gz
bcm5719-llvm-6ec880d9b58177fe3001dcd92ad6830333fd6793.zip
1 files changed, 26 insertions, 20 deletions
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index b547a093270..4caf4a4fdce 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
          O->getNumOccurrencesFlag() == cl::OneOrMore;
 }
 
-static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
+static bool isWhitespace(char C) {
+  return C == ' ' || C == '\t' || C == '\r' || C == '\n';
+}
 
 static bool isQuote(char C) { return C == '\"' || C == '\''; }
 
@@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
         break;
     }
 
+    char C = Src[I];
+
     // Backslash escapes the next character.
-    if (I + 1 < E && Src[I] == '\\') {
+    if (I + 1 < E && C == '\\') {
       ++I; // Skip the escape.
       Token.push_back(Src[I]);
       continue;
     }
 
     // Consume a quoted string.
-    if (isQuote(Src[I])) {
-      char Quote = Src[I++];
-      while (I != E && Src[I] != Quote) {
+    if (isQuote(C)) {
+      ++I;
+      while (I != E && Src[I] != C) {
         // Backslash escapes the next character.
         if (Src[I] == '\\' && I + 1 != E)
           ++I;
@@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // End the token if this is whitespace.
-    if (isWhitespace(Src[I])) {
+    if (isWhitespace(C)) {
       if (!Token.empty())
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
       Token.clear();
@@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // This is a normal character.  Append it.
-    Token.push_back(Src[I]);
+    Token.push_back(C);
   }
 
   // Append the last token after hitting EOF with no whitespace.
@@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
   // end of the source string.
   enum { INIT, UNQUOTED, QUOTED } State = INIT;
   for (size_t I = 0, E = Src.size(); I != E; ++I) {
+    char C = Src[I];
+
     // INIT state indicates that the current input index is at the start of
     // the string or between tokens.
     if (State == INIT) {
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         State = UNQUOTED;
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       State = UNQUOTED;
       continue;
     }
@@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
     // quotes.
     if (State == UNQUOTED) {
       // Whitespace means the end of the token.
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
         Token.clear();
         State = INIT;
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       continue;
     }
 
     // QUOTED state means that it's reading a token quoted by double quotes.
     if (State == QUOTED) {
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = UNQUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
     }
   }
   // Append the last token after hitting EOF with no whitespace.
author	Rui Ueyama <ruiu@google.com>	2017-12-27 08:59:52 +0000
committer	Rui Ueyama <ruiu@google.com>	2017-12-27 08:59:52 +0000
commit	6ec880d9b58177fe3001dcd92ad6830333fd6793 (patch)
tree	784f7d3c42f7cc57fd72c2a75eedc9e7264e528b /llvm/lib/Support/CommandLine.cpp
parent	fb2fd20f508ead839d2fc43994cacd549f53c21c (diff)
download	bcm5719-llvm-6ec880d9b58177fe3001dcd92ad6830333fd6793.tar.gz bcm5719-llvm-6ec880d9b58177fe3001dcd92ad6830333fd6793.zip