Improve parallelism of ICF.

This is the only place we use threads for ICF. The intention of this code was to split an input vector into 256 shards and process them in parallel. What the code was actually doing was to split an input into 257 shards, process the first 256 shards in parallel, and the remaining one in serial. That means this code takes ceil(256/n)+1 steps instead of ceil(256/n), where n is the number of available CPU cores. As n grows, the former converges to 2 while the latter converges to 1. This patch fixes the above issue. llvm-svn: 303797
author: Rui Ueyama <ruiu@google.com> 2017-05-24 19:22:34 +0000
committer: Rui Ueyama <ruiu@google.com> 2017-05-24 19:22:34 +0000
commit: f04c04837c88068345f5141ebe6720ce3ae3d559 (patch)
tree: 6d605b03435b00558e020a3437d25a68bf0987ac
parent: 46fe6d47cca5cb7fd47186902612be0984c9a4c3 (diff)
download: bcm5719-llvm-f04c04837c88068345f5141ebe6720ce3ae3d559.tar.gz
bcm5719-llvm-f04c04837c88068345f5141ebe6720ce3ae3d559.zip
2 files changed, 4 insertions, 4 deletions
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 3b7cc424f0a..aa080958fe1 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -193,9 +193,9 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
   size_t NumShards = 256;
   size_t Step = Chunks.size() / NumShards;
   for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
-    forEachClassRange(I * Step, (I + 1) * Step, Fn);
+    size_t End = (I == NumShards - 1) ? Chunks.size() : (I + 1) * Step;
+    forEachClassRange(I * Step, End, Fn);
   });
-  forEachClassRange(Step * NumShards, Chunks.size(), Fn);
 }
 
 // Merge identical COMDAT sections.
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 3722d4e3ed2..419ae681632 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -326,9 +326,9 @@ void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) {
   size_t NumShards = 256;
   size_t Step = Sections.size() / NumShards;
   parallelForEachN(0, NumShards, [&](size_t I) {
-    forEachClassRange(I * Step, (I + 1) * Step, Fn);
+    size_t End = (I == NumShards - 1) ? Sections.size() : (I + 1) * Step;
+    forEachClassRange(I * Step, End, Fn);
   });
-  forEachClassRange(Step * NumShards, Sections.size(), Fn);
   ++Cnt;
 }
author	Rui Ueyama <ruiu@google.com>	2017-05-24 19:22:34 +0000
committer	Rui Ueyama <ruiu@google.com>	2017-05-24 19:22:34 +0000
commit	f04c04837c88068345f5141ebe6720ce3ae3d559 (patch)
tree	6d605b03435b00558e020a3437d25a68bf0987ac
parent	46fe6d47cca5cb7fd47186902612be0984c9a4c3 (diff)
download	bcm5719-llvm-f04c04837c88068345f5141ebe6720ce3ae3d559.tar.gz bcm5719-llvm-f04c04837c88068345f5141ebe6720ce3ae3d559.zip