summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRui Ueyama <ruiu@google.com>2017-05-24 19:22:34 +0000
committerRui Ueyama <ruiu@google.com>2017-05-24 19:22:34 +0000
commitf04c04837c88068345f5141ebe6720ce3ae3d559 (patch)
tree6d605b03435b00558e020a3437d25a68bf0987ac
parent46fe6d47cca5cb7fd47186902612be0984c9a4c3 (diff)
downloadbcm5719-llvm-f04c04837c88068345f5141ebe6720ce3ae3d559.tar.gz
bcm5719-llvm-f04c04837c88068345f5141ebe6720ce3ae3d559.zip
Improve parallelism of ICF.
This is the only place we use threads for ICF. The intention of this code was to split an input vector into 256 shards and process them in parallel. What the code was actually doing was to split an input into 257 shards, process the first 256 shards in parallel, and the remaining one in serial. That means this code takes ceil(256/n)+1 steps instead of ceil(256/n), where n is the number of available CPU cores. As n grows, the former converges to 2 while the latter converges to 1. This patch fixes the above issue. llvm-svn: 303797
-rw-r--r--lld/COFF/ICF.cpp4
-rw-r--r--lld/ELF/ICF.cpp4
2 files changed, 4 insertions, 4 deletions
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 3b7cc424f0a..aa080958fe1 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -193,9 +193,9 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
size_t NumShards = 256;
size_t Step = Chunks.size() / NumShards;
for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
- forEachClassRange(I * Step, (I + 1) * Step, Fn);
+ size_t End = (I == NumShards - 1) ? Chunks.size() : (I + 1) * Step;
+ forEachClassRange(I * Step, End, Fn);
});
- forEachClassRange(Step * NumShards, Chunks.size(), Fn);
}
// Merge identical COMDAT sections.
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 3722d4e3ed2..419ae681632 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -326,9 +326,9 @@ void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) {
size_t NumShards = 256;
size_t Step = Sections.size() / NumShards;
parallelForEachN(0, NumShards, [&](size_t I) {
- forEachClassRange(I * Step, (I + 1) * Step, Fn);
+ size_t End = (I == NumShards - 1) ? Sections.size() : (I + 1) * Step;
+ forEachClassRange(I * Step, End, Fn);
});
- forEachClassRange(Step * NumShards, Sections.size(), Fn);
++Cnt;
}
OpenPOWER on IntegriCloud