ThinLTO: sort inputs and schedule by decreasing size

This is a compile-time optimization: keeping a large file to process at the end hurts parallelism. The heuristic used right now is the input buffer size; however, we may want to consider the number of functions to import or the number of different files to load for importing as well. From: Mehdi Amini <mehdi.amini@apple.com> llvm-svn: 269684
author: Mehdi Amini <mehdi.amini@apple.com> 2016-05-16 19:33:07 +0000
committer: Mehdi Amini <mehdi.amini@apple.com> 2016-05-16 19:33:07 +0000
commit: 819e9cdfb44174f6b7c5dd520b72616c91e445a6 (patch)
tree: c5ded2030b4d2b9f1369df292c19c5971ae9ada2 /llvm/lib/LTO/ThinLTOCodeGenerator.cpp
parent: 4817a7577c1401be92363813d4a00d5a711ce8ab (diff)
download: bcm5719-llvm-819e9cdfb44174f6b7c5dd520b72616c91e445a6.tar.gz
bcm5719-llvm-819e9cdfb44174f6b7c5dd520b72616c91e445a6.zip
1 files changed, 18 insertions, 4 deletions
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index a91cf4f8d4a..83eb14c1faf 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -52,6 +52,8 @@
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 
+#include <numeric>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "thinlto"
@@ -898,11 +900,24 @@ void ThinLTOCodeGenerator::run() {
   for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries)
     ExportLists[DefinedGVSummaries.first()];
 
+  // Compute the order in which the inputs will be processed: the rough
+  // heuristic here is to sort them by size so that the largest modules get
+  // scheduled as soon as possible. This is purely a compile-time optimization.
+  std::vector<int> ModulesOrdering;
+  ModulesOrdering.resize(Modules.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+  std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
+            [&](int LeftIndex, int RightIndex) {
+              auto LSize = Modules[LeftIndex].getBufferSize();
+              auto RSize = Modules[RightIndex].getBufferSize();
+              return LSize > RSize;
+            });
+
   // Parallel optimizer + codegen
   {
     ThreadPool Pool(ThreadCount);
-    int count = 0;
-    for (auto &ModuleBuffer : Modules) {
+    for (auto IndexCount : ModulesOrdering) {
+      auto &ModuleBuffer = Modules[IndexCount];
       Pool.async([&](int count) {
         auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
         auto &ExportList = ExportLists[ModuleIdentifier];
@@ -954,8 +969,7 @@ void ThinLTOCodeGenerator::run() {
 
         OutputBuffer = CacheEntry.write(std::move(OutputBuffer));
         ProducedBinaries[count] = std::move(OutputBuffer);
-      }, count);
-      count++;
+      }, IndexCount);
     }
   }
author	Mehdi Amini <mehdi.amini@apple.com>	2016-05-16 19:33:07 +0000
committer	Mehdi Amini <mehdi.amini@apple.com>	2016-05-16 19:33:07 +0000
commit	819e9cdfb44174f6b7c5dd520b72616c91e445a6 (patch)
tree	c5ded2030b4d2b9f1369df292c19c5971ae9ada2 /llvm/lib/LTO/ThinLTOCodeGenerator.cpp
parent	4817a7577c1401be92363813d4a00d5a711ce8ab (diff)
download	bcm5719-llvm-819e9cdfb44174f6b7c5dd520b72616c91e445a6.tar.gz bcm5719-llvm-819e9cdfb44174f6b7c5dd520b72616c91e445a6.zip