1 files changed, 18 insertions, 4 deletions
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index a91cf4f8d4a..83eb14c1faf 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -52,6 +52,8 @@
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 
+#include <numeric>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "thinlto"
@@ -898,11 +900,24 @@ void ThinLTOCodeGenerator::run() {
   for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries)
     ExportLists[DefinedGVSummaries.first()];
 
+  // Compute the ordering we will process the inputs: the rough heuristic here
+  // is to sort them per size so that the largest module get schedule as soon as
+  // possible. This is purely a compile-time optimization.
+  std::vector<int> ModulesOrdering;
+  ModulesOrdering.resize(Modules.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+  std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
+            [&](int LeftIndex, int RightIndex) {
+              auto LSize = Modules[LeftIndex].getBufferSize();
+              auto RSize = Modules[RightIndex].getBufferSize();
+              return LSize > RSize;
+            });
+
   // Parallel optimizer + codegen
   {
     ThreadPool Pool(ThreadCount);
-    int count = 0;
-    for (auto &ModuleBuffer : Modules) {
+    for (auto IndexCount : ModulesOrdering) {
+      auto &ModuleBuffer = Modules[IndexCount];
       Pool.async([&](int count) {
         auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
         auto &ExportList = ExportLists[ModuleIdentifier];
@@ -954,8 +969,7 @@ void ThinLTOCodeGenerator::run() {
 
         OutputBuffer = CacheEntry.write(std::move(OutputBuffer));
         ProducedBinaries[count] = std::move(OutputBuffer);
-      }, count);
-      count++;
+      }, IndexCount);
     }
   }