7 files changed, 115 insertions, 76 deletions
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 9a43f2bd43f..e3a7d27c39b 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -192,7 +192,7 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
   // Split sections into 256 shards and call Fn in parallel.
   size_t NumShards = 256;
   size_t Step = Chunks.size() / NumShards;
-  parallel_for(size_t(0), NumShards, [&](size_t I) {
+  for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
     forEachClassRange(I * Step, (I + 1) * Step, Fn);
   });
   forEachClassRange(Step * NumShards, Chunks.size(), Fn);
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp
index 4e596e602fe..7df88f38879 100644
--- a/lld/COFF/MapFile.cpp
+++ b/lld/COFF/MapFile.cpp
@@ -76,7 +76,7 @@ static SymbolMapTy getSectionSyms(ArrayRef<DefinedRegular *> Syms) {
 static DenseMap<DefinedRegular *, std::string>
 getSymbolStrings(ArrayRef<DefinedRegular *> Syms) {
   std::vector<std::string> Str(Syms.size());
-  parallel_for((size_t)0, Syms.size(), [&](size_t I) {
+  for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) {
     raw_string_ostream OS(Str[I]);
     writeHeader(OS, Syms[I]->getRVA(), 0, 0);
     OS << indent(2) << toString(*Syms[I]);
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 8762b88c4d6..d61d87172f4 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -745,8 +745,8 @@ void Writer::writeSections() {
     // ADD instructions).
     if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE)
       memset(SecBuf, 0xCC, Sec->getRawSize());
-    parallel_for_each(Sec->getChunks().begin(), Sec->getChunks().end(),
-                      [&](Chunk *C) { C->writeTo(SecBuf); });
+    for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(),
+             [&](Chunk *C) { C->writeTo(SecBuf); });
   }
 }
 
@@ -760,16 +760,14 @@ void Writer::sortExceptionTable() {
   uint8_t *End = Begin + Sec->getVirtualSize();
   if (Config->Machine == AMD64) {
     struct Entry { ulittle32_t Begin, End, Unwind; };
-    parallel_sort(
-        (Entry *)Begin, (Entry *)End,
-        [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
+    sort(parallel::par, (Entry *)Begin, (Entry *)End,
+         [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
     return;
   }
   if (Config->Machine == ARMNT) {
     struct Entry { ulittle32_t Begin, Unwind; };
-    parallel_sort(
-        (Entry *)Begin, (Entry *)End,
-        [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
+    sort(parallel::par, (Entry *)Begin, (Entry *)End,
+         [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
     return;
   }
   errs() << "warning: don't know how to handle .pdata.\n";
diff --git a/lld/ELF/Threads.h b/lld/ELF/Threads.h
index 897432e69f8..e6f680cef3b 100644
--- a/lld/ELF/Threads.h
+++ b/lld/ELF/Threads.h
@@ -71,19 +71,17 @@ namespace elf {
 template <class IterTy, class FuncTy>
 void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
   if (Config->Threads)
-    parallel_for_each(Begin, End, Fn);
+    for_each(parallel::par, Begin, End, Fn);
   else
-    std::for_each(Begin, End, Fn);
+    for_each(parallel::seq, Begin, End, Fn);
 }
 
 inline void parallelFor(size_t Begin, size_t End,
                         std::function<void(size_t)> Fn) {
-  if (Config->Threads) {
-    parallel_for(Begin, End, Fn);
-  } else {
-    for (size_t I = Begin; I < End; ++I)
-      Fn(I);
-  }
+  if (Config->Threads)
+    for_each_n(parallel::par, Begin, End, Fn);
+  else
+    for_each_n(parallel::seq, Begin, End, Fn);
 }
 }
 }
diff --git a/lld/include/lld/Core/Parallel.h b/lld/include/lld/Core/Parallel.h
index 58fa87e85c5..a514b2ec446 100644
--- a/lld/include/lld/Core/Parallel.h
+++ b/lld/include/lld/Core/Parallel.h
@@ -12,8 +12,9 @@
 
 #include "lld/Core/LLVM.h"
 #include "lld/Core/TaskGroup.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/MathExtras.h"
 
 #include <algorithm>
 
@@ -24,25 +25,40 @@
 
 namespace lld {
 
-#if !LLVM_ENABLE_THREADS
-template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
-  std::sort(Start, End, Comp);
-}
-#elif defined(_MSC_VER)
-// Use ppl parallel_sort on Windows.
+namespace parallel {
+struct sequential_execution_policy {};
+struct parallel_execution_policy {};
+
+template <typename T>
+struct is_execution_policy
+    : public std::integral_constant<
+          bool, llvm::is_one_of<T, sequential_execution_policy,
+                                parallel_execution_policy>::value> {};
+
+constexpr sequential_execution_policy seq{};
+constexpr parallel_execution_policy par{};
+
+#if LLVM_ENABLE_THREADS
+
+namespace detail {
+
+#if defined(_MSC_VER)
 template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
+void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
+                   const Comparator &Comp) {
   concurrency::parallel_sort(Start, End, Comp);
 }
+template <class IterTy, class FuncTy>
+void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
+  concurrency::parallel_for_each(Begin, End, Fn);
+}
+
+template <class IndexTy, class FuncTy>
+void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
+  concurrency::parallel_for(Begin, End, Fn);
+}
+
 #else
-namespace detail {
 const ptrdiff_t MinParallelSize = 1024;
 
 /// \brief Inclusive median.
@@ -83,46 +99,15 @@ void parallel_quick_sort(RandomAccessIterator Start, RandomAccessIterator End,
   });
   parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1);
 }
-}
 
 template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
+void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
+                   const Comparator &Comp) {
   TaskGroup TG;
-  detail::parallel_quick_sort(Start, End, Comp, TG,
-                              llvm::Log2_64(std::distance(Start, End)) + 1);
-}
-#endif
-
-template <class T> void parallel_sort(T *Start, T *End) {
-  parallel_sort(Start, End, std::less<T>());
+  parallel_quick_sort(Start, End, Comp, TG,
+                      llvm::Log2_64(std::distance(Start, End)) + 1);
 }
 
-#if !LLVM_ENABLE_THREADS
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  std::for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  for (IndexTy I = Begin; I != End; ++I)
-    Fn(I);
-}
-#elif defined(_MSC_VER)
-// Use ppl parallel_for_each on Windows.
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  concurrency::parallel_for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  concurrency::parallel_for(Begin, End, Fn);
-}
-#else
 template <class IterTy, class FuncTy>
 void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
   // TaskGroup has a relatively high overhead, so we want to reduce
@@ -142,7 +127,7 @@ void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
 }
 
 template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
+void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
   ptrdiff_t TaskSize = (End - Begin) / 1024;
   if (TaskSize == 0)
     TaskSize = 1;
@@ -160,7 +145,65 @@ void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
       Fn(J);
   });
 }
+#endif
+} // namespace detail
 #endif
+
+namespace detail {
+// Lives outside the LLVM_ENABLE_THREADS guard: sequential sort() needs it too.
+template <typename Iter>
+using DefComparator =
+    std::less<typename std::iterator_traits<Iter>::value_type>;
+} // namespace detail
+
+// sequential algorithm implementations.
+template <class Policy, class RandomAccessIterator,
+          class Comparator = detail::DefComparator<RandomAccessIterator>>
+void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End,
+          const Comparator &Comp = Comparator()) {
+  static_assert(is_execution_policy<Policy>::value,
+                "Invalid execution policy!");
+  std::sort(Start, End, Comp);
+}
+
+template <class Policy, class IterTy, class FuncTy>
+void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) {
+  static_assert(is_execution_policy<Policy>::value,
+                "Invalid execution policy!");
+  std::for_each(Begin, End, Fn);
+}
+
+template <class Policy, class IndexTy, class FuncTy>
+void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) {
+  static_assert(is_execution_policy<Policy>::value,
+                "Invalid execution policy!");
+  for (IndexTy I = Begin; I != End; ++I)
+    Fn(I);
+}
+
+// Parallel overloads; guarded on the macro's value, matching the guard above.
+#if LLVM_ENABLE_THREADS
+template <class RandomAccessIterator,
+          class Comparator = detail::DefComparator<RandomAccessIterator>>
+void sort(parallel_execution_policy policy, RandomAccessIterator Start,
+          RandomAccessIterator End, const Comparator &Comp = Comparator()) {
+  detail::parallel_sort(Start, End, Comp);
+}
+
+template <class IterTy, class FuncTy>
+void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End,
+              FuncTy Fn) {
+  detail::parallel_for_each(Begin, End, Fn);
+}
+
+template <class IndexTy, class FuncTy>
+void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End,
+                FuncTy Fn) {
+  detail::parallel_for_each_n(Begin, End, Fn);
+}
+#endif
+
+} // namespace parallel
 } // End namespace lld
 
 #endif // LLD_CORE_PARALLEL_H
diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
index 24dbf79d3e3..2b5a46cc98f 100644
--- a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
@@ -461,10 +461,10 @@ llvm::Error LayoutPass::perform(SimpleFile &mergedFile) {
   });
 
   std::vector<LayoutPass::SortKey> vec = decorate(atomRange);
-  parallel_sort(vec.begin(), vec.end(),
-      [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
-        return compareAtoms(l, r, _customSorter);
-      });
+  sort(parallel::par, vec.begin(), vec.end(),
+       [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
+         return compareAtoms(l, r, _customSorter);
+       });
   DEBUG(checkTransitivity(vec, _customSorter));
   undecorate(atomRange, vec);
 
diff --git a/lld/unittests/CoreTests/ParallelTest.cpp b/lld/unittests/CoreTests/ParallelTest.cpp
index bd8507026a0..601a2b0839b 100644
--- a/lld/unittests/CoreTests/ParallelTest.cpp
+++ b/lld/unittests/CoreTests/ParallelTest.cpp
@@ -26,7 +26,7 @@ TEST(Parallel, sort) {
   for (auto &i : array)
     i = dist(randEngine);
 
-  lld::parallel_sort(std::begin(array), std::end(array));
+  sort(lld::parallel::par, std::begin(array), std::end(array));
   ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
 }
 
@@ -36,7 +36,7 @@ TEST(Parallel, parallel_for) {
   // writing.
   uint32_t range[2050];
   std::fill(range, range + 2050, 1);
-  lld::parallel_for(0, 2049, [&range](size_t I) { ++range[I]; });
+  for_each_n(lld::parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; });
 
   uint32_t expected[2049];
   std::fill(expected, expected + 2049, 2);