18 files changed, 424 insertions, 78 deletions
diff --git a/libcxx/utils/google-benchmark/src/CMakeLists.txt b/libcxx/utils/google-benchmark/src/CMakeLists.txt
index 40388751779..7707773930c 100644
--- a/libcxx/utils/google-benchmark/src/CMakeLists.txt
+++ b/libcxx/utils/google-benchmark/src/CMakeLists.txt
@@ -21,24 +21,55 @@ set_target_properties(benchmark PROPERTIES
 
 # Link threads.
 target_link_libraries(benchmark  ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+find_library(LIBRT rt)
+if(LIBRT)
+  target_link_libraries(benchmark ${LIBRT})
+endif()
 
 # We need extra libraries on Windows
 if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
   target_link_libraries(benchmark Shlwapi)
 endif()
 
-# Expose public API
-target_include_directories(benchmark PUBLIC ${PROJECT_SOURCE_DIR}/include)
+set(include_install_dir "include")
+set(lib_install_dir "lib/")
+set(bin_install_dir "bin/")
+set(config_install_dir "lib/cmake/${PROJECT_NAME}")
+
+set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
+
+set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
+set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
+set(targets_export_name "${PROJECT_NAME}Targets")
+
+set(namespace "${PROJECT_NAME}::")
+
+include(CMakePackageConfigHelpers)
+write_basic_package_version_file(
+    "${version_config}" VERSION ${GIT_VERSION} COMPATIBILITY SameMajorVersion
+)
+
+configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
 
 # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
 install(
   TARGETS benchmark
-  ARCHIVE DESTINATION lib
-  LIBRARY DESTINATION lib
-  RUNTIME DESTINATION bin
-  COMPONENT library)
+  EXPORT ${targets_export_name}
+  ARCHIVE DESTINATION ${lib_install_dir}
+  LIBRARY DESTINATION ${lib_install_dir}
+  RUNTIME DESTINATION ${bin_install_dir}
+  INCLUDES DESTINATION ${include_install_dir})
 
 install(
   DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
-  DESTINATION include
+  DESTINATION ${include_install_dir}
   FILES_MATCHING PATTERN "*.*h")
+
+install(
+    FILES "${project_config}" "${version_config}"
+    DESTINATION "${config_install_dir}")
+
+install(
+    EXPORT "${targets_export_name}"
+    NAMESPACE "${namespace}"
+    DESTINATION "${config_install_dir}")
diff --git a/libcxx/utils/google-benchmark/src/benchmark.cc b/libcxx/utils/google-benchmark/src/benchmark.cc
index 95f6a25b756..00ffa07ff56 100644
--- a/libcxx/utils/google-benchmark/src/benchmark.cc
+++ b/libcxx/utils/google-benchmark/src/benchmark.cc
@@ -37,6 +37,7 @@
 #include "colorprint.h"
 #include "commandlineflags.h"
 #include "complexity.h"
+#include "counter.h"
 #include "log.h"
 #include "mutex.h"
 #include "re.h"
@@ -145,6 +146,7 @@ class ThreadManager {
     std::string report_label_;
     std::string error_message_;
     bool has_error_ = false;
+    UserCounters counters;
   };
   GUARDED_BY(GetBenchmarkMutex()) Result results;
 
@@ -249,6 +251,7 @@ BenchmarkReporter::Run CreateRunReport(
     report.complexity_n = results.complexity_n;
     report.complexity = b.complexity;
     report.complexity_lambda = b.complexity_lambda;
+    report.counters = results.counters;
   }
   return report;
 }
@@ -272,6 +275,7 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b,
     results.bytes_processed += st.bytes_processed();
     results.items_processed += st.items_processed();
     results.complexity_n += st.complexity_length_n();
+    internal::Increment(&results.counters, st.counters);
   }
   manager->NotifyThreadComplete();
 }
@@ -281,7 +285,8 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
     std::vector<BenchmarkReporter::Run>* complexity_reports) {
   std::vector<BenchmarkReporter::Run> reports;  // return value
 
-  size_t iters = 1;
+  const bool has_explicit_iteration_count = b.iterations != 0;
+  size_t iters = has_explicit_iteration_count ? b.iterations : 1;
   std::unique_ptr<internal::ThreadManager> manager;
   std::vector<std::thread> pool(b.threads - 1);
   const int repeats =
@@ -291,7 +296,7 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
       (b.report_mode == internal::RM_Unspecified
            ? FLAGS_benchmark_report_aggregates_only
            : b.report_mode == internal::RM_ReportAggregatesOnly);
-  for (int i = 0; i < repeats; i++) {
+  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
     for (;;) {
       // Try benchmark
       VLOG(2) << "Running " << b.name << " for " << iters << "\n";
@@ -327,10 +332,20 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
 
       const double min_time =
           !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time;
-      // If this was the first run, was elapsed time or cpu time large enough?
-      // If this is not the first run, go with the current value of iter.
-      if ((i > 0) || results.has_error_ || (iters >= kMaxIterations) ||
-          (seconds >= min_time) || (results.real_time_used >= 5 * min_time)) {
+
+      // Determine if this run should be reported; Either it has
+      // run for a sufficient amount of time or because an error was reported.
+      const bool should_report =  repetition_num > 0
+        || has_explicit_iteration_count // An exact iteration count was requested
+        || results.has_error_
+        || iters >= kMaxIterations
+        || seconds >= min_time // the elapsed time is large enough
+        // CPU time is specified but the elapsed real time greatly exceeds the
+        // minimum time. Note that user provided timers are except from this
+        // sanity check.
+        || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time);
+
+      if (should_report) {
         BenchmarkReporter::Run report =
             CreateRunReport(b, results, iters, seconds);
         if (!report.error_occurred && b.complexity != oNone)
@@ -386,6 +401,7 @@ State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
       items_processed_(0),
       complexity_n_(0),
       error_occurred_(false),
+      counters(),
       thread_index(thread_i),
       threads(n_threads),
       max_iterations(max_iters),
@@ -634,7 +650,7 @@ void ParseCommandLineFlags(int* argc, char** argv) {
         // TODO: Remove this.
         ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
         ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
-      for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
+      for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
 
       --(*argc);
       --i;
@@ -664,4 +680,11 @@ void Initialize(int* argc, char** argv) {
   internal::LogLevel() = FLAGS_v;
 }
 
+bool ReportUnrecognizedArguments(int argc, char** argv) {
+  for (int i = 1; i < argc; ++i) {
+    fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], argv[i]);
+  }
+  return argc > 1;
+}
+
 }  // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
index 8b97ce60087..828ed12165f 100644
--- a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
+++ b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
@@ -24,9 +24,11 @@ struct Benchmark::Instance {
   bool use_manual_time;
   BigO complexity;
   BigOFunc* complexity_lambda;
+  UserCounters counters;
   bool last_benchmark_instance;
   int repetitions;
   double min_time;
+  size_t iterations;
   int threads;  // Number of concurrent threads to us
 };
 
diff --git a/libcxx/utils/google-benchmark/src/benchmark_register.cc b/libcxx/utils/google-benchmark/src/benchmark_register.cc
index 4e580d8ea38..fe373204189 100644
--- a/libcxx/utils/google-benchmark/src/benchmark_register.cc
+++ b/libcxx/utils/google-benchmark/src/benchmark_register.cc
@@ -143,6 +143,7 @@ bool BenchmarkFamilies::FindBenchmarks(
         instance.time_unit = family->time_unit_;
         instance.range_multiplier = family->range_multiplier_;
         instance.min_time = family->min_time_;
+        instance.iterations = family->iterations_;
         instance.repetitions = family->repetitions_;
         instance.use_real_time = family->use_real_time_;
         instance.use_manual_time = family->use_manual_time_;
@@ -163,16 +164,17 @@ bool BenchmarkFamilies::FindBenchmarks(
             }
           }
 
-          AppendHumanReadable(arg, &instance.name);
+          instance.name += std::to_string(arg);
           ++arg_i;
         }
 
-        if (!IsZero(family->min_time_)) {
+        if (!IsZero(family->min_time_))
           instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
-        }
-        if (family->repetitions_ != 0) {
+        if (family->iterations_ != 0)
+          instance.name += StringPrintF("/iterations:%d", family->iterations_);
+        if (family->repetitions_ != 0)
           instance.name += StringPrintF("/repeats:%d", family->repetitions_);
-        }
+
         if (family->use_manual_time_) {
           instance.name += "/manual_time";
         } else if (family->use_real_time_) {
@@ -219,6 +221,7 @@ Benchmark::Benchmark(const char* name)
       time_unit_(kNanosecond),
       range_multiplier_(kRangeMultiplier),
       min_time_(0),
+      iterations_(0),
       repetitions_(0),
       use_real_time_(false),
       use_manual_time_(false),
@@ -344,6 +347,22 @@ Benchmark* Benchmark::RangeMultiplier(int multiplier) {
   return this;
 }
 
+
+Benchmark* Benchmark::MinTime(double t) {
+  CHECK(t > 0.0);
+  CHECK(iterations_ == 0);
+  min_time_ = t;
+  return this;
+}
+
+
+Benchmark* Benchmark::Iterations(size_t n) {
+  CHECK(n > 0);
+  CHECK(IsZero(min_time_));
+  iterations_ = n;
+  return this;
+}
+
 Benchmark* Benchmark::Repetitions(int n) {
   CHECK(n > 0);
   repetitions_ = n;
@@ -355,12 +374,6 @@ Benchmark* Benchmark::ReportAggregatesOnly(bool value) {
   return this;
 }
 
-Benchmark* Benchmark::MinTime(double t) {
-  CHECK(t > 0.0);
-  min_time_ = t;
-  return this;
-}
-
 Benchmark* Benchmark::UseRealTime() {
   CHECK(!use_manual_time_)
       << "Cannot set UseRealTime and UseManualTime simultaneously.";
diff --git a/libcxx/utils/google-benchmark/src/complexity.cc b/libcxx/utils/google-benchmark/src/complexity.cc
index dfab791a329..02adbef6292 100644
--- a/libcxx/utils/google-benchmark/src/complexity.cc
+++ b/libcxx/utils/google-benchmark/src/complexity.cc
@@ -171,6 +171,22 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
   // All repetitions should be run with the same number of iterations so we
   // can take this information from the first benchmark.
   int64_t const run_iterations = reports.front().iterations;
+  // create stats for user counters
+  struct CounterStat {
+    Counter c;
+    Stat1_d s;
+  };
+  std::map< std::string, CounterStat > counter_stats;
+  for(Run const& r : reports) {
+    for(auto const& cnt : r.counters) {
+      auto it = counter_stats.find(cnt.first);
+      if(it == counter_stats.end()) {
+        counter_stats.insert({cnt.first, {cnt.second, Stat1_d{}}});
+      } else {
+        CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
+      }
+    }
+  }
 
   // Populate the accumulators.
   for (Run const& run : reports) {
@@ -183,6 +199,12 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
         Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations);
     items_per_second_stat += Stat1_d(run.items_per_second, run.iterations);
     bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations);
+    // user counters
+    for(auto const& cnt : run.counters) {
+      auto it = counter_stats.find(cnt.first);
+      CHECK_NE(it, counter_stats.end());
+      it->second.s += Stat1_d(cnt.second, run.iterations);
+    }
   }
 
   // Get the data from the accumulator to BenchmarkReporter::Run's.
@@ -196,6 +218,11 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
   mean_data.bytes_per_second = bytes_per_second_stat.Mean();
   mean_data.items_per_second = items_per_second_stat.Mean();
   mean_data.time_unit = reports[0].time_unit;
+  // user counters
+  for(auto const& kv : counter_stats) {
+    auto c = Counter(kv.second.s.Mean(), counter_stats[kv.first].c.flags);
+    mean_data.counters[kv.first] = c;
+  }
 
   // Only add label to mean/stddev if it is same for all runs
   mean_data.report_label = reports[0].report_label;
@@ -215,6 +242,11 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
   stddev_data.bytes_per_second = bytes_per_second_stat.StdDev();
   stddev_data.items_per_second = items_per_second_stat.StdDev();
   stddev_data.time_unit = reports[0].time_unit;
+  // user counters
+  for(auto const& kv : counter_stats) {
+    auto c = Counter(kv.second.s.StdDev(), counter_stats[kv.first].c.flags);
+    stddev_data.counters[kv.first] = c;
+  }
 
   results.push_back(mean_data);
   results.push_back(stddev_data);
@@ -263,6 +295,11 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
   big_o.report_big_o = true;
   big_o.complexity = result_cpu.complexity;
 
+  // All the time results are reported after being multiplied by the
+  // time unit multiplier. But since RMS is a relative quantity it
+  // should not be multiplied at all. So, here, we _divide_ it by the
+  // multiplier so that when it is multiplied later the result is the
+  // correct one.
   double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);
 
   // Only add label to mean/stddev if it is same for all runs
@@ -275,6 +312,9 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
   rms.cpu_accumulated_time = result_cpu.rms / multiplier;
   rms.report_rms = true;
   rms.complexity = result_cpu.complexity;
+  // don't forget to keep the time unit, or we won't be able to
+  // recover the correct value.
+  rms.time_unit = reports[0].time_unit;
 
   results.push_back(big_o);
   results.push_back(rms);
diff --git a/libcxx/utils/google-benchmark/src/console_reporter.cc b/libcxx/utils/google-benchmark/src/console_reporter.cc
index 7e0cca3e3a0..3f3de02945a 100644
--- a/libcxx/utils/google-benchmark/src/console_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/console_reporter.cc
@@ -14,6 +14,7 @@
 
 #include "benchmark/reporter.h"
 #include "complexity.h"
+#include "counter.h"
 
 #include <algorithm>
 #include <cstdint>
@@ -34,6 +35,7 @@ namespace benchmark {
 
 bool ConsoleReporter::ReportContext(const Context& context) {
   name_field_width_ = context.name_field_width;
+  printed_header_ = false;
 
   PrintBasicContext(&GetErrorStream(), context);
 
@@ -45,16 +47,32 @@ bool ConsoleReporter::ReportContext(const Context& context) {
     color_output_ = false;
   }
 #endif
+
+  return true;
+}
+
+void ConsoleReporter::PrintHeader(const Run& run) {
   std::string str =
       FormatString("%-*s %13s %13s %10s\n", static_cast<int>(name_field_width_),
                    "Benchmark", "Time", "CPU", "Iterations");
-  GetOutputStream() << str << std::string(str.length() - 1, '-') << "\n";
-
-  return true;
+  if(!run.counters.empty()) {
+    str += " UserCounters...";
+  }
+  std::string line = std::string(str.length(), '-');
+  GetOutputStream() << line << "\n" << str << line << "\n";
 }
 
 void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
-  for (const auto& run : reports) PrintRunData(run);
+  for (const auto& run : reports) {
+    // print the header if none was printed yet
+    if (!printed_header_) {
+      printed_header_ = true;
+      PrintHeader(run);
+    }
+    // As an alternative to printing the headers like this, we could sort
+    // the benchmarks by header and then print like that.
+    PrintRunData(run);
+  }
 }
 
 static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
@@ -114,6 +132,11 @@ void ConsoleReporter::PrintRunData(const Run& result) {
     printer(Out, COLOR_CYAN, "%10lld", result.iterations);
   }
 
+  for (auto& c : result.counters) {
+    auto const& s = HumanReadableNumber(c.second.value);
+    printer(Out, COLOR_DEFAULT, " %s=%s", c.first.c_str(), s.c_str());
+  }
+
   if (!rate.empty()) {
     printer(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str());
   }
diff --git a/libcxx/utils/google-benchmark/src/counter.cc b/libcxx/utils/google-benchmark/src/counter.cc
new file mode 100644
index 00000000000..307863d3c4d
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/counter.cc
@@ -0,0 +1,68 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "counter.h"
+
+namespace benchmark {
+namespace internal {
+
+double Finish(Counter const& c, double cpu_time, double num_threads) {
+  double v = c.value;
+  if (c.flags & Counter::kIsRate) {
+    v /= cpu_time;
+  }
+  if (c.flags & Counter::kAvgThreads) {
+    v /= num_threads;
+  }
+  return v;
+}
+
+void Finish(UserCounters *l, double cpu_time, double num_threads) {
+  for (auto &c : *l) {
+    c.second = Finish(c.second, cpu_time, num_threads);
+  }
+}
+
+void Increment(UserCounters *l, UserCounters const& r) {
+  // add counters present in both or just in *l
+  for (auto &c : *l) {
+    auto it = r.find(c.first);
+    if (it != r.end()) {
+      c.second = c.second + it->second;
+    }
+  }
+  // add counters present in r, but not in *l
+  for (auto const &tc : r) {
+    auto it = l->find(tc.first);
+    if (it == l->end()) {
+      (*l)[tc.first] = tc.second;
+    }
+  }
+}
+
+bool SameNames(UserCounters const& l, UserCounters const& r) {
+  if (&l == &r) return true;
+  if (l.size() != r.size()) {
+    return false;
+  }
+  for (auto const& c : l) {
+    if ( r.find(c.first) == r.end()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+} // end namespace internal
+} // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/counter.h b/libcxx/utils/google-benchmark/src/counter.h
new file mode 100644
index 00000000000..bbb92d9a298
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/counter.h
@@ -0,0 +1,26 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/benchmark_api.h"
+
+namespace benchmark {
+
+// these counter-related functions are hidden to reduce API surface.
+namespace internal {
+void Finish(UserCounters *l, double time, double num_threads);
+void Increment(UserCounters *l, UserCounters const& r);
+bool SameNames(UserCounters const& l, UserCounters const& r);
+} // end namespace internal
+
+} //end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/csv_reporter.cc b/libcxx/utils/google-benchmark/src/csv_reporter.cc
index 18ab3b66895..6779815b3c3 100644
--- a/libcxx/utils/google-benchmark/src/csv_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/csv_reporter.cc
@@ -24,6 +24,7 @@
 
 #include "string_util.h"
 #include "timers.h"
+#include "check.h"
 
 // File format reference: http://edoceo.com/utilitas/csv-file-format.
 
@@ -38,21 +39,51 @@ std::vector<std::string> elements = {
 
 bool CSVReporter::ReportContext(const Context& context) {
   PrintBasicContext(&GetErrorStream(), context);
+  return true;
+}
 
+void CSVReporter::ReportRuns(const std::vector<Run> & reports) {
   std::ostream& Out = GetOutputStream();
-  for (auto B = elements.begin(); B != elements.end();) {
-    Out << *B++;
-    if (B != elements.end()) Out << ",";
+
+  if (!printed_header_) {
+    // save the names of all the user counters
+    for (const auto& run : reports) {
+      for (const auto& cnt : run.counters) {
+        user_counter_names_.insert(cnt.first);
+      }
+    }
+
+    // print the header
+    for (auto B = elements.begin(); B != elements.end();) {
+      Out << *B++;
+      if (B != elements.end()) Out << ",";
+    }
+    for (auto B = user_counter_names_.begin(); B != user_counter_names_.end();) {
+      Out << ",\"" << *B++ << "\"";
+    }
+    Out << "\n";
+
+    printed_header_ = true;
+  } else {
+    // check that all the current counters are saved in the name set
+    for (const auto& run : reports) {
+      for (const auto& cnt : run.counters) {
+        CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end())
+              << "All counters must be present in each run. "
+              << "Counter named \"" << cnt.first
+              << "\" was not in a run after being added to the header";
+      }
+    }
+  }
+
+  // print results for each run
+  for (const auto& run : reports) {
+    PrintRunData(run);
   }
-  Out << "\n";
-  return true;
-}
 
-void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
-  for (const auto& run : reports) PrintRunData(run);
 }
 
-void CSVReporter::PrintRunData(const Run& run) {
+void CSVReporter::PrintRunData(const Run & run) {
   std::ostream& Out = GetOutputStream();
 
   // Field with embedded double-quote characters must be doubled and the field
@@ -102,6 +133,13 @@ void CSVReporter::PrintRunData(const Run& run) {
     Out << "\"" << label << "\"";
   }
   Out << ",,";  // for error_occurred and error_message
+
+  // Print user counters
+  for (const auto &ucn : user_counter_names_) {
+    auto it = run.counters.find(ucn);
+    CHECK(it != run.counters.end());
+    Out << "," << it->second;
+  }
   Out << '\n';
 }
 
diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h
index e4825d4ba24..e0f9b01f9d3 100644
--- a/libcxx/utils/google-benchmark/src/cycleclock.h
+++ b/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -43,6 +43,11 @@ extern "C" uint64_t __rdtsc();
 
 #ifndef BENCHMARK_OS_WINDOWS
 #include <sys/time.h>
+#include <time.h>
+#endif
+
+#ifdef BENCHMARK_OS_EMSCRIPTEN
+#include <emscripten.h>
 #endif
 
 namespace benchmark {
@@ -65,6 +70,10 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   // counter pauses; it does not continue counting, nor does it
   // reset to zero.
   return mach_absolute_time();
+#elif defined(BENCHMARK_OS_EMSCRIPTEN)
+  // this goes above x86-specific code because old versions of Emscripten
+  // define __x86_64__, although they have nothing to do with it.
+  return static_cast<int64_t>(emscripten_get_now() * 1e+6);
 #elif defined(__i386__)
   int64_t ret;
   __asm__ volatile("rdtsc" : "=A"(ret));
@@ -79,7 +88,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   asm("mftbu %0" : "=r"(tbu0));
   asm("mftb  %0" : "=r"(tbl));
   asm("mftbu %0" : "=r"(tbu1));
-  tbl &= -static_cast<int64>(tbu0 == tbu1);
+  tbl &= -static_cast<int64_t>(tbu0 == tbu1);
   // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
   return (tbu1 << 32) | tbl;
 #elif defined(__sparc__)
@@ -99,6 +108,22 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   _asm rdtsc
 #elif defined(COMPILER_MSVC)
   return __rdtsc();
+#elif defined(BENCHMARK_OS_NACL)
+  // Native Client validator on x86/x86-64 allows RDTSC instructions,
+  // and this case is handled above. Native Client validator on ARM
+  // rejects MRC instructions (used in the ARM-specific sequence below),
+  // so we handle it here. Portable Native Client compiles to
+  // architecture-agnostic bytecode, which doesn't provide any
+  // cycle counter access mnemonics.
+
+  // Native Client does not provide any API to access cycle counter.
+  // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
+  // because is provides nanosecond resolution (which is noticable at
+  // least for PNaCl modules running on x86 Mac & Linux).
+  // Initialize to always return 0 if clock_gettime fails.
+  struct timespec ts = { 0, 0 };
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
 #elif defined(__aarch64__)
   // System timer of ARMv8 runs at a different frequency than the CPU's.
   // The frequency is fixed, typically in the range 1-50MHz.  It can be
@@ -108,7 +133,9 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
   return virtual_timer_value;
 #elif defined(__ARM_ARCH)
-#if (__ARM_ARCH >= 6)  // V6 is the earliest arch that has a standard cyclecount
+  // V6 is the earliest arch that has a standard cyclecount
+  // Native Client validator doesn't allow MRC instructions.
+#if (__ARM_ARCH >= 6)
   uint32_t pmccntr;
   uint32_t pmuseren;
   uint32_t pmcntenset;
diff --git a/libcxx/utils/google-benchmark/src/internal_macros.h b/libcxx/utils/google-benchmark/src/internal_macros.h
index e8efcbb39c1..ab9dd85c102 100644
--- a/libcxx/utils/google-benchmark/src/internal_macros.h
+++ b/libcxx/utils/google-benchmark/src/internal_macros.h
@@ -30,13 +30,26 @@
 #elif defined(_WIN32)
 #define BENCHMARK_OS_WINDOWS 1
 #elif defined(__APPLE__)
-// TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just
-// that it is an apple system.
-#define BENCHMARK_OS_MACOSX 1
+#include "TargetConditionals.h"
+  #if defined(TARGET_OS_MAC)
+    #define BENCHMARK_OS_MACOSX 1
+    #if defined(TARGET_OS_IPHONE)
+      #define BENCHMARK_OS_IOS 1
+    #endif
+  #endif
 #elif defined(__FreeBSD__)
 #define BENCHMARK_OS_FREEBSD 1
 #elif defined(__linux__)
 #define BENCHMARK_OS_LINUX 1
+#elif defined(__native_client__)
+#define BENCHMARK_OS_NACL 1
+#elif defined(EMSCRIPTEN)
+#define BENCHMARK_OS_EMSCRIPTEN 1
+#endif
+
+#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \
+     && !defined(__EXCEPTIONS)
+#define BENCHMARK_HAS_NO_EXCEPTIONS
 #endif
 
 #endif  // BENCHMARK_INTERNAL_MACROS_H_
diff --git a/libcxx/utils/google-benchmark/src/json_reporter.cc b/libcxx/utils/google-benchmark/src/json_reporter.cc
index cea5f9bfa1c..5a653088e5b 100644
--- a/libcxx/utils/google-benchmark/src/json_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/json_reporter.cc
@@ -154,10 +154,15 @@ void JSONReporter::PrintRunData(Run const& run) {
         << indent
         << FormatKV("items_per_second", RoundDouble(run.items_per_second));
   }
+  for(auto &c : run.counters) {
+    out << ",\n"
+        << indent
+        << FormatKV(c.first, RoundDouble(c.second));
+  }
   if (!run.report_label.empty()) {
     out << ",\n" << indent << FormatKV("label", run.report_label);
   }
   out << '\n';
 }
 
-}  // end namespace benchmark
+} // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/re.h b/libcxx/utils/google-benchmark/src/re.h
index af4a498c9f7..01e9736505e 100644
--- a/libcxx/utils/google-benchmark/src/re.h
+++ b/libcxx/utils/google-benchmark/src/re.h
@@ -15,6 +15,15 @@
 #ifndef BENCHMARK_RE_H_
 #define BENCHMARK_RE_H_
 
+#include "internal_macros.h"
+
+// Prefer C regex libraries when compiling w/o exceptions so that we can
+// correctly report errors.
+#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && defined(HAVE_STD_REGEX) && \
+    (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
+#undef HAVE_STD_REGEX
+#endif
+
 #if defined(HAVE_STD_REGEX)
 #include <regex>
 #elif defined(HAVE_GNU_POSIX_REGEX)
@@ -62,15 +71,20 @@ class Regex {
 #if defined(HAVE_STD_REGEX)
 
 inline bool Regex::Init(const std::string& spec, std::string* error) {
+#ifdef BENCHMARK_HAS_NO_EXCEPTIONS
+  ((void)error); // suppress unused warning
+#else
   try {
+#endif
     re_ = std::regex(spec, std::regex_constants::extended);
-
     init_ = true;
+#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
   } catch (const std::regex_error& e) {
     if (error) {
       *error = e.what();
     }
   }
+#endif
   return init_;
 }
 
diff --git a/libcxx/utils/google-benchmark/src/sleep.cc b/libcxx/utils/google-benchmark/src/sleep.cc
index 918abc485dc..54aa04a4224 100644
--- a/libcxx/utils/google-benchmark/src/sleep.cc
+++ b/libcxx/utils/google-benchmark/src/sleep.cc
@@ -15,6 +15,7 @@
 #include "sleep.h"
 
 #include <cerrno>
+#include <cstdlib>
 #include <ctime>
 
 #include "internal_macros.h"
@@ -40,7 +41,7 @@ void SleepForMicroseconds(int microseconds) {
 }
 
 void SleepForMilliseconds(int milliseconds) {
-  SleepForMicroseconds(static_cast<int>(milliseconds) * kNumMicrosPerMilli);
+  SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
 }
 
 void SleepForSeconds(double seconds) {
diff --git a/libcxx/utils/google-benchmark/src/sleep.h b/libcxx/utils/google-benchmark/src/sleep.h
index f1e515ca4f3..f98551afe28 100644
--- a/libcxx/utils/google-benchmark/src/sleep.h
+++ b/libcxx/utils/google-benchmark/src/sleep.h
@@ -1,14 +1,12 @@
 #ifndef BENCHMARK_SLEEP_H_
 #define BENCHMARK_SLEEP_H_
 
-#include <cstdint>
-
 namespace benchmark {
-const int64_t kNumMillisPerSecond = 1000LL;
-const int64_t kNumMicrosPerMilli = 1000LL;
-const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
-const int64_t kNumNanosPerMicro = 1000LL;
-const int64_t kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
+const int kNumMillisPerSecond = 1000;
+const int kNumMicrosPerMilli = 1000;
+const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
+const int kNumNanosPerMicro = 1000;
+const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
 
 void SleepForMilliseconds(int milliseconds);
 void SleepForSeconds(double seconds);
diff --git a/libcxx/utils/google-benchmark/src/string_util.cc b/libcxx/utils/google-benchmark/src/string_util.cc
index 4cefbfba6c2..cd4e7cfde57 100644
--- a/libcxx/utils/google-benchmark/src/string_util.cc
+++ b/libcxx/utils/google-benchmark/src/string_util.cc
@@ -45,6 +45,8 @@ void ToExponentAndMantissa(double val, double thresh, int precision,
       std::max(thresh, 1.0 / std::pow(10.0, precision));
   const double big_threshold = adjusted_threshold * one_k;
   const double small_threshold = adjusted_threshold;
+  // Values in ]simple_threshold,small_threshold[ will be printed as-is
+  const double simple_threshold = 0.01;
 
   if (val > big_threshold) {
     // Positive powers
@@ -62,14 +64,16 @@ void ToExponentAndMantissa(double val, double thresh, int precision,
     *exponent = 0;
   } else if (val < small_threshold) {
     // Negative powers
-    double scaled = val;
-    for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) {
-      scaled *= one_k;
-      if (scaled >= small_threshold) {
-        mantissa_stream << scaled;
-        *exponent = -static_cast<int64_t>(i + 1);
-        *mantissa = mantissa_stream.str();
-        return;
+    if (val < simple_threshold) {
+      double scaled = val;
+      for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) {
+        scaled *= one_k;
+        if (scaled >= small_threshold) {
+          mantissa_stream << scaled;
+          *exponent = -static_cast<int64_t>(i + 1);
+          *mantissa = mantissa_stream.str();
+          return;
+        }
       }
     }
     mantissa_stream << val;
diff --git a/libcxx/utils/google-benchmark/src/sysinfo.cc b/libcxx/utils/google-benchmark/src/sysinfo.cc
index dd1e66306e1..7feb79e65f2 100644
--- a/libcxx/utils/google-benchmark/src/sysinfo.cc
+++ b/libcxx/utils/google-benchmark/src/sysinfo.cc
@@ -75,7 +75,9 @@ bool ReadIntFromFile(const char* file, long* value) {
     char line[1024];
     char* err;
     memset(line, '\0', sizeof(line));
-    CHECK(read(fd, line, sizeof(line) - 1));
+    ssize_t read_err = read(fd, line, sizeof(line) - 1);
+    ((void)read_err); // prevent unused warning
+    CHECK(read_err >= 0);
     const long temp_value = strtol(line, &err, 10);
     if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
       *value = temp_value;
@@ -295,8 +297,13 @@ void InitializeSystemInfo() {
       (size == sizeof(cpu_freq))) {
     cpuinfo_cycles_per_second = cpu_freq;
   } else {
+    #if defined BENCHMARK_OS_IOS
+    fprintf(stderr, "CPU frequency cannot be detected. \n");
+    cpuinfo_cycles_per_second = 0;
+    #else
     fprintf(stderr, "%s\n", strerror(errno));
     std::exit(EXIT_FAILURE);
+    #endif
   }
 #else
   // Generic cycles per second counter
diff --git a/libcxx/utils/google-benchmark/src/timers.cc b/libcxx/utils/google-benchmark/src/timers.cc
index fadc08f791e..8d56e8adf8d 100644
--- a/libcxx/utils/google-benchmark/src/timers.cc
+++ b/libcxx/utils/google-benchmark/src/timers.cc
@@ -35,6 +35,10 @@
 #endif
 #endif
 
+#ifdef BENCHMARK_OS_EMSCRIPTEN
+#include <emscripten.h>
+#endif
+
 #include <cerrno>
 #include <cstdint>
 #include <cstdio>
@@ -100,14 +104,7 @@ BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) {
 }  // end namespace
 
 double ProcessCPUUsage() {
-// FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
-// https://github.com/google/benchmark/pull/292
-#if defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
-  struct timespec spec;
-  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
-    return MakeTime(spec);
-  DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
-#elif defined(BENCHMARK_OS_WINDOWS)
+#if defined(BENCHMARK_OS_WINDOWS)
   HANDLE proc = GetCurrentProcess();
   FILETIME creation_time;
   FILETIME exit_time;
@@ -117,21 +114,28 @@ double ProcessCPUUsage() {
                       &user_time))
     return MakeTime(kernel_time, user_time);
   DiagnoseAndExit("GetProccessTimes() failed");
+#elif defined(BENCHMARK_OS_EMSCRIPTEN)
+  // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
+  // Use Emscripten-specific API. Reported CPU time would be exactly the
+  // same as total time, but this is ok because there aren't long-latency
+  // syncronous system calls in Emscripten.
+  return emscripten_get_now() * 1e-3;
+#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
+  // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
+  // https://github.com/google/benchmark/pull/292
+  struct timespec spec;
+  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
+    return MakeTime(spec);
+  DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
 #else
   struct rusage ru;
   if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru);
-  DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
+  DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed");
 #endif
 }
 
 double ThreadCPUUsage() {
-// FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
-// https://github.com/google/benchmark/pull/292
-#if defined(CLOCK_THREAD_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
-  struct timespec ts;
-  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
-  DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
-#elif defined(BENCHMARK_OS_WINDOWS)
+#if defined(BENCHMARK_OS_WINDOWS)
   HANDLE this_thread = GetCurrentThread();
   FILETIME creation_time;
   FILETIME exit_time;
@@ -141,6 +145,8 @@ double ThreadCPUUsage() {
                  &user_time);
   return MakeTime(kernel_time, user_time);
 #elif defined(BENCHMARK_OS_MACOSX)
+  // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
+  // https://github.com/google/benchmark/pull/292
   mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
   thread_basic_info_data_t info;
   mach_port_t thread = pthread_mach_thread_np(pthread_self());
@@ -149,6 +155,13 @@ double ThreadCPUUsage() {
     return MakeTime(info);
   }
   DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
+#elif defined(BENCHMARK_OS_EMSCRIPTEN)
+  // Emscripten doesn't support traditional threads
+  return ProcessCPUUsage();
+#elif defined(CLOCK_THREAD_CPUTIME_ID)
+  struct timespec ts;
+  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
+  DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
 #else
 #error Per-thread timing is not available on your system.
 #endif