author    Eric Fiselier <eric@efcs.ca>    2018-01-18 04:23:01 +0000
committer Eric Fiselier <eric@efcs.ca>    2018-01-18 04:23:01 +0000
commit    1903976d37868f8e13e60e44b77a71b5ede04bed (patch)
tree      f2c591f8bb25afea2fd83e115fa21c809cca79b5
parent    7f0d85ec1e567ceeb53523bede3dc948d1a833b0 (diff)
Update Google Benchmark library
llvm-svn: 322812
-rw-r--r--  libcxx/benchmarks/CMakeLists.txt | 1
-rw-r--r--  libcxx/benchmarks/ContainerBenchmarks.hpp | 2
-rw-r--r--  libcxx/benchmarks/algorithms.bench.cpp | 4
-rw-r--r--  libcxx/benchmarks/filesystem.bench.cpp | 4
-rw-r--r--  libcxx/benchmarks/string.bench.cpp | 4
-rw-r--r--  libcxx/benchmarks/stringstream.bench.cpp | 4
-rw-r--r--  libcxx/benchmarks/unordered_set_operations.bench.cpp | 4
-rw-r--r--  libcxx/benchmarks/util_smartptr.bench.cpp | 4
-rw-r--r--  libcxx/benchmarks/vector_operations.bench.cpp | 4
-rw-r--r--  libcxx/utils/google-benchmark/AUTHORS | 13
-rw-r--r--  libcxx/utils/google-benchmark/CMakeLists.txt | 46
-rw-r--r--  libcxx/utils/google-benchmark/CONTRIBUTORS | 16
-rw-r--r--  libcxx/utils/google-benchmark/README.md | 358
-rw-r--r--  libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake | 2
-rw-r--r--  libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake | 30
-rw-r--r--  libcxx/utils/google-benchmark/cmake/HandleGTest.cmake | 79
-rw-r--r--  libcxx/utils/google-benchmark/cmake/benchmark.pc.in | 11
-rw-r--r--  libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake | 8
-rw-r--r--  libcxx/utils/google-benchmark/docs/tools.md | 243
-rw-r--r--  libcxx/utils/google-benchmark/include/benchmark/benchmark.h | 1374
-rw-r--r--  libcxx/utils/google-benchmark/include/benchmark/benchmark_api.h | 915
-rw-r--r--  libcxx/utils/google-benchmark/include/benchmark/macros.h | 66
-rw-r--r--  libcxx/utils/google-benchmark/include/benchmark/reporter.h | 230
-rw-r--r--  libcxx/utils/google-benchmark/mingw.py | 320
-rw-r--r--  libcxx/utils/google-benchmark/src/CMakeLists.txt | 56
-rw-r--r--  libcxx/utils/google-benchmark/src/benchmark.cc | 90
-rw-r--r--  libcxx/utils/google-benchmark/src/benchmark_api_internal.h | 10
-rw-r--r--  libcxx/utils/google-benchmark/src/benchmark_register.cc | 38
-rw-r--r--  libcxx/utils/google-benchmark/src/check.h | 8
-rw-r--r--  libcxx/utils/google-benchmark/src/colorprint.cc | 4
-rw-r--r--  libcxx/utils/google-benchmark/src/commandlineflags.cc | 6
-rw-r--r--  libcxx/utils/google-benchmark/src/complexity.cc | 110
-rw-r--r--  libcxx/utils/google-benchmark/src/complexity.h | 10
-rw-r--r--  libcxx/utils/google-benchmark/src/console_reporter.cc | 55
-rw-r--r--  libcxx/utils/google-benchmark/src/counter.cc | 6
-rw-r--r--  libcxx/utils/google-benchmark/src/counter.h | 2
-rw-r--r--  libcxx/utils/google-benchmark/src/csv_reporter.cc | 11
-rw-r--r--  libcxx/utils/google-benchmark/src/cycleclock.h | 2
-rw-r--r--  libcxx/utils/google-benchmark/src/internal_macros.h | 59
-rw-r--r--  libcxx/utils/google-benchmark/src/json_reporter.cc | 57
-rw-r--r--  libcxx/utils/google-benchmark/src/log.h | 4
-rw-r--r--  libcxx/utils/google-benchmark/src/reporter.cc | 31
-rw-r--r--  libcxx/utils/google-benchmark/src/stat.h | 306
-rw-r--r--  libcxx/utils/google-benchmark/src/statistics.cc | 175
-rw-r--r--  libcxx/utils/google-benchmark/src/statistics.h | 37
-rw-r--r--  libcxx/utils/google-benchmark/src/string_util.cc | 12
-rw-r--r--  libcxx/utils/google-benchmark/src/string_util.h | 2
-rw-r--r--  libcxx/utils/google-benchmark/src/sysinfo.cc | 656
-rw-r--r--  libcxx/utils/google-benchmark/src/sysinfo.h | 10
-rw-r--r--  libcxx/utils/google-benchmark/src/timers.cc | 4
-rw-r--r--  libcxx/utils/google-benchmark/test/CMakeLists.txt | 58
-rw-r--r--  libcxx/utils/google-benchmark/test/basic_test.cc | 39
-rw-r--r--  libcxx/utils/google-benchmark/test/benchmark_test.cc | 57
-rw-r--r--  libcxx/utils/google-benchmark/test/complexity_test.cc | 12
-rw-r--r--  libcxx/utils/google-benchmark/test/cxx03_test.cc | 17
-rw-r--r--  libcxx/utils/google-benchmark/test/diagnostics_test.cc | 20
-rw-r--r--  libcxx/utils/google-benchmark/test/donotoptimize_test.cc | 21
-rw-r--r--  libcxx/utils/google-benchmark/test/filter_test.cc | 10
-rw-r--r--  libcxx/utils/google-benchmark/test/fixture_test.cc | 6
-rw-r--r--  libcxx/utils/google-benchmark/test/map_test.cc | 9
-rw-r--r--  libcxx/utils/google-benchmark/test/multiple_ranges_test.cc | 8
-rw-r--r--  libcxx/utils/google-benchmark/test/options_test.cc | 18
-rw-r--r--  libcxx/utils/google-benchmark/test/output_test.h | 130
-rw-r--r--  libcxx/utils/google-benchmark/test/output_test_helper.cc | 191
-rw-r--r--  libcxx/utils/google-benchmark/test/register_benchmark_test.cc | 56
-rw-r--r--  libcxx/utils/google-benchmark/test/reporter_output_test.cc | 178
-rw-r--r--  libcxx/utils/google-benchmark/test/skip_with_error_test.cc | 34
-rw-r--r--  libcxx/utils/google-benchmark/test/statistics_test.cc | 61
-rw-r--r--  libcxx/utils/google-benchmark/test/templated_fixture_test.cc | 28
-rw-r--r--  libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc | 250
-rw-r--r--  libcxx/utils/google-benchmark/test/user_counters_test.cc | 217
-rwxr-xr-x  libcxx/utils/google-benchmark/tools/compare.py | 312
-rwxr-xr-x [-rw-r--r--]  libcxx/utils/google-benchmark/tools/compare_bench.py | 11
-rw-r--r--  libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json | 44
-rw-r--r--  libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json | 48
-rw-r--r--  libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json | 81
-rw-r--r--  libcxx/utils/google-benchmark/tools/gbench/report.py | 92
77 files changed, 4886 insertions, 2625 deletions
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 8211ebd009a..8a154d86f7f 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -59,6 +59,7 @@ if (LIBCXX_BENCHMARK_NATIVE_STDLIB)
-DBENCHMARK_ENABLE_TESTING:BOOL=OFF)
endif()
+
#==============================================================================
# Benchmark tests configuration
#==============================================================================
diff --git a/libcxx/benchmarks/ContainerBenchmarks.hpp b/libcxx/benchmarks/ContainerBenchmarks.hpp
index dc268e7ebca..509e3d23ed9 100644
--- a/libcxx/benchmarks/ContainerBenchmarks.hpp
+++ b/libcxx/benchmarks/ContainerBenchmarks.hpp
@@ -3,7 +3,7 @@
#include <cassert>
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
namespace ContainerBenchmarks {
diff --git a/libcxx/benchmarks/algorithms.bench.cpp b/libcxx/benchmarks/algorithms.bench.cpp
index 745cc172718..86315390e0d 100644
--- a/libcxx/benchmarks/algorithms.bench.cpp
+++ b/libcxx/benchmarks/algorithms.bench.cpp
@@ -2,7 +2,7 @@
#include <vector>
#include <cstdint>
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include "GenerateInput.hpp"
constexpr std::size_t TestNumInputs = 1024;
@@ -59,4 +59,4 @@ BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
getDuplicateStringInputs)->Arg(TestNumInputs);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/filesystem.bench.cpp b/libcxx/benchmarks/filesystem.bench.cpp
index f7949a163a7..67719803517 100644
--- a/libcxx/benchmarks/filesystem.bench.cpp
+++ b/libcxx/benchmarks/filesystem.bench.cpp
@@ -1,6 +1,6 @@
#include <experimental/filesystem>
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include "GenerateInput.hpp"
#include "test_iterators.h"
@@ -135,4 +135,4 @@ void BM_PathIterateOnceBackwards(benchmark::State &st, GenInputs gen) {
BENCHMARK_CAPTURE(BM_PathIterateOnceBackwards, iterate_elements,
getRandomStringInputs)->Arg(TestNumInputs);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/string.bench.cpp b/libcxx/benchmarks/string.bench.cpp
index ef892391688..8324f3e99bb 100644
--- a/libcxx/benchmarks/string.bench.cpp
+++ b/libcxx/benchmarks/string.bench.cpp
@@ -2,7 +2,7 @@
#include <vector>
#include <cstdint>
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include "GenerateInput.hpp"
constexpr std::size_t MAX_STRING_LEN = 8 << 14;
@@ -46,4 +46,4 @@ static void BM_StringFindMatch2(benchmark::State &state) {
}
BENCHMARK(BM_StringFindMatch2)->Range(1, MAX_STRING_LEN / 4);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/stringstream.bench.cpp b/libcxx/benchmarks/stringstream.bench.cpp
index 6023cf775bc..75a7a284e07 100644
--- a/libcxx/benchmarks/stringstream.bench.cpp
+++ b/libcxx/benchmarks/stringstream.bench.cpp
@@ -1,4 +1,4 @@
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include <sstream>
double __attribute__((noinline)) istream_numbers();
@@ -35,4 +35,4 @@ static void BM_Istream_numbers(benchmark::State &state) {
}
BENCHMARK(BM_Istream_numbers)->RangeMultiplier(2)->Range(1024, 4096);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/unordered_set_operations.bench.cpp b/libcxx/benchmarks/unordered_set_operations.bench.cpp
index e2afdde56dc..ee0ea29b8d2 100644
--- a/libcxx/benchmarks/unordered_set_operations.bench.cpp
+++ b/libcxx/benchmarks/unordered_set_operations.bench.cpp
@@ -5,7 +5,7 @@
#include <cstdlib>
#include <cstring>
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include "ContainerBenchmarks.hpp"
#include "GenerateInput.hpp"
@@ -303,4 +303,4 @@ BENCHMARK_CAPTURE(BM_EmplaceDuplicate,
std::unordered_set<std::string>{},
getRandomCStringInputs)->Arg(TestNumInputs);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/util_smartptr.bench.cpp b/libcxx/benchmarks/util_smartptr.bench.cpp
index ad3f03a0448..c984b2ca666 100644
--- a/libcxx/benchmarks/util_smartptr.bench.cpp
+++ b/libcxx/benchmarks/util_smartptr.bench.cpp
@@ -9,7 +9,7 @@
#include <memory>
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
static void BM_SharedPtrCreateDestroy(benchmark::State& st) {
while (st.KeepRunning()) {
@@ -39,4 +39,4 @@ static void BM_WeakPtrIncDecRef(benchmark::State& st) {
}
BENCHMARK(BM_WeakPtrIncDecRef);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/vector_operations.bench.cpp b/libcxx/benchmarks/vector_operations.bench.cpp
index 004e801f0be..a2c4e5dbbc0 100644
--- a/libcxx/benchmarks/vector_operations.bench.cpp
+++ b/libcxx/benchmarks/vector_operations.bench.cpp
@@ -4,7 +4,7 @@
#include <cstdlib>
#include <cstring>
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include "ContainerBenchmarks.hpp"
#include "GenerateInput.hpp"
@@ -29,4 +29,4 @@ BENCHMARK_CAPTURE(BM_ConstructIterIter,
getRandomStringInputs)->Arg(TestNumInputs);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/utils/google-benchmark/AUTHORS b/libcxx/utils/google-benchmark/AUTHORS
index c4b059df258..4e4c4ed4754 100644
--- a/libcxx/utils/google-benchmark/AUTHORS
+++ b/libcxx/utils/google-benchmark/AUTHORS
@@ -10,9 +10,11 @@
Albert Pretorius <pretoalb@gmail.com>
Arne Beer <arne@twobeer.de>
+Carto
Christopher Seymour <chris.j.seymour@hotmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
-Dominic Hamon <dma@stripysock.com>
+Dirac Research
+Dominik Czarnota <dominik.b.czarnota@gmail.com>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
@@ -20,10 +22,12 @@ Felix Homann <linuxaudio@showlabor.de>
Google Inc.
International Business Machines Corporation
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
-Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
+Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
+Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
+Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
@@ -31,8 +35,9 @@ Nick Hutchinson <nshutchinson@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Paul Redmond <paul.redmond@gmail.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
+Roman Lebedev <lebedev.ri@gmail.com>
Shuo Chen <chenshuo@chenshuo.com>
+Steinar H. Gunderson <sgunderson@bigfoot.com>
+Yixuan Qiu <yixuanq@gmail.com>
Yusuke Suzuki <utatane.tea@gmail.com>
-Dirac Research
Zbigniew Skowron <zbychs@gmail.com>
-Dominik Czarnota <dominik.b.czarnota@gmail.com>
diff --git a/libcxx/utils/google-benchmark/CMakeLists.txt b/libcxx/utils/google-benchmark/CMakeLists.txt
index 1ba31331944..aa0826760cd 100644
--- a/libcxx/utils/google-benchmark/CMakeLists.txt
+++ b/libcxx/utils/google-benchmark/CMakeLists.txt
@@ -1,9 +1,11 @@
cmake_minimum_required (VERSION 2.8.12)
+
project (benchmark)
foreach(p
CMP0054 # CMake 3.1
CMP0056 # export EXE_LINKER_FLAGS to try_run
+ CMP0057 # Support no if() IN_LIST operator
)
if(POLICY ${p})
cmake_policy(SET ${p} NEW)
@@ -14,9 +16,19 @@ option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF)
-option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library" OFF)
+option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF)
+option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON)
+
+# Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which
+# may require downloading the source code.
+option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree building of unmet dependencies" OFF)
+
+# This option can be used to disable building and running unit tests which depend on gtest
+# in cases where it is not possible to build or find a valid version of gtest.
+option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)
# Make sure we can import out CMake functions
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# Read the git tags to determine the project version
@@ -94,9 +106,7 @@ else()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
- if (NOT BENCHMARK_USE_LIBCXX)
- add_cxx_compiler_flag(-Wzero-as-null-pointer-constant)
- endif()
+
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing
add_cxx_compiler_flag(-Wstrict-aliasing)
@@ -130,28 +140,27 @@ else()
if (GCC_RANLIB)
set(CMAKE_RANLIB ${GCC_RANLIB})
endif()
+ elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
+ include(llvm-toolchain)
endif()
endif()
# Coverage build type
- set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING
- "Flags used by the C++ compiler during coverage builds."
+ set(BENCHMARK_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}"
+ CACHE STRING "Flags used by the C++ compiler during coverage builds."
FORCE)
- set(CMAKE_EXE_LINKER_FLAGS_COVERAGE
- "${CMAKE_EXE_LINKER_FLAGS_DEBUG}" CACHE STRING
- "Flags used for linking binaries during coverage builds."
+ set(BENCHMARK_EXE_LINKER_FLAGS_COVERAGE "${CMAKE_EXE_LINKER_FLAGS_DEBUG}"
+ CACHE STRING "Flags used for linking binaries during coverage builds."
FORCE)
- set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE
- "${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" CACHE STRING
- "Flags used by the shared libraries linker during coverage builds."
+ set(BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE "${CMAKE_SHARED_LINKER_FLAGS_DEBUG}"
+ CACHE STRING "Flags used by the shared libraries linker during coverage builds."
FORCE)
mark_as_advanced(
- CMAKE_CXX_FLAGS_COVERAGE
- CMAKE_EXE_LINKER_FLAGS_COVERAGE
- CMAKE_SHARED_LINKER_FLAGS_COVERAGE)
+ BENCHMARK_CXX_FLAGS_COVERAGE
+ BENCHMARK_EXE_LINKER_FLAGS_COVERAGE
+ BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE)
set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING
- "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Coverage."
- FORCE)
+ "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Coverage.")
add_cxx_compiler_flag(--coverage COVERAGE)
endif()
@@ -197,5 +206,8 @@ add_subdirectory(src)
if (BENCHMARK_ENABLE_TESTING)
enable_testing()
+ if (BENCHMARK_ENABLE_GTEST_TESTS)
+ include(HandleGTest)
+ endif()
add_subdirectory(test)
endif()
diff --git a/libcxx/utils/google-benchmark/CONTRIBUTORS b/libcxx/utils/google-benchmark/CONTRIBUTORS
index 8ca4565aa74..c59134b9bd5 100644
--- a/libcxx/utils/google-benchmark/CONTRIBUTORS
+++ b/libcxx/utils/google-benchmark/CONTRIBUTORS
@@ -28,17 +28,20 @@ Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
Christopher Seymour <chris.j.seymour@hotmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
-Dominic Hamon <dma@stripysock.com>
+Dominic Hamon <dma@stripysock.com> <dominic@google.com>
+Dominik Czarnota <dominik.b.czarnota@gmail.com>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
-Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
+Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
+Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
-Kaito Udagawa <umireon@gmail.com>
Kai Wolf <kai.wolf@gmail.com>
+Kishan Kumar <kumar.kishan@outlook.com>
+Kaito Udagawa <umireon@gmail.com>
Lei Xu <eddyxu@gmail.com>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
@@ -48,9 +51,12 @@ Pascal Leroy <phl@google.com>
Paul Redmond <paul.redmond@gmail.com>
Pierre Phaneuf <pphaneuf@google.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
+Raul Marin <rmrodriguez@cartodb.com>
Ray Glover <ray.glover@uk.ibm.com>
+Roman Lebedev <lebedev.ri@gmail.com>
Shuo Chen <chenshuo@chenshuo.com>
-Yusuke Suzuki <utatane.tea@gmail.com>
Tobias Ulvgård <tobias.ulvgard@dirac.se>
+Tom Madams <tom.ej.madams@gmail.com> <tmadams@google.com>
+Yixuan Qiu <yixuanq@gmail.com>
+Yusuke Suzuki <utatane.tea@gmail.com>
Zbigniew Skowron <zbychs@gmail.com>
-Dominik Czarnota <dominik.b.czarnota@gmail.com>
diff --git a/libcxx/utils/google-benchmark/README.md b/libcxx/utils/google-benchmark/README.md
index f16a9d7906a..6bd81e701c7 100644
--- a/libcxx/utils/google-benchmark/README.md
+++ b/libcxx/utils/google-benchmark/README.md
@@ -2,6 +2,7 @@
[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark)
[![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master)
[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark)
+[![slackin](https://slackin-iqtfqnpzxd.now.sh/badge.svg)](https://slackin-iqtfqnpzxd.now.sh/)
A library to support the benchmarking of functions, similar to unit-tests.
@@ -13,13 +14,88 @@ IRC channel: https://freenode.net #googlebenchmark
[Additional Tooling Documentation](docs/tools.md)
+
+## Building
+
+The basic steps for configuring and building the library look like this:
+
+```bash
+$ git clone https://github.com/google/benchmark.git
+# Benchmark requires GTest as a dependency. Add the source tree as a subdirectory.
+$ git clone https://github.com/google/googletest.git benchmark/googletest
+$ mkdir build && cd build
+$ cmake -G <generator> [options] ../benchmark
+# Assuming a makefile generator was used
+$ make
+```
+
+Note that Google Benchmark requires GTest to build and run the tests. This
+dependency can be provided in one of three ways:
+
+* Check out the GTest sources into `benchmark/googletest`.
+* Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during
+ configuration, the library will automatically download and build any required
+ dependencies.
+* Otherwise, if nothing is done, CMake will use `find_package(GTest REQUIRED)`
+ to resolve the required GTest dependency.
+
+If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF`
+to `CMAKE_ARGS`.
+
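+For reference, a minimal configuration sketch using the two CMake options
+mentioned above (the `../benchmark` path is only a placeholder for wherever
+the sources were cloned):
+
+```bash
+# Either let CMake download and build the missing GTest dependency itself...
+cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON ../benchmark
+# ...or skip the GTest-based unit tests entirely.
+cmake -DBENCHMARK_ENABLE_GTEST_TESTS=OFF ../benchmark
+```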
+
+## Installation Guide
+
+For Ubuntu and Debian-based systems:
+
+First, make sure you have git and cmake installed (if not, please install them):
+
+```
+sudo apt-get install git
+sudo apt-get install cmake
+```
+
+Now, let's clone the repository and build it
+
+```
+git clone https://github.com/google/benchmark.git
+cd benchmark
+mkdir build
+cd build
+cmake .. -DCMAKE_BUILD_TYPE=RELEASE
+make
+```
+
+We need to install the library globally now
+
+```
+sudo make install
+```
+
+Now you have google/benchmark installed on your machine.
+Note: don't forget to link against the pthread library when building your benchmarks.
+
+## Stable and Experimental Library Versions
+
+The main branch contains the latest stable version of the benchmarking library,
+the API of which can be considered largely stable, with source-breaking changes
+being made only upon the release of a new major version.
+
+Newer, experimental features are implemented and tested on the
+[`v2` branch](https://github.com/google/benchmark/tree/v2). Users who wish
+to use, test, and provide feedback on the new features are encouraged to try
+this branch. However, this branch provides no stability guarantees and reserves
+the right to change and break the API at any time.
+
+
## Example usage
### Basic usage
Define a function that executes the code to be measured.
```c++
+#include <benchmark/benchmark.h>
+
static void BM_StringCreation(benchmark::State& state) {
- while (state.KeepRunning())
+ for (auto _ : state)
std::string empty_string;
}
// Register the function as a benchmark
@@ -28,7 +104,7 @@ BENCHMARK(BM_StringCreation);
// Define another benchmark
static void BM_StringCopy(benchmark::State& state) {
std::string x = "hello";
- while (state.KeepRunning())
+ for (auto _ : state)
std::string copy(x);
}
BENCHMARK(BM_StringCopy);
@@ -36,6 +112,10 @@ BENCHMARK(BM_StringCopy);
BENCHMARK_MAIN();
```
+Don't forget to tell your linker to link against the benchmark library, e.g. via the `-lbenchmark` linker flag.
+
+The benchmark library will report the timing for the code within the `for(...)` loop.
+
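+As a rough sketch, a standalone benchmark can then be compiled and linked
+against an installed copy of the library like this (the file name
+`mybenchmark.cc` is just a placeholder; see also the linking notes further
+below regarding pthread when using gcc):
+
+```bash
+# Compile a single-file benchmark and link it against the installed library.
+g++ -std=c++11 mybenchmark.cc -o mybenchmark -lbenchmark -lpthread
+```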
### Passing arguments
Sometimes a family of benchmarks can be implemented with just one routine that
takes an extra argument to specify which one of the family of benchmarks to
@@ -47,7 +127,7 @@ static void BM_memcpy(benchmark::State& state) {
char* src = new char[state.range(0)];
char* dst = new char[state.range(0)];
memset(src, 'x', state.range(0));
- while (state.KeepRunning())
+ for (auto _ : state)
memcpy(dst, src, state.range(0));
state.SetBytesProcessed(int64_t(state.iterations()) *
int64_t(state.range(0)));
@@ -80,22 +160,23 @@ insertion.
```c++
static void BM_SetInsert(benchmark::State& state) {
- while (state.KeepRunning()) {
+ std::set<int> data;
+ for (auto _ : state) {
state.PauseTiming();
- std::set<int> data = ConstructRandomSet(state.range(0));
+ data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range(1); ++j)
data.insert(RandomNumber());
}
}
BENCHMARK(BM_SetInsert)
- ->Args({1<<10, 1})
- ->Args({1<<10, 8})
- ->Args({1<<10, 64})
+ ->Args({1<<10, 128})
+ ->Args({2<<10, 128})
+ ->Args({4<<10, 128})
+ ->Args({8<<10, 128})
->Args({1<<10, 512})
- ->Args({8<<10, 1})
- ->Args({8<<10, 8})
- ->Args({8<<10, 64})
+ ->Args({2<<10, 512})
+ ->Args({4<<10, 512})
->Args({8<<10, 512});
```
@@ -105,7 +186,7 @@ product of the two specified ranges and will generate a benchmark for each such
pair.
```c++
-BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}});
+BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
```
For more complex patterns of inputs, passing a custom function to `Apply` allows
@@ -131,7 +212,7 @@ running time and the normalized root-mean square error of string comparison.
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range(0), '-');
std::string s2(state.range(0), '-');
- while (state.KeepRunning()) {
+ for (auto _ : state) {
benchmark::DoNotOptimize(s1.compare(s2));
}
state.SetComplexityN(state.range(0));
@@ -165,7 +246,7 @@ absence of multiprogramming.
template <class Q> int BM_Sequential(benchmark::State& state) {
Q q;
typename Q::value_type v;
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = state.range(0); i--; )
q.push(v);
for (int e = state.range(0); e--; )
@@ -181,7 +262,7 @@ BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
Three macros are provided for adding benchmark templates.
```c++
-#if __cplusplus >= 201103L // C++11 and greater.
+#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters.
#else // C++ < C++11
#define BENCHMARK_TEMPLATE(func, arg1)
@@ -190,6 +271,62 @@ Three macros are provided for adding benchmark templates.
#define BENCHMARK_TEMPLATE2(func, arg1, arg2)
```
+### A Faster KeepRunning loop
+
+In C++11 mode, a range-based for loop should be used in preference to
+the `KeepRunning` loop for running the benchmarks. For example:
+
+```c++
+static void BM_Fast(benchmark::State &state) {
+ for (auto _ : state) {
+ FastOperation();
+ }
+}
+BENCHMARK(BM_Fast);
+```
+
+The reason the ranged-for loop is faster than using `KeepRunning` is
+that `KeepRunning` requires a memory load and store of the iteration count
+every iteration, whereas the ranged-for variant is able to keep the iteration count
+in a register.
+
+For example, an empty inner loop using the range-based for method looks like:
+
+```asm
+# Loop Init
+ mov rbx, qword ptr [r14 + 104]
+ call benchmark::State::StartKeepRunning()
+ test rbx, rbx
+ je .LoopEnd
+.LoopHeader: # =>This Inner Loop Header: Depth=1
+ add rbx, -1
+ jne .LoopHeader
+.LoopEnd:
+```
+
+Compared to an empty `KeepRunning` loop, which looks like:
+
+```asm
+.LoopHeader: # in Loop: Header=BB0_3 Depth=1
+ cmp byte ptr [rbx], 1
+ jne .LoopInit
+.LoopBody: # =>This Inner Loop Header: Depth=1
+ mov rax, qword ptr [rbx + 8]
+ lea rcx, [rax + 1]
+ mov qword ptr [rbx + 8], rcx
+ cmp rax, qword ptr [rbx + 104]
+ jb .LoopHeader
+ jmp .LoopEnd
+.LoopInit:
+ mov rdi, rbx
+ call benchmark::State::StartKeepRunning()
+ jmp .LoopBody
+.LoopEnd:
+```
+
+Unless C++03 compatibility is required, the ranged-for variant of writing
+the benchmark loop should be preferred.
+
## Passing arbitrary arguments to a benchmark
In C++11 it is possible to define a benchmark that takes an arbitrary number
of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)`
@@ -199,11 +336,11 @@ The `test_case_name` is appended to the name of the benchmark and
should describe the values passed.
```c++
-template <class ...ExtraArgs>`
+template <class ...ExtraArgs>
void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
[...]
}
-// Registers a benchmark named "BM_takes_args/int_string_test` that passes
+// Registers a benchmark named "BM_takes_args/int_string_test" that passes
// the specified values to `extra_args`.
BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
```
@@ -223,8 +360,7 @@ scope, the `RegisterBenchmark` can be called anywhere. This allows for
benchmark tests to be registered programmatically.
Additionally `RegisterBenchmark` allows any callable object to be registered
-as a benchmark. Including capturing lambdas and function objects. This
-allows the creation
+as a benchmark, including capturing lambdas and function objects.
For Example:
```c++
@@ -240,9 +376,10 @@ int main(int argc, char** argv) {
### Multithreaded benchmarks
In a multithreaded test (benchmark invoked by multiple threads simultaneously),
-it is guaranteed that none of the threads will start until all have called
-`KeepRunning`, and all will have finished before KeepRunning returns false. As
-such, any global setup or teardown can be wrapped in a check against the thread
+it is guaranteed that none of the threads will start until all have reached
+the start of the benchmark loop, and all will have finished before any thread
+exits the benchmark loop. (This behavior is also provided by the `KeepRunning()`
+API.) As such, any global setup or teardown can be wrapped in a check against the thread
index:
```c++
@@ -250,7 +387,7 @@ static void BM_MultiThreaded(benchmark::State& state) {
if (state.thread_index == 0) {
// Setup code here.
}
- while (state.KeepRunning()) {
+ for (auto _ : state) {
// Run the test as normal.
}
if (state.thread_index == 0) {
@@ -274,10 +411,10 @@ Without `UseRealTime`, CPU time is used by default.
## Manual timing
For benchmarking something for which neither CPU time nor real-time are
correct or accurate enough, completely manual timing is supported using
-the `UseManualTime` function.
+the `UseManualTime` function.
When `UseManualTime` is used, the benchmarked code must call
-`SetIterationTime` once per iteration of the `KeepRunning` loop to
+`SetIterationTime` once per iteration of the benchmark loop to
report the manually measured time.
An example use case for this is benchmarking GPU execution (e.g. OpenCL
@@ -293,7 +430,7 @@ static void BM_ManualTiming(benchmark::State& state) {
static_cast<double>(microseconds)
};
- while (state.KeepRunning()) {
+ for (auto _ : state) {
auto start = std::chrono::high_resolution_clock::now();
// Simulate some useful workload with a sleep
std::this_thread::sleep_for(sleep_duration);
@@ -316,7 +453,7 @@ functions can be used.
```c++
static void BM_test(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
int x = 0;
for (int i=0; i < 64; ++i) {
benchmark::DoNotOptimize(x += i);
@@ -355,7 +492,7 @@ away.
```c++
static void BM_vector_push_back(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
std::vector<int> v;
v.reserve(1);
benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered.
@@ -384,7 +521,7 @@ the minimum time, or the wallclock time is 5x minimum time. The minimum time is
set as a flag `--benchmark_min_time` or per benchmark by calling `MinTime` on
the registered benchmark object.
-## Reporting the mean and standard devation by repeated benchmarks
+## Reporting the mean, median and standard deviation by repeated benchmarks
By default each benchmark is run once and that single result is reported.
However benchmarks are often noisy and a single result may not be representative
of the overall behavior. For this reason it's possible to repeatedly rerun the
@@ -392,19 +529,42 @@ benchmark.
The number of runs of each benchmark is specified globally by the
`--benchmark_repetitions` flag or on a per benchmark basis by calling
-`Repetitions` on the registered benchmark object. When a benchmark is run
-more than once the mean and standard deviation of the runs will be reported.
+`Repetitions` on the registered benchmark object. When a benchmark is run more
+than once the mean, median and standard deviation of the runs will be reported.
Additionally the `--benchmark_report_aggregates_only={true|false}` flag or
`ReportAggregatesOnly(bool)` function can be used to change how repeated tests
are reported. By default the result of each repeated run is reported. When this
-option is 'true' only the mean and standard deviation of the runs is reported.
+option is `true` only the mean, median and standard deviation of the runs is reported.
Calling `ReportAggregatesOnly(bool)` on a registered benchmark object overrides
the value of the flag for that benchmark.
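+For example, a benchmark binary might be invoked as follows (a sketch;
+`./mybench` is only a placeholder name):
+
+```bash
+# Run every benchmark 10 times and report only the aggregate statistics.
+./mybench --benchmark_repetitions=10 --benchmark_report_aggregates_only=true
+```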
+## User-defined statistics for repeated benchmarks
+While having mean, median and standard deviation is nice, this may not be
+enough for everyone. For example, you may want to know what the largest
+observation is, e.g. because you have some real-time constraints. This is easy.
+The following code will specify a custom statistic to be calculated, defined
+by a lambda function.
+
+```c++
+void BM_spin_empty(benchmark::State& state) {
+ for (auto _ : state) {
+ for (int x = 0; x < state.range(0); ++x) {
+ benchmark::DoNotOptimize(x);
+ }
+ }
+}
+
+BENCHMARK(BM_spin_empty)
+ ->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+ return *(std::max_element(std::begin(v), std::end(v)));
+ })
+ ->Arg(512);
+```
+
## Fixtures
Fixture tests are created by
-first defining a type that derives from ::benchmark::Fixture and then
+first defining a type that derives from `::benchmark::Fixture` and then
creating/registering the tests using the following macros:
* `BENCHMARK_F(ClassName, Method)`
@@ -417,13 +577,13 @@ For Example:
class MyFixture : public benchmark::Fixture {};
BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
- while (st.KeepRunning()) {
+ for (auto _ : st) {
...
}
}
BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) {
- while (st.KeepRunning()) {
+ for (auto _ : st) {
...
}
}
@@ -432,6 +592,31 @@ BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
/* BarTest is now registered */
```
+### Templated fixtures
+You can also create templated fixtures by using the following macros:
+
+* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)`
+* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)`
+
+For example:
+```c++
+template<typename T>
+class MyFixture : public benchmark::Fixture {};
+
+BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) {
+ for (auto _ : st) {
+ ...
+ }
+}
+
+BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) {
+ for (auto _ : st) {
+ ...
+ }
+}
+
+BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2);
+```
## User-defined counters
@@ -441,7 +626,7 @@ will add columns "Foo", "Bar" and "Baz" in its output:
```c++
static void UserCountersExample1(benchmark::State& state) {
double numFoos = 0, numBars = 0, numBazs = 0;
- while (state.KeepRunning()) {
+ for (auto _ : state) {
// ... count Foo,Bar,Baz events
}
state.counters["Foo"] = numFoos;
@@ -491,17 +676,85 @@ When you're compiling in C++11 mode or later you can use `insert()` with
state.counters["Baz"] = numBazs;
```
+### Counter reporting
+
+When using the console reporter, by default, user counters are printed at
+the end after the table, the same way as ``bytes_processed`` and
+``items_processed``. This is best for cases in which there are few counters,
+or where there are only a couple of lines per benchmark. Here's an example of
+the default output:
+
+```
+------------------------------------------------------------------------------
+Benchmark Time CPU Iterations UserCounters...
+------------------------------------------------------------------------------
+BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8
+BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m
+BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2
+BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4
+BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8
+BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16
+BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32
+BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4
+BM_Factorial 26 ns 26 ns 26608979 40320
+BM_Factorial/real_time 26 ns 26 ns 26587936 40320
+BM_CalculatePiRange/1 16 ns 16 ns 45704255 0
+BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374
+BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746
+BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355
+```
+
+If this doesn't suit you, you can print each counter as a table column by
+passing the flag `--benchmark_counters_tabular=true` to the benchmark
+application. This is best for cases in which there are a lot of counters, or
+a lot of lines per individual benchmark. Note that this will trigger a
+reprinting of the table header any time the counter set changes between
+individual benchmarks. Here's an example of corresponding output when
+`--benchmark_counters_tabular=true` is passed:
+
+```
+---------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations Bar Bat Baz Foo
+---------------------------------------------------------------------------------------
+BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8
+BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1
+BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2
+BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4
+BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8
+BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16
+BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32
+BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4
+--------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------
+BM_Factorial 26 ns 26 ns 26392245 40320
+BM_Factorial/real_time 26 ns 26 ns 26494107 40320
+BM_CalculatePiRange/1 15 ns 15 ns 45571597 0
+BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374
+BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746
+BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355
+BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184
+BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162
+BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416
+BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159
+BM_CalculatePi/threads:8 2255 ns 9943 ns 70936
+```
+Note above the additional header printed when the benchmark changes from
+``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does
+not have the same counter set as ``BM_UserCounter``.
+
## Exiting Benchmarks in Error
When errors caused by external influences, such as file I/O and network
communication, occur within a benchmark the
`State::SkipWithError(const char* msg)` function can be used to skip that run
of benchmark and report the error. Note that only future iterations of the
-`KeepRunning()` are skipped. Users may explicitly return to exit the
-benchmark immediately.
+`KeepRunning()` are skipped. For the ranged-for version of the benchmark loop,
+users must explicitly exit the loop, otherwise all iterations will be performed.
+Users may explicitly return to exit the benchmark immediately.
The `SkipWithError(...)` function may be used at any point within the benchmark,
-including before and after the `KeepRunning()` loop.
+including before and after the benchmark loop.
For example:
@@ -512,7 +765,7 @@ static void BM_test(benchmark::State& state) {
state.SkipWithError("Resource is not good!");
// KeepRunning() loop will not be entered.
}
- while (state.KeepRunning()) {
+  while (state.KeepRunning()) {
auto data = resource.read_data();
if (!resource.good()) {
state.SkipWithError("Failed to read data!");
@@ -521,6 +774,14 @@ static void BM_test(benchmark::State& state) {
do_stuff(data);
}
}
+
+static void BM_test_ranged_for(benchmark::State& state) {
+ state.SkipWithError("test will not be entered");
+ for (auto _ : state) {
+ state.SkipWithError("Failed!");
+ break; // REQUIRED to prevent all further iterations.
+ }
+}
```
## Running a subset of the benchmarks
@@ -547,7 +808,7 @@ The library supports multiple output formats. Use the
is the default format.
The Console format is intended to be a human readable format. By default
-the format generates color output. Context is output on stderr and the
+the format generates color output. Context is output on stderr and the
tabular data on stdout. Example tabular output looks like:
```
Benchmark Time(ns) CPU(ns) Iterations
@@ -628,6 +889,9 @@ To enable link-time optimisation, use
cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true
```
+If you are using gcc, you might need to set `GCC_AR` and `GCC_RANLIB` cmake cache variables, if autodetection fails.
+If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables.
+
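+As an illustration of the clang case (a sketch; it assumes `llvm-ar`,
+`llvm-nm` and `llvm-ranlib` are on `PATH`), these cache variables can be set
+directly on the CMake command line:
+
+```bash
+# Enable LTO and point CMake at the LLVM replacements for ar/nm/ranlib.
+cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true \
+      -DLLVMAR_EXECUTABLE=$(which llvm-ar) \
+      -DLLVMNM_EXECUTABLE=$(which llvm-nm) \
+      -DLLVMRANLIB_EXECUTABLE=$(which llvm-ranlib) ..
+```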
## Linking against the library
When using gcc, it is necessary to link against pthread to avoid runtime exceptions.
This is due to how gcc implements std::thread.
@@ -650,6 +914,18 @@ Anything older *may* work.
Note: Using the library and its headers in C++03 is supported. C++11 is only
required to build the library.
+## Disable CPU frequency scaling
+If you see this error:
+```
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+```
+you might want to disable the CPU frequency scaling while running the benchmark:
+```bash
+sudo cpupower frequency-set --governor performance
+./mybench
+sudo cpupower frequency-set --governor powersave
+```
+
# Known Issues
### Windows
diff --git a/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake b/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
index 0b176ba27f1..17d5f3dcc37 100644
--- a/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
+++ b/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
@@ -38,7 +38,7 @@ function(add_cxx_compiler_flag FLAG)
if(ARGV1)
string(TOUPPER "_${VARIANT}" VARIANT)
endif()
- set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
+ set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
endif()
endfunction()
diff --git a/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake b/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
index 6efe6a84ee4..b2a82171577 100644
--- a/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
+++ b/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
@@ -22,16 +22,35 @@ function(cxx_feature_check FILE)
string(TOUPPER ${FILE} VAR)
string(TOUPPER "HAVE_${VAR}" FEATURE)
if (DEFINED HAVE_${VAR})
+ set(HAVE_${VAR} 1 PARENT_SCOPE)
+ add_definitions(-DHAVE_${VAR})
return()
endif()
+
message("-- Performing Test ${FEATURE}")
- try_run(RUN_${FEATURE} COMPILE_${FEATURE}
- ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
- CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
- LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+ if(CMAKE_CROSSCOMPILING)
+ try_compile(COMPILE_${FEATURE}
+ ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
+ CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
+ LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+ if(COMPILE_${FEATURE})
+ message(WARNING
+ "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
+ set(RUN_${FEATURE} 0)
+ else()
+ set(RUN_${FEATURE} 1)
+ endif()
+ else()
+ message("-- Performing Test ${FEATURE}")
+ try_run(RUN_${FEATURE} COMPILE_${FEATURE}
+ ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
+ CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
+ LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+ endif()
+
if(RUN_${FEATURE} EQUAL 0)
message("-- Performing Test ${FEATURE} -- success")
- set(HAVE_${VAR} 1 CACHE INTERNAL "Feature test for ${FILE}" PARENT_SCOPE)
+ set(HAVE_${VAR} 1 PARENT_SCOPE)
add_definitions(-DHAVE_${VAR})
else()
if(NOT COMPILE_${FEATURE})
@@ -41,4 +60,3 @@ function(cxx_feature_check FILE)
endif()
endif()
endfunction()
-
diff --git a/libcxx/utils/google-benchmark/cmake/HandleGTest.cmake b/libcxx/utils/google-benchmark/cmake/HandleGTest.cmake
new file mode 100644
index 00000000000..77ffc4c51c9
--- /dev/null
+++ b/libcxx/utils/google-benchmark/cmake/HandleGTest.cmake
@@ -0,0 +1,79 @@
+
+macro(split_list listname)
+ string(REPLACE ";" " " ${listname} "${${listname}}")
+endmacro()
+
+macro(build_external_gtest)
+ include(ExternalProject)
+ set(GTEST_FLAGS "")
+ if (BENCHMARK_USE_LIBCXX)
+ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+ list(APPEND GTEST_FLAGS -stdlib=libc++)
+ else()
+ message(WARNING "Unsupported compiler (${CMAKE_CXX_COMPILER}) when using libc++")
+ endif()
+ endif()
+ if (BENCHMARK_BUILD_32_BITS)
+ list(APPEND GTEST_FLAGS -m32)
+ endif()
+ if (NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
+ list(APPEND GTEST_FLAGS ${CMAKE_CXX_FLAGS})
+ endif()
+ string(TOUPPER "${CMAKE_BUILD_TYPE}" GTEST_BUILD_TYPE)
+ if ("${GTEST_BUILD_TYPE}" STREQUAL "COVERAGE")
+ set(GTEST_BUILD_TYPE "DEBUG")
+ endif()
+ split_list(GTEST_FLAGS)
+ ExternalProject_Add(googletest
+ EXCLUDE_FROM_ALL ON
+ GIT_REPOSITORY https://github.com/google/googletest.git
+ GIT_TAG master
+ PREFIX "${CMAKE_BINARY_DIR}/googletest"
+ INSTALL_DIR "${CMAKE_BINARY_DIR}/googletest"
+ CMAKE_CACHE_ARGS
+ -DCMAKE_BUILD_TYPE:STRING=${GTEST_BUILD_TYPE}
+ -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
+ -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
+ -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
+ -DCMAKE_CXX_FLAGS:STRING=${GTEST_FLAGS}
+ -Dgtest_force_shared_crt:BOOL=ON
+ )
+
+ ExternalProject_Get_Property(googletest install_dir)
+
+ add_library(gtest UNKNOWN IMPORTED)
+ add_library(gtest_main UNKNOWN IMPORTED)
+
+ set(LIB_SUFFIX "${CMAKE_STATIC_LIBRARY_SUFFIX}")
+ set(LIB_PREFIX "${CMAKE_STATIC_LIBRARY_PREFIX}")
+
+ if("${GTEST_BUILD_TYPE}" STREQUAL "DEBUG")
+ set(LIB_SUFFIX "d${CMAKE_STATIC_LIBRARY_SUFFIX}")
+ endif()
+ file(MAKE_DIRECTORY ${install_dir}/include)
+ set_target_properties(gtest PROPERTIES
+ IMPORTED_LOCATION ${install_dir}/lib/${LIB_PREFIX}gtest${LIB_SUFFIX}
+ INTERFACE_INCLUDE_DIRECTORIES ${install_dir}/include
+ )
+ set_target_properties(gtest_main PROPERTIES
+ IMPORTED_LOCATION ${install_dir}/lib/${LIB_PREFIX}gtest_main${LIB_SUFFIX}
+ INTERFACE_INCLUDE_DIRECTORIES ${install_dir}/include
+ )
+ add_dependencies(gtest googletest)
+ add_dependencies(gtest_main googletest)
+ set(GTEST_BOTH_LIBRARIES gtest gtest_main)
+ #set(GTEST_INCLUDE_DIRS ${install_dir}/include)
+endmacro(build_external_gtest)
+
+if (BENCHMARK_ENABLE_GTEST_TESTS)
+ if (IS_DIRECTORY ${CMAKE_SOURCE_DIR}/googletest)
+ set(INSTALL_GTEST OFF CACHE INTERNAL "")
+ set(INSTALL_GMOCK OFF CACHE INTERNAL "")
+ add_subdirectory(${CMAKE_SOURCE_DIR}/googletest)
+ set(GTEST_BOTH_LIBRARIES gtest gtest_main)
+ elseif(BENCHMARK_DOWNLOAD_DEPENDENCIES)
+ build_external_gtest()
+ else()
+ find_package(GTest REQUIRED)
+ endif()
+endif()
diff --git a/libcxx/utils/google-benchmark/cmake/benchmark.pc.in b/libcxx/utils/google-benchmark/cmake/benchmark.pc.in
new file mode 100644
index 00000000000..1e84bff68d8
--- /dev/null
+++ b/libcxx/utils/google-benchmark/cmake/benchmark.pc.in
@@ -0,0 +1,11 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${prefix}/lib
+includedir=${prefix}/include
+
+Name: @PROJECT_NAME@
+Description: Google microbenchmark framework
+Version: @VERSION@
+
+Libs: -L${libdir} -lbenchmark
+Cflags: -I${includedir}
diff --git a/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake b/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake
new file mode 100644
index 00000000000..fc119e52fd2
--- /dev/null
+++ b/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake
@@ -0,0 +1,8 @@
+find_package(LLVMAr REQUIRED)
+set(CMAKE_AR "${LLVMAR_EXECUTABLE}" CACHE FILEPATH "" FORCE)
+
+find_package(LLVMNm REQUIRED)
+set(CMAKE_NM "${LLVMNM_EXECUTABLE}" CACHE FILEPATH "" FORCE)
+
+find_package(LLVMRanLib REQUIRED)
+set(CMAKE_RANLIB "${LLVMRANLIB_EXECUTABLE}" CACHE FILEPATH "" FORCE)
diff --git a/libcxx/utils/google-benchmark/docs/tools.md b/libcxx/utils/google-benchmark/docs/tools.md
index f176f74a48f..70500bd3223 100644
--- a/libcxx/utils/google-benchmark/docs/tools.md
+++ b/libcxx/utils/google-benchmark/docs/tools.md
@@ -11,49 +11,232 @@ $ compare_bench.py <old-benchmark> <new-benchmark> [benchmark options]...
Where `<old-benchmark>` and `<new-benchmark>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything the binary accepts, be it normal `--benchmark_*` parameters or custom parameters your binary takes.
+
The sample output using the JSON test files under `Inputs/` gives:
``` bash
$ ./compare_bench.py ./gbench/Inputs/test1_run1.json ./gbench/Inputs/test1_run2.json
Comparing ./gbench/Inputs/test1_run1.json to ./gbench/Inputs/test1_run2.json
-Benchmark Time CPU
-----------------------------------------------
-BM_SameTimes +0.00 +0.00
-BM_2xFaster -0.50 -0.50
-BM_2xSlower +1.00 +1.00
-BM_10PercentFaster -0.10 -0.10
-BM_10PercentSlower +0.10 +0.10
+Benchmark Time CPU Time Old Time New CPU Old CPU New
+-------------------------------------------------------------------------------------------------------------
+BM_SameTimes +0.0000 +0.0000 10 10 10 10
+BM_2xFaster -0.5000 -0.5000 50 25 50 25
+BM_2xSlower +1.0000 +1.0000 50 100 50 100
+BM_1PercentFaster -0.0100 -0.0100 100 99 100 99
+BM_1PercentSlower +0.0100 +0.0100 100 101 100 101
+BM_10PercentFaster -0.1000 -0.1000 100 90 100 90
+BM_10PercentSlower +0.1000 +0.1000 100 110 100 110
+BM_100xSlower +99.0000 +99.0000 100 10000 100 10000
+BM_100xFaster -0.9900 -0.9900 10000 100 10000 100
+BM_10PercentCPUToTime +0.1000 -0.1000 100 110 100 90
+BM_ThirdFaster -0.3333 -0.3334 100 67 100 67
+BM_BadTimeUnit -0.9000 +0.2000 0 0 0 1
```
+As you can see, the values in the `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+
When a benchmark executable is run, the raw output from the benchmark is printed in real time to stdout. The sample output using `benchmark/basic_test` for both arguments looks like:
```
./compare_bench.py test/basic_test test/basic_test --benchmark_filter=BM_empty.*
-RUNNING: test/basic_test --benchmark_filter=BM_empty.*
-Run on (4 X 4228.32 MHz CPU s)
-2016-08-02 19:21:33
+RUNNING: test/basic_test --benchmark_filter=BM_empty.* --benchmark_out=/tmp/tmpN7LF3a
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 23:28:36
+---------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------
-BM_empty 9 ns 9 ns 79545455
-BM_empty/threads:4 4 ns 9 ns 75268816
-BM_empty_stop_start 8 ns 8 ns 83333333
-BM_empty_stop_start/threads:4 3 ns 8 ns 83333332
-RUNNING: test/basic_test --benchmark_filter=BM_empty.*
-Run on (4 X 4228.32 MHz CPU s)
-2016-08-02 19:21:35
+---------------------------------------------------------------------
+BM_empty 4 ns 4 ns 170178757
+BM_empty/threads:8 1 ns 7 ns 103868920
+BM_empty_stop_start 0 ns 0 ns 1000000000
+BM_empty_stop_start/threads:8 0 ns 0 ns 1403031720
+RUNNING: /test/basic_test --benchmark_filter=BM_empty.* --benchmark_out=/tmp/tmplvrIp8
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 23:28:38
+---------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------
-BM_empty 9 ns 9 ns 76086957
-BM_empty/threads:4 4 ns 9 ns 76086956
-BM_empty_stop_start 8 ns 8 ns 87500000
-BM_empty_stop_start/threads:4 3 ns 8 ns 88607596
-Comparing test/basic_test to test/basic_test
-Benchmark Time CPU
----------------------------------------------------------
-BM_empty +0.00 +0.00
-BM_empty/threads:4 +0.00 +0.00
-BM_empty_stop_start +0.00 +0.00
-BM_empty_stop_start/threads:4 +0.00 +0.00
+---------------------------------------------------------------------
+BM_empty 4 ns 4 ns 169534855
+BM_empty/threads:8 1 ns 7 ns 104188776
+BM_empty_stop_start 0 ns 0 ns 1000000000
+BM_empty_stop_start/threads:8 0 ns 0 ns 1404159424
+Comparing ../build/test/basic_test to ../build/test/basic_test
+Benchmark Time CPU Time Old Time New CPU Old CPU New
+---------------------------------------------------------------------------------------------------------------------
+BM_empty -0.0048 -0.0049 4 4 4 4
+BM_empty/threads:8 -0.0123 -0.0054 1 1 7 7
+BM_empty_stop_start -0.0000 -0.0000 0 0 0 0
+BM_empty_stop_start/threads:8 -0.0029 +0.0001 0 0 0 0
+
```
+As you can see, the values in the `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
Obviously this example doesn't give any useful output, but it's intended to show the output format when 'compare_bench.py' needs to run benchmarks.
+
+## compare.py
+
+The `compare.py` script can be used to compare the results of benchmarks.
+There are three modes of operation:
+
+1. Just compare two benchmarks, the same thing `compare_bench.py` does.
+The program is invoked like:
+
+``` bash
+$ compare.py benchmarks <benchmark_baseline> <benchmark_contender> [benchmark options]...
+```
+Where `<benchmark_baseline>` and `<benchmark_contender>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything the binary accepts, be it normal `--benchmark_*` parameters or custom parameters your binary takes.
+
+Example output:
+```
+$ ./compare.py benchmarks ./a.out ./a.out
+RUNNING: ./a.out --benchmark_out=/tmp/tmprBT5nW
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:16:44
+------------------------------------------------------
+Benchmark Time CPU Iterations
+------------------------------------------------------
+BM_memcpy/8 36 ns 36 ns 19101577 211.669MB/s
+BM_memcpy/64 76 ns 76 ns 9412571 800.199MB/s
+BM_memcpy/512 84 ns 84 ns 8249070 5.64771GB/s
+BM_memcpy/1024 116 ns 116 ns 6181763 8.19505GB/s
+BM_memcpy/8192 643 ns 643 ns 1062855 11.8636GB/s
+BM_copy/8 222 ns 222 ns 3137987 34.3772MB/s
+BM_copy/64 1608 ns 1608 ns 432758 37.9501MB/s
+BM_copy/512 12589 ns 12589 ns 54806 38.7867MB/s
+BM_copy/1024 25169 ns 25169 ns 27713 38.8003MB/s
+BM_copy/8192 201165 ns 201112 ns 3486 38.8466MB/s
+RUNNING: ./a.out --benchmark_out=/tmp/tmpt1wwG_
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:16:53
+------------------------------------------------------
+Benchmark Time CPU Iterations
+------------------------------------------------------
+BM_memcpy/8 36 ns 36 ns 19397903 211.255MB/s
+BM_memcpy/64 73 ns 73 ns 9691174 839.635MB/s
+BM_memcpy/512 85 ns 85 ns 8312329 5.60101GB/s
+BM_memcpy/1024 118 ns 118 ns 6438774 8.11608GB/s
+BM_memcpy/8192 656 ns 656 ns 1068644 11.6277GB/s
+BM_copy/8 223 ns 223 ns 3146977 34.2338MB/s
+BM_copy/64 1611 ns 1611 ns 435340 37.8751MB/s
+BM_copy/512 12622 ns 12622 ns 54818 38.6844MB/s
+BM_copy/1024 25257 ns 25239 ns 27779 38.6927MB/s
+BM_copy/8192 205013 ns 205010 ns 3479 38.108MB/s
+Comparing ./a.out to ./a.out
+Benchmark Time CPU Time Old Time New CPU Old CPU New
+------------------------------------------------------------------------------------------------------
+BM_memcpy/8 +0.0020 +0.0020 36 36 36 36
+BM_memcpy/64 -0.0468 -0.0470 76 73 76 73
+BM_memcpy/512 +0.0081 +0.0083 84 85 84 85
+BM_memcpy/1024 +0.0098 +0.0097 116 118 116 118
+BM_memcpy/8192 +0.0200 +0.0203 643 656 643 656
+BM_copy/8 +0.0046 +0.0042 222 223 222 223
+BM_copy/64 +0.0020 +0.0020 1608 1611 1608 1611
+BM_copy/512 +0.0027 +0.0026 12589 12622 12589 12622
+BM_copy/1024 +0.0035 +0.0028 25169 25257 25169 25239
+BM_copy/8192 +0.0191 +0.0194 201165 205013 201112 205010
+```
+
+For every benchmark from the first run, it looks for the benchmark with exactly the same name in the second run and then compares the results. If the names differ, the benchmark is omitted from the diff.
+As you can see, the values in the `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+
+2. Compare two different filters of one benchmark
+The program is invoked like:
+
+``` bash
+$ compare.py filters <benchmark> <filter_baseline> <filter_contender> [benchmark options]...
+```
+Where `<benchmark>` either specifies a benchmark executable file or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+
+Where `<filter_baseline>` and `<filter_contender>` are the same regex filters that you would pass to the `[--benchmark_filter=<regex>]` parameter of the benchmark binary.
+
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything the binary accepts, be it normal `--benchmark_*` parameters or custom parameters your binary takes.
+
+Example output:
+```
+$ ./compare.py filters ./a.out BM_memcpy BM_copy
+RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmpBWKk0k
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:37:28
+------------------------------------------------------
+Benchmark Time CPU Iterations
+------------------------------------------------------
+BM_memcpy/8 36 ns 36 ns 17891491 211.215MB/s
+BM_memcpy/64 74 ns 74 ns 9400999 825.646MB/s
+BM_memcpy/512 87 ns 87 ns 8027453 5.46126GB/s
+BM_memcpy/1024 111 ns 111 ns 6116853 8.5648GB/s
+BM_memcpy/8192 657 ns 656 ns 1064679 11.6247GB/s
+RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpAvWcOM
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:37:33
+----------------------------------------------------
+Benchmark Time CPU Iterations
+----------------------------------------------------
+BM_copy/8 227 ns 227 ns 3038700 33.6264MB/s
+BM_copy/64 1640 ns 1640 ns 426893 37.2154MB/s
+BM_copy/512 12804 ns 12801 ns 55417 38.1444MB/s
+BM_copy/1024 25409 ns 25407 ns 27516 38.4365MB/s
+BM_copy/8192 202986 ns 202990 ns 3454 38.4871MB/s
+Comparing BM_memcpy to BM_copy (from ./a.out)
+Benchmark Time CPU Time Old Time New CPU Old CPU New
+--------------------------------------------------------------------------------------------------------------------
+[BM_memcpy vs. BM_copy]/8 +5.2829 +5.2812 36 227 36 227
+[BM_memcpy vs. BM_copy]/64 +21.1719 +21.1856 74 1640 74 1640
+[BM_memcpy vs. BM_copy]/512 +145.6487 +145.6097 87 12804 87 12801
+[BM_memcpy vs. BM_copy]/1024 +227.1860 +227.1776 111 25409 111 25407
+[BM_memcpy vs. BM_copy]/8192 +308.1664 +308.2898 657 202986 656 202990
+```
+
+As you can see, the filter is applied both when running the benchmarks and before doing the diff. To make the diff work, the matched filter strings are replaced with a common placeholder, which is why you can compare two different benchmark families within one benchmark binary.
+Note that the values in the `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+
+3. Compare one filter from the first benchmark to another filter from the second benchmark
+The program is invoked like:
+
+``` bash
+$ compare.py benchmarksfiltered <benchmark_baseline> <filter_baseline> <benchmark_contender> <filter_contender> [benchmark options]...
+```
+
+Where `<benchmark_baseline>` and `<benchmark_contender>` each specify either a benchmark executable or a JSON output file. The type of the input file is detected automatically. If a benchmark executable is specified, the benchmark is run to obtain the results; otherwise the results are simply loaded from the output file.
+
+Where `<filter_baseline>` and `<filter_contender>` are the same regex filters that you would pass to the `[--benchmark_filter=<regex>]` parameter of the benchmark binary.
+
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything the binary accepts, whether normal `--benchmark_*` parameters or custom parameters your binary takes.
+
+Example output:
+```
+$ ./compare.py benchmarksfiltered ./a.out BM_memcpy ./a.out BM_copy
+RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmp_FvbYg
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:38:27
+------------------------------------------------------
+Benchmark Time CPU Iterations
+------------------------------------------------------
+BM_memcpy/8 37 ns 37 ns 18953482 204.118MB/s
+BM_memcpy/64 74 ns 74 ns 9206578 828.245MB/s
+BM_memcpy/512 91 ns 91 ns 8086195 5.25476GB/s
+BM_memcpy/1024 120 ns 120 ns 5804513 7.95662GB/s
+BM_memcpy/8192 664 ns 664 ns 1028363 11.4948GB/s
+RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpDfL5iE
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:38:32
+----------------------------------------------------
+Benchmark Time CPU Iterations
+----------------------------------------------------
+BM_copy/8 230 ns 230 ns 2985909 33.1161MB/s
+BM_copy/64 1654 ns 1653 ns 419408 36.9137MB/s
+BM_copy/512 13122 ns 13120 ns 53403 37.2156MB/s
+BM_copy/1024 26679 ns 26666 ns 26575 36.6218MB/s
+BM_copy/8192 215068 ns 215053 ns 3221 36.3283MB/s
+Comparing BM_memcpy (from ./a.out) to BM_copy (from ./a.out)
+Benchmark Time CPU Time Old Time New CPU Old CPU New
+--------------------------------------------------------------------------------------------------------------------
+[BM_memcpy vs. BM_copy]/8 +5.1649 +5.1637 37 230 37 230
+[BM_memcpy vs. BM_copy]/64 +21.4352 +21.4374 74 1654 74 1653
+[BM_memcpy vs. BM_copy]/512 +143.6022 +143.5865 91 13122 91 13120
+[BM_memcpy vs. BM_copy]/1024 +221.5903 +221.4790 120 26679 120 26666
+[BM_memcpy vs. BM_copy]/8192 +322.9059 +323.0096 664 215068 664 215053
+```
+This is a mix of the previous two modes: two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
+Note that the values in the `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
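+
+The `./a.out` used throughout these examples is just a benchmark binary that registers both a `BM_memcpy` and a `BM_copy` family with the argument set `8, 64, 512, 1024, 8192`. A minimal sketch of such a binary might look like the following (illustrative only; the real `BM_copy` implementation is not part of this document, so a naive byte-by-byte loop stands in for it):
+
+```c++
+#include <benchmark/benchmark.h>
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+static void BM_memcpy(benchmark::State& state) {
+  std::vector<char> src(state.range(0), 'x');
+  std::vector<char> dst(state.range(0));
+  for (auto _ : state)
+    std::memcpy(dst.data(), src.data(), state.range(0));
+  state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+}
+BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1 << 10)->Arg(8 << 10);
+
+// Stand-in contender: a naive byte-by-byte copy.
+static void BM_copy(benchmark::State& state) {
+  std::vector<char> src(state.range(0), 'x');
+  std::vector<char> dst(state.range(0));
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) dst[i] = src[i];
+    benchmark::ClobberMemory();  // keep the copy from being optimized away
+  }
+  state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+}
+BENCHMARK(BM_copy)->Arg(8)->Arg(64)->Arg(512)->Arg(1 << 10)->Arg(8 << 10);
+
+BENCHMARK_MAIN();
+```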
diff --git a/libcxx/utils/google-benchmark/include/benchmark/benchmark.h b/libcxx/utils/google-benchmark/include/benchmark/benchmark.h
index b3b0a8e947f..340cbc1ecf3 100644
--- a/libcxx/utils/google-benchmark/include/benchmark/benchmark.h
+++ b/libcxx/utils/google-benchmark/include/benchmark/benchmark.h
@@ -11,11 +11,1379 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+
+// Support for registering benchmarks for functions.
+
+/* Example usage:
+// Define a function that executes the code to be measured a
+// specified number of times:
+static void BM_StringCreation(benchmark::State& state) {
+ for (auto _ : state)
+ std::string empty_string;
+}
+
+// Register the function as a benchmark
+BENCHMARK(BM_StringCreation);
+
+// Define another benchmark
+static void BM_StringCopy(benchmark::State& state) {
+ std::string x = "hello";
+ for (auto _ : state)
+ std::string copy(x);
+}
+BENCHMARK(BM_StringCopy);
+
+// Augment the main() program to invoke benchmarks if specified
+// via the --benchmarks command line flag. E.g.,
+// my_unittest --benchmark_filter=all
+// my_unittest --benchmark_filter=BM_StringCreation
+// my_unittest --benchmark_filter=String
+// my_unittest --benchmark_filter='Copy|Creation'
+int main(int argc, char** argv) {
+ benchmark::Initialize(&argc, argv);
+ benchmark::RunSpecifiedBenchmarks();
+ return 0;
+}
+
+// Sometimes a family of microbenchmarks can be implemented with
+// just one routine that takes an extra argument to specify which
+// one of the family of benchmarks to run. For example, the following
+// code defines a family of microbenchmarks for measuring the speed
+// of memcpy() calls of different lengths:
+
+static void BM_memcpy(benchmark::State& state) {
+ char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
+ memset(src, 'x', state.range(0));
+ for (auto _ : state)
+ memcpy(dst, src, state.range(0));
+ state.SetBytesProcessed(int64_t(state.iterations()) *
+ int64_t(state.range(0)));
+ delete[] src; delete[] dst;
+}
+BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
+
+// The preceding code is quite repetitive, and can be replaced with the
+// following short-hand. The following invocation will pick a few
+// appropriate arguments in the specified range and will generate a
+// microbenchmark for each such argument.
+BENCHMARK(BM_memcpy)->Range(8, 8<<10);
+
+// You might have a microbenchmark that depends on two inputs. For
+// example, the following code defines a family of microbenchmarks for
+// measuring the speed of set insertion.
+static void BM_SetInsert(benchmark::State& state) {
+ set<int> data;
+ for (auto _ : state) {
+ state.PauseTiming();
+ data = ConstructRandomSet(state.range(0));
+ state.ResumeTiming();
+ for (int j = 0; j < state.range(1); ++j)
+ data.insert(RandomNumber());
+ }
+}
+BENCHMARK(BM_SetInsert)
+ ->Args({1<<10, 128})
+ ->Args({2<<10, 128})
+ ->Args({4<<10, 128})
+ ->Args({8<<10, 128})
+ ->Args({1<<10, 512})
+ ->Args({2<<10, 512})
+ ->Args({4<<10, 512})
+ ->Args({8<<10, 512});
+
+// The preceding code is quite repetitive, and can be replaced with
+// the following short-hand. The following macro will pick a few
+// appropriate arguments in the product of the two specified ranges
+// and will generate a microbenchmark for each such pair.
+BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
+
+// For more complex patterns of inputs, passing a custom function
+// to Apply allows programmatic specification of an
+// arbitrary set of arguments to run the microbenchmark on.
+// The following example enumerates a dense range on
+// one parameter, and a sparse range on the second.
+static void CustomArguments(benchmark::internal::Benchmark* b) {
+ for (int i = 0; i <= 10; ++i)
+ for (int j = 32; j <= 1024*1024; j *= 8)
+ b->Args({i, j});
+}
+BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
+
+// Templated microbenchmarks work the same way:
+// Produce then consume 'size' messages 'iters' times
+// Measures throughput in the absence of multiprogramming.
+template <class Q> int BM_Sequential(benchmark::State& state) {
+ Q q;
+ typename Q::value_type v;
+ for (auto _ : state) {
+ for (int i = state.range(0); i--; )
+ q.push(v);
+ for (int e = state.range(0); e--; )
+ q.Wait(&v);
+ }
+ // actually messages, not bytes:
+ state.SetBytesProcessed(
+ static_cast<int64_t>(state.iterations())*state.range(0));
+}
+BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
+
+Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
+benchmark. This option overrides the `benchmark_min_time` flag.
+
+void BM_test(benchmark::State& state) {
+ ... body ...
+}
+BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
+
+In a multithreaded test, it is guaranteed that none of the threads will start
+until all have reached the loop start, and all will have finished before any
+thread exits the loop body. As such, any global setup or teardown you want to
+do can be wrapped in a check against the thread index:
+
+static void BM_MultiThreaded(benchmark::State& state) {
+ if (state.thread_index == 0) {
+ // Setup code here.
+ }
+ for (auto _ : state) {
+ // Run the test as normal.
+ }
+ if (state.thread_index == 0) {
+ // Teardown code here.
+ }
+}
+BENCHMARK(BM_MultiThreaded)->Threads(4);
+
+
+If a benchmark runs for only a few milliseconds, it may be hard to visually
+compare the measured times, since the output data is given in nanoseconds by
+default. In that case you can set the time unit manually:
+
+BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
+*/
+
#ifndef BENCHMARK_BENCHMARK_H_
#define BENCHMARK_BENCHMARK_H_
-#include "benchmark_api.h"
-#include "macros.h"
-#include "reporter.h"
+
+// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
+#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
+#define BENCHMARK_HAS_CXX11
+#endif
+
+#include <stdint.h>
+
+#include <cassert>
+#include <cstddef>
+#include <iosfwd>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+
+#if defined(BENCHMARK_HAS_CXX11)
+#include <type_traits>
+#include <initializer_list>
+#include <utility>
+#endif
+
+#if defined(_MSC_VER)
+#include <intrin.h> // for _ReadWriteBarrier
+#endif
+
+#ifndef BENCHMARK_HAS_CXX11
+#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ TypeName& operator=(const TypeName&)
+#else
+#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&) = delete; \
+ TypeName& operator=(const TypeName&) = delete
+#endif
+
+#if defined(__GNUC__)
+#define BENCHMARK_UNUSED __attribute__((unused))
+#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
+#define BENCHMARK_NOEXCEPT noexcept
+#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
+#elif defined(_MSC_VER) && !defined(__clang__)
+#define BENCHMARK_UNUSED
+#define BENCHMARK_ALWAYS_INLINE __forceinline
+#if _MSC_VER >= 1900
+#define BENCHMARK_NOEXCEPT noexcept
+#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
+#else
+#define BENCHMARK_NOEXCEPT
+#define BENCHMARK_NOEXCEPT_OP(x)
+#endif
+#define __func__ __FUNCTION__
+#else
+#define BENCHMARK_UNUSED
+#define BENCHMARK_ALWAYS_INLINE
+#define BENCHMARK_NOEXCEPT
+#define BENCHMARK_NOEXCEPT_OP(x)
+#endif
+
+#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
+#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
+
+#if defined(__GNUC__)
+#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
+#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
+#else
+#define BENCHMARK_BUILTIN_EXPECT(x, y) x
+#define BENCHMARK_DEPRECATED_MSG(msg)
+#define BENCHMARK_WARNING_MSG(msg) __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING(__LINE__) ") : warning note: " msg))
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#endif
+
+namespace benchmark {
+class BenchmarkReporter;
+
+void Initialize(int* argc, char** argv);
+
+// Report to stdout all arguments in 'argv' as unrecognized except the first.
+// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
+bool ReportUnrecognizedArguments(int argc, char** argv);
+
+// Generate a list of benchmarks matching the specified --benchmark_filter flag
+// and if --benchmark_list_tests is specified return after printing the name
+// of each matching benchmark. Otherwise run each matching benchmark and
+// report the results.
+//
+// The second and third overloads use the specified 'console_reporter' and
+// 'file_reporter' respectively. 'file_reporter' will write to the file
+// specified by '--benchmark_out'. If '--benchmark_out' is not given the
+// 'file_reporter' is ignored.
+//
+// RETURNS: The number of matching benchmarks.
+size_t RunSpecifiedBenchmarks();
+size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter);
+size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
+ BenchmarkReporter* file_reporter);
+
+// If this routine is called, peak memory allocation past this point in the
+// benchmark is reported at the end of the benchmark report line. (It is
+// computed by running the benchmark once with a single iteration and a memory
+// tracer.)
+// TODO(dominic)
+// void MemoryUsage();
+
+namespace internal {
+class Benchmark;
+class BenchmarkImp;
+class BenchmarkFamilies;
+
+void UseCharPointer(char const volatile*);
+
+// Take ownership of the pointer and register the benchmark. Return the
+// registered benchmark.
+Benchmark* RegisterBenchmarkInternal(Benchmark*);
+
+// Ensure that the standard streams are properly initialized in every TU.
+int InitializeStreams();
+BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
+
+} // namespace internal
+
+
+#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
+    defined(__EMSCRIPTEN__)
+# define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
+#endif
+
+
+// The DoNotOptimize(...) function can be used to prevent a value or
+// expression from being optimized away by the compiler. This function is
+// intended to add little to no overhead.
+// See: https://youtu.be/nXaxk27zwlk?t=2441
+#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+ // Clang doesn't like the 'X' constraint on `value` and certain GCC versions
+ // don't like the 'g' constraint. Attempt to placate them both.
+#if defined(__clang__)
+ asm volatile("" : : "g"(value) : "memory");
+#else
+ asm volatile("" : : "i,r,m"(value) : "memory");
+#endif
+}
+// Force the compiler to flush pending writes to global memory. Acts as an
+// effective read/write barrier
+inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
+ asm volatile("" : : : "memory");
+}
+#elif defined(_MSC_VER)
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+ internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
+ _ReadWriteBarrier();
+}
+
+inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
+ _ReadWriteBarrier();
+}
+#else
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+ internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
+}
+// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
+#endif
+
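+// Example (illustrative) of how these helpers are typically used inside a
+// benchmark body; 'BM_Increment' is just a placeholder name:
+//
+//   static void BM_Increment(benchmark::State& state) {
+//     int x = 0;
+//     for (auto _ : state) {
+//       benchmark::DoNotOptimize(x += 1);  // keep the computation alive
+//       benchmark::ClobberMemory();        // flush pending writes to memory
+//     }
+//   }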
+
+
+// This class is used for user-defined counters.
+class Counter {
+public:
+
+ enum Flags {
+ kDefaults = 0,
+ // Mark the counter as a rate. It will be presented divided
+ // by the duration of the benchmark.
+ kIsRate = 1,
+ // Mark the counter as a thread-average quantity. It will be
+ // presented divided by the number of threads.
+ kAvgThreads = 2,
+ // Mark the counter as a thread-average rate. See above.
+ kAvgThreadsRate = kIsRate|kAvgThreads
+ };
+
+ double value;
+ Flags flags;
+
+ BENCHMARK_ALWAYS_INLINE
+ Counter(double v = 0., Flags f = kDefaults) : value(v), flags(f) {}
+
+ BENCHMARK_ALWAYS_INLINE operator double const& () const { return value; }
+ BENCHMARK_ALWAYS_INLINE operator double & () { return value; }
+
+};
+
+// This is the container for the user-defined counters.
+typedef std::map<std::string, Counter> UserCounters;
+
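+// Example (illustrative): counters are populated from inside a benchmark via
+// 'State::counters' (declared below). 'n_bytes' is a placeholder for a value
+// computed by the benchmark itself:
+//
+//   state.counters["BytesTouched"] =
+//       benchmark::Counter(static_cast<double>(n_bytes),
+//                          benchmark::Counter::kIsRate);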
+
+// TimeUnit is passed to a benchmark in order to specify the order of magnitude
+// for the measured time.
+enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
+
+// BigO is passed to a benchmark in order to specify the asymptotic
+// computational complexity for the benchmark. If oAuto is selected, the
+// complexity will be calculated automatically to the best fit.
+enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
+
+// BigOFunc is passed to a benchmark in order to specify the asymptotic
+// computational complexity for the benchmark.
+typedef double(BigOFunc)(int);
+
+// StatisticsFunc is passed to a benchmark in order to compute some descriptive
+// statistics over all the measurements of some type
+typedef double(StatisticsFunc)(const std::vector<double>&);
+
+struct Statistics {
+ std::string name_;
+ StatisticsFunc* compute_;
+
+ Statistics(std::string name, StatisticsFunc* compute)
+ : name_(name), compute_(compute) {}
+};
+
+namespace internal {
+class ThreadTimer;
+class ThreadManager;
+
+enum ReportMode
+#if defined(BENCHMARK_HAS_CXX11)
+ : unsigned
+#else
+#endif
+ {
+ RM_Unspecified, // The mode has not been manually specified
+ RM_Default, // The mode is user-specified as default.
+ RM_ReportAggregatesOnly
+};
+} // namespace internal
+
+// State is passed to a running Benchmark and contains state for the
+// benchmark to use.
+class State {
+ public:
+ struct StateIterator;
+ friend struct StateIterator;
+
+ // Returns iterators used to run each iteration of a benchmark using a
+  // C++11 range-based for loop. These functions should not be called directly.
+ //
+ // REQUIRES: The benchmark has not started running yet. Neither begin nor end
+ // have been called previously.
+ //
+ // NOTE: KeepRunning may not be used after calling either of these functions.
+ BENCHMARK_ALWAYS_INLINE StateIterator begin();
+ BENCHMARK_ALWAYS_INLINE StateIterator end();
+
+ // Returns true if the benchmark should continue through another iteration.
+ // NOTE: A benchmark may not return from the test until KeepRunning() has
+ // returned false.
+ bool KeepRunning() {
+ if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
+ StartKeepRunning();
+ }
+ bool const res = (--total_iterations_ != 0);
+ if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
+ FinishKeepRunning();
+ }
+ return res;
+ }
+
+ // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
+ // by the current thread.
+ // Stop the benchmark timer. If not called, the timer will be
+ // automatically stopped after the last iteration of the benchmark loop.
+ //
+ // For threaded benchmarks the PauseTiming() function only pauses the timing
+ // for the current thread.
+ //
+ // NOTE: The "real time" measurement is per-thread. If different threads
+ // report different measurements the largest one is reported.
+ //
+ // NOTE: PauseTiming()/ResumeTiming() are relatively
+ // heavyweight, and so their use should generally be avoided
+ // within each benchmark iteration, if possible.
+ void PauseTiming();
+
+ // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
+ // by the current thread.
+ // Start the benchmark timer. The timer is NOT running on entrance to the
+ // benchmark function. It begins running after control flow enters the
+ // benchmark loop.
+ //
+ // NOTE: PauseTiming()/ResumeTiming() are relatively
+ // heavyweight, and so their use should generally be avoided
+ // within each benchmark iteration, if possible.
+ void ResumeTiming();
+
+ // REQUIRES: 'SkipWithError(...)' has not been called previously by the
+ // current thread.
+ // Report the benchmark as resulting in an error with the specified 'msg'.
+ // After this call the user may explicitly 'return' from the benchmark.
+ //
+ // If the ranged-for style of benchmark loop is used, the user must explicitly
+ // break from the loop, otherwise all future iterations will be run.
+ // If the 'KeepRunning()' loop is used the current thread will automatically
+ // exit the loop at the end of the current iteration.
+ //
+ // For threaded benchmarks only the current thread stops executing and future
+ // calls to `KeepRunning()` will block until all threads have completed
+ // the `KeepRunning()` loop. If multiple threads report an error only the
+ // first error message is used.
+ //
+ // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
+ // the current scope immediately. If the function is called from within
+  // the 'KeepRunning()' loop the current iteration will finish. It is the user's
+ // responsibility to exit the scope as needed.
+ void SkipWithError(const char* msg);
+
+ // REQUIRES: called exactly once per iteration of the benchmarking loop.
+ // Set the manually measured time for this benchmark iteration, which
+ // is used instead of automatically measured time if UseManualTime() was
+ // specified.
+ //
+  // For threaded benchmarks the final value will be set to the largest
+  // reported value.
+ void SetIterationTime(double seconds);
+
+ // Set the number of bytes processed by the current benchmark
+ // execution. This routine is typically called once at the end of a
+ // throughput oriented benchmark. If this routine is called with a
+ // value > 0, the report is printed in MB/sec instead of nanoseconds
+ // per iteration.
+ //
+ // REQUIRES: a benchmark has exited its benchmarking loop.
+ BENCHMARK_ALWAYS_INLINE
+ void SetBytesProcessed(size_t bytes) { bytes_processed_ = bytes; }
+
+ BENCHMARK_ALWAYS_INLINE
+ size_t bytes_processed() const { return bytes_processed_; }
+
+  // If this routine is called with complexity_n > 0 and a complexity report is
+  // requested for the benchmark family, then the current benchmark will be part
+  // of the computation and complexity_n will represent the length of N.
+ BENCHMARK_ALWAYS_INLINE
+ void SetComplexityN(int complexity_n) { complexity_n_ = complexity_n; }
+
+ BENCHMARK_ALWAYS_INLINE
+ int complexity_length_n() { return complexity_n_; }
+
+ // If this routine is called with items > 0, then an items/s
+ // label is printed on the benchmark report line for the currently
+ // executing benchmark. It is typically called at the end of a processing
+ // benchmark where a processing items/second output is desired.
+ //
+ // REQUIRES: a benchmark has exited its benchmarking loop.
+ BENCHMARK_ALWAYS_INLINE
+ void SetItemsProcessed(size_t items) { items_processed_ = items; }
+
+ BENCHMARK_ALWAYS_INLINE
+ size_t items_processed() const { return items_processed_; }
+
+ // If this routine is called, the specified label is printed at the
+ // end of the benchmark report line for the currently executing
+ // benchmark. Example:
+ // static void BM_Compress(benchmark::State& state) {
+ // ...
+ // double compress = input_size / output_size;
+ // state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
+ // }
+ // Produces output that looks like:
+ // BM_Compress 50 50 14115038 compress:27.3%
+ //
+ // REQUIRES: a benchmark has exited its benchmarking loop.
+ void SetLabel(const char* label);
+
+ void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
+ this->SetLabel(str.c_str());
+ }
+
+ // Range arguments for this run. CHECKs if the argument has been set.
+ BENCHMARK_ALWAYS_INLINE
+ int range(std::size_t pos = 0) const {
+ assert(range_.size() > pos);
+ return range_[pos];
+ }
+
+ BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
+ int range_x() const { return range(0); }
+
+ BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
+ int range_y() const { return range(1); }
+
+ BENCHMARK_ALWAYS_INLINE
+ size_t iterations() const { return (max_iterations - total_iterations_) + 1; }
+
+ private:
+ bool started_;
+ bool finished_;
+ size_t total_iterations_;
+
+ std::vector<int> range_;
+
+ size_t bytes_processed_;
+ size_t items_processed_;
+
+ int complexity_n_;
+
+ bool error_occurred_;
+
+ public:
+ // Container for user-defined counters.
+ UserCounters counters;
+ // Index of the executing thread. Values from [0, threads).
+ const int thread_index;
+ // Number of threads concurrently executing the benchmark.
+ const int threads;
+ const size_t max_iterations;
+
+ // TODO(EricWF) make me private
+ State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
+ int n_threads, internal::ThreadTimer* timer,
+ internal::ThreadManager* manager);
+
+ private:
+ void StartKeepRunning();
+ void FinishKeepRunning();
+ internal::ThreadTimer* timer_;
+ internal::ThreadManager* manager_;
+ BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
+};
+
+struct State::StateIterator {
+ struct BENCHMARK_UNUSED Value {};
+ typedef std::forward_iterator_tag iterator_category;
+ typedef Value value_type;
+ typedef Value reference;
+ typedef Value pointer;
+
+ private:
+ friend class State;
+ BENCHMARK_ALWAYS_INLINE
+ StateIterator() : cached_(0), parent_() {}
+
+ BENCHMARK_ALWAYS_INLINE
+ explicit StateIterator(State* st)
+ : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
+
+ public:
+ BENCHMARK_ALWAYS_INLINE
+ Value operator*() const { return Value(); }
+
+ BENCHMARK_ALWAYS_INLINE
+ StateIterator& operator++() {
+ assert(cached_ > 0);
+ --cached_;
+ return *this;
+ }
+
+ BENCHMARK_ALWAYS_INLINE
+ bool operator!=(StateIterator const&) const {
+ if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
+ parent_->FinishKeepRunning();
+ return false;
+ }
+
+ private:
+ size_t cached_;
+ State* const parent_;
+};
+
+inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
+ return StateIterator(this);
+}
+inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
+ StartKeepRunning();
+ return StateIterator();
+}
+
+namespace internal {
+
+typedef void(Function)(State&);
+
+// ------------------------------------------------------
+// Benchmark registration object. The BENCHMARK() macro expands
+// into an internal::Benchmark* object. Various methods can
+// be called on this object to change the properties of the benchmark.
+// Each method returns "this" so that multiple method calls can
+// be chained into one expression.
+class Benchmark {
+ public:
+ virtual ~Benchmark();
+
+ // Note: the following methods all return "this" so that multiple
+ // method calls can be chained together in one expression.
+
+ // Run this benchmark once with "x" as the extra argument passed
+ // to the function.
+ // REQUIRES: The function passed to the constructor must accept an arg1.
+ Benchmark* Arg(int x);
+
+ // Run this benchmark with the given time unit for the generated output report
+ Benchmark* Unit(TimeUnit unit);
+
+ // Run this benchmark once for a number of values picked from the
+ // range [start..limit]. (start and limit are always picked.)
+ // REQUIRES: The function passed to the constructor must accept an arg1.
+ Benchmark* Range(int start, int limit);
+
+ // Run this benchmark once for all values in the range [start..limit] with
+  // a specific step.
+ // REQUIRES: The function passed to the constructor must accept an arg1.
+ Benchmark* DenseRange(int start, int limit, int step = 1);
+
+ // Run this benchmark once with "args" as the extra arguments passed
+ // to the function.
+ // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
+ Benchmark* Args(const std::vector<int>& args);
+
+ // Equivalent to Args({x, y})
+ // NOTE: This is a legacy C++03 interface provided for compatibility only.
+ // New code should use 'Args'.
+ Benchmark* ArgPair(int x, int y) {
+ std::vector<int> args;
+ args.push_back(x);
+ args.push_back(y);
+ return Args(args);
+ }
+
+ // Run this benchmark once for a number of values picked from the
+ // ranges [start..limit]. (starts and limits are always picked.)
+ // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
+ Benchmark* Ranges(const std::vector<std::pair<int, int> >& ranges);
+
+ // Equivalent to ArgNames({name})
+ Benchmark* ArgName(const std::string& name);
+
+ // Set the argument names to display in the benchmark name. If not called,
+ // only argument values will be shown.
+ Benchmark* ArgNames(const std::vector<std::string>& names);
+
+ // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
+ // NOTE: This is a legacy C++03 interface provided for compatibility only.
+ // New code should use 'Ranges'.
+ Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2) {
+ std::vector<std::pair<int, int> > ranges;
+ ranges.push_back(std::make_pair(lo1, hi1));
+ ranges.push_back(std::make_pair(lo2, hi2));
+ return Ranges(ranges);
+ }
+
+ // Pass this benchmark object to *func, which can customize
+ // the benchmark by calling various methods like Arg, Args,
+ // Threads, etc.
+ Benchmark* Apply(void (*func)(Benchmark* benchmark));
+
+ // Set the range multiplier for non-dense range. If not called, the range
+ // multiplier kRangeMultiplier will be used.
+ Benchmark* RangeMultiplier(int multiplier);
+
+ // Set the minimum amount of time to use when running this benchmark. This
+ // option overrides the `benchmark_min_time` flag.
+ // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
+ Benchmark* MinTime(double t);
+
+  // Specify the number of iterations that should be run by this benchmark.
+ // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
+ //
+ // NOTE: This function should only be used when *exact* iteration control is
+ // needed and never to control or limit how long a benchmark runs, where
+ // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
+ Benchmark* Iterations(size_t n);
+
+  // Specify the number of times to repeat this benchmark. This option overrides
+ // the `benchmark_repetitions` flag.
+ // REQUIRES: `n > 0`
+ Benchmark* Repetitions(int n);
+
+ // Specify if each repetition of the benchmark should be reported separately
+ // or if only the final statistics should be reported. If the benchmark
+ // is not repeated then the single result is always reported.
+ Benchmark* ReportAggregatesOnly(bool value = true);
+
+  // If a particular benchmark is I/O bound, runs multiple threads internally, or
+  // if for some reason CPU timings are not representative, call this method. If
+  // called, the elapsed (real) time will be used to control how many iterations
+  // are run, and in the printing of items/second or MB/second values. If not
+  // called, the CPU time used by the benchmark will be used.
+ Benchmark* UseRealTime();
+
+  // If a benchmark must measure time manually (e.g. if GPU execution time is
+  // being measured), call this method. If called, each benchmark iteration
+  // should call SetIterationTime(seconds) to report the measured time, which
+  // will be used to control how many iterations are run, and in the printing of
+  // items/second or MB/second values.
+ Benchmark* UseManualTime();
+
+ // Set the asymptotic computational complexity for the benchmark. If called
+ // the asymptotic computational complexity will be shown on the output.
+ Benchmark* Complexity(BigO complexity = benchmark::oAuto);
+
+ // Set the asymptotic computational complexity for the benchmark. If called
+ // the asymptotic computational complexity will be shown on the output.
+ Benchmark* Complexity(BigOFunc* complexity);
+
+  // Add a statistic to be computed over all the measurements of the benchmark run
+ Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics);
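+  // Example (illustrative; 'BM_spin' is a placeholder benchmark name):
+  //   BENCHMARK(BM_spin)->Repetitions(4)->ComputeStatistics("max",
+  //       [](const std::vector<double>& v) -> double {
+  //         return *(std::max_element(std::begin(v), std::end(v)));
+  //       });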
+
+ // Support for running multiple copies of the same benchmark concurrently
+ // in multiple threads. This may be useful when measuring the scaling
+ // of some piece of code.
+
+ // Run one instance of this benchmark concurrently in t threads.
+ Benchmark* Threads(int t);
+
+ // Pick a set of values T from [min_threads,max_threads].
+ // min_threads and max_threads are always included in T. Run this
+ // benchmark once for each value in T. The benchmark run for a
+ // particular value t consists of t threads running the benchmark
+ // function concurrently. For example, consider:
+ // BENCHMARK(Foo)->ThreadRange(1,16);
+ // This will run the following benchmarks:
+ // Foo in 1 thread
+ // Foo in 2 threads
+ // Foo in 4 threads
+ // Foo in 8 threads
+ // Foo in 16 threads
+ Benchmark* ThreadRange(int min_threads, int max_threads);
+
+ // For each value n in the range, run this benchmark once using n threads.
+ // min_threads and max_threads are always included in the range.
+ // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
+ // a benchmark with 1, 4, 7 and 8 threads.
+ Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
+
+ // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
+ Benchmark* ThreadPerCpu();
+
+ virtual void Run(State& state) = 0;
+
+ // Used inside the benchmark implementation
+ struct Instance;
+
+ protected:
+ explicit Benchmark(const char* name);
+ Benchmark(Benchmark const&);
+ void SetName(const char* name);
+
+ int ArgsCnt() const;
+
+ static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
+
+ private:
+ friend class BenchmarkFamilies;
+
+ std::string name_;
+ ReportMode report_mode_;
+ std::vector<std::string> arg_names_; // Args for all benchmark runs
+ std::vector<std::vector<int> > args_; // Args for all benchmark runs
+ TimeUnit time_unit_;
+ int range_multiplier_;
+ double min_time_;
+ size_t iterations_;
+ int repetitions_;
+ bool use_real_time_;
+ bool use_manual_time_;
+ BigO complexity_;
+ BigOFunc* complexity_lambda_;
+ std::vector<Statistics> statistics_;
+ std::vector<int> thread_counts_;
+
+ Benchmark& operator=(Benchmark const&);
+};
+
+} // namespace internal
+
+// Create and register a benchmark with the specified 'name' that invokes
+// the specified functor 'fn'.
+//
+// RETURNS: A pointer to the registered benchmark.
+internal::Benchmark* RegisterBenchmark(const char* name,
+ internal::Function* fn);
+
+#if defined(BENCHMARK_HAS_CXX11)
+template <class Lambda>
+internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
+#endif
+
+// Remove all registered benchmarks. All pointers to previously registered
+// benchmarks are invalidated.
+void ClearRegisteredBenchmarks();
+
+namespace internal {
+// The class used to hold all benchmarks created from a static function
+// (i.e. those created using the BENCHMARK(...) macros).
+class FunctionBenchmark : public Benchmark {
+ public:
+ FunctionBenchmark(const char* name, Function* func)
+ : Benchmark(name), func_(func) {}
+
+ virtual void Run(State& st);
+
+ private:
+ Function* func_;
+};
+
+#ifdef BENCHMARK_HAS_CXX11
+template <class Lambda>
+class LambdaBenchmark : public Benchmark {
+ public:
+ virtual void Run(State& st) { lambda_(st); }
+
+ private:
+ template <class OLambda>
+ LambdaBenchmark(const char* name, OLambda&& lam)
+ : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
+
+ LambdaBenchmark(LambdaBenchmark const&) = delete;
+
+ private:
+ template <class Lam>
+ friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
+
+ Lambda lambda_;
+};
+#endif
+
+} // namespace internal
+
+inline internal::Benchmark* RegisterBenchmark(const char* name,
+ internal::Function* fn) {
+ return internal::RegisterBenchmarkInternal(
+ ::new internal::FunctionBenchmark(name, fn));
+}
+
+#ifdef BENCHMARK_HAS_CXX11
+template <class Lambda>
+internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
+ using BenchType =
+ internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
+ return internal::RegisterBenchmarkInternal(
+ ::new BenchType(name, std::forward<Lambda>(fn)));
+}
+#endif
+
+#if defined(BENCHMARK_HAS_CXX11) && \
+ (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
+template <class Lambda, class... Args>
+internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
+ Args&&... args) {
+ return benchmark::RegisterBenchmark(
+ name, [=](benchmark::State& st) { fn(st, args...); });
+}
+#else
+#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
+#endif
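+
+// Example (illustrative): benchmarks may also be registered at runtime, e.g.
+// from main() before RunSpecifiedBenchmarks() is called. 'BM_lambda' is just a
+// placeholder name:
+//
+//   benchmark::RegisterBenchmark("BM_lambda", [](benchmark::State& st) {
+//     for (auto _ : st) { /* code under test */ }
+//   })->Arg(8);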
+
+// The base class for all fixture tests.
+class Fixture : public internal::Benchmark {
+ public:
+ Fixture() : internal::Benchmark("") {}
+
+ virtual void Run(State& st) {
+ this->SetUp(st);
+ this->BenchmarkCase(st);
+ this->TearDown(st);
+ }
+
+ // These will be deprecated ...
+ virtual void SetUp(const State&) {}
+ virtual void TearDown(const State&) {}
+ // ... In favor of these.
+ virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
+ virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
+
+ protected:
+ virtual void BenchmarkCase(State&) = 0;
+};
+
+} // namespace benchmark
+
+// ------------------------------------------------------
+// Macro to register benchmarks
+
+// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
+// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
+// empty. If X is empty the expression becomes (+1 == +0).
+#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
+#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
+#else
+#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
+#endif
+
+// Helpers for generating unique variable names
+#define BENCHMARK_PRIVATE_NAME(n) \
+ BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
+#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
+#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
+
+#define BENCHMARK_PRIVATE_DECLARE(n) \
+ static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
+ BENCHMARK_UNUSED
+
+#define BENCHMARK(n) \
+ BENCHMARK_PRIVATE_DECLARE(n) = \
+ (::benchmark::internal::RegisterBenchmarkInternal( \
+ new ::benchmark::internal::FunctionBenchmark(#n, n)))
+
+// Old-style macros
+#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
+#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
+#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
+#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
+#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
+ BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
+
+#ifdef BENCHMARK_HAS_CXX11
+
+// Register a benchmark which invokes the function specified by `func`
+// with the additional arguments specified by `...`.
+//
+// For example:
+//
+// template <class ...ExtraArgs>
+// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
+//   [...]
+// }
+// /* Registers a benchmark named "BM_takes_args/int_string_test" */
+// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
+#define BENCHMARK_CAPTURE(func, test_case_name, ...) \
+ BENCHMARK_PRIVATE_DECLARE(func) = \
+ (::benchmark::internal::RegisterBenchmarkInternal( \
+ new ::benchmark::internal::FunctionBenchmark( \
+ #func "/" #test_case_name, \
+ [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
+
+#endif // BENCHMARK_HAS_CXX11
+
+// This will register a benchmark for a templatized function. For example:
+//
+// template<int arg>
+// void BM_Foo(benchmark::State& state);
+//
+// BENCHMARK_TEMPLATE(BM_Foo, 1);
+//
+// will register BM_Foo<1> as a benchmark.
+#define BENCHMARK_TEMPLATE1(n, a) \
+ BENCHMARK_PRIVATE_DECLARE(n) = \
+ (::benchmark::internal::RegisterBenchmarkInternal( \
+ new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
+
+#define BENCHMARK_TEMPLATE2(n, a, b) \
+ BENCHMARK_PRIVATE_DECLARE(n) = \
+ (::benchmark::internal::RegisterBenchmarkInternal( \
+ new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
+ n<a, b>)))
+
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK_TEMPLATE(n, ...) \
+ BENCHMARK_PRIVATE_DECLARE(n) = \
+ (::benchmark::internal::RegisterBenchmarkInternal( \
+ new ::benchmark::internal::FunctionBenchmark( \
+ #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
+#else
+#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
+#endif
+
+#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass { \
+ public: \
+ BaseClass##_##Method##_Benchmark() : BaseClass() { \
+ this->SetName(#BaseClass "/" #Method); \
+ } \
+ \
+ protected: \
+ virtual void BenchmarkCase(::benchmark::State&); \
+ };
+
+#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() : BaseClass<a>() { \
+ this->SetName(#BaseClass"<" #a ">/" #Method); \
+ } \
+ \
+ protected: \
+ virtual void BenchmarkCase(::benchmark::State&); \
+ };
+
+#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() { \
+ this->SetName(#BaseClass"<" #a "," #b ">/" #Method); \
+ } \
+ \
+ protected: \
+ virtual void BenchmarkCase(::benchmark::State&); \
+ };
+
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \
+ this->SetName(#BaseClass"<" #__VA_ARGS__ ">/" #Method); \
+ } \
+ \
+ protected: \
+ virtual void BenchmarkCase(::benchmark::State&); \
+ };
+#else
+#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
+#endif
+
+#define BENCHMARK_DEFINE_F(BaseClass, Method) \
+ BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
+
+#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \
+ BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
+
+#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \
+ BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
+
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \
+ BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
+#else
+#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
+#endif
+
+#define BENCHMARK_REGISTER_F(BaseClass, Method) \
+ BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)
+
+#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
+ BENCHMARK_PRIVATE_DECLARE(TestName) = \
+ (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
+
+// This macro will define and register a benchmark within a fixture class.
+#define BENCHMARK_F(BaseClass, Method) \
+ BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
+ BENCHMARK_REGISTER_F(BaseClass, Method); \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
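+
+// Example (illustrative; 'MyFixture' and 'FooTest' are placeholder names):
+//   class MyFixture : public benchmark::Fixture {};
+//   BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
+//     for (auto _ : st) { /* code under test */ }
+//   }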
+
+#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \
+ BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
+ BENCHMARK_REGISTER_F(BaseClass, Method); \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
+
+#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \
+ BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
+ BENCHMARK_REGISTER_F(BaseClass, Method); \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
+
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \
+ BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
+ BENCHMARK_REGISTER_F(BaseClass, Method); \
+ void BaseClass##_##Method##_Benchmark::BenchmarkCase
+#else
+#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
+#endif
+
+// Helper macro to create a main routine in a test that runs the benchmarks
+#define BENCHMARK_MAIN() \
+ int main(int argc, char** argv) { \
+ ::benchmark::Initialize(&argc, argv); \
+ if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
+ ::benchmark::RunSpecifiedBenchmarks(); \
+ } \
+ int main(int, char**)
+
+
+// ------------------------------------------------------
+// Benchmark Reporters
+
+namespace benchmark {
+
+struct CPUInfo {
+ struct CacheInfo {
+ std::string type;
+ int level;
+ int size;
+ int num_sharing;
+ };
+
+ int num_cpus;
+ double cycles_per_second;
+ std::vector<CacheInfo> caches;
+ bool scaling_enabled;
+
+ static const CPUInfo& Get();
+
+ private:
+ CPUInfo();
+ BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
+};
+
+// Interface for custom benchmark result printers.
+// By default, benchmark reports are printed to stdout. However an application
+// can control the destination of the reports by calling
+// RunSpecifiedBenchmarks and passing it a custom reporter object.
+// The reporter object must implement the following interface.
+class BenchmarkReporter {
+ public:
+ struct Context {
+ CPUInfo const& cpu_info;
+ // The number of chars in the longest benchmark name.
+ size_t name_field_width;
+
+ Context();
+ };
+
+ struct Run {
+ Run()
+ : error_occurred(false),
+ iterations(1),
+ time_unit(kNanosecond),
+ real_accumulated_time(0),
+ cpu_accumulated_time(0),
+ bytes_per_second(0),
+ items_per_second(0),
+ max_heapbytes_used(0),
+ complexity(oNone),
+ complexity_lambda(),
+ complexity_n(0),
+ report_big_o(false),
+ report_rms(false),
+ counters() {}
+
+ std::string benchmark_name;
+ std::string report_label; // Empty if not set by benchmark.
+ bool error_occurred;
+ std::string error_message;
+
+ int64_t iterations;
+ TimeUnit time_unit;
+ double real_accumulated_time;
+ double cpu_accumulated_time;
+
+ // Return a value representing the real time per iteration in the unit
+ // specified by 'time_unit'.
+ // NOTE: If 'iterations' is zero the returned value represents the
+ // accumulated time.
+ double GetAdjustedRealTime() const;
+
+ // Return a value representing the cpu time per iteration in the unit
+ // specified by 'time_unit'.
+ // NOTE: If 'iterations' is zero the returned value represents the
+ // accumulated time.
+ double GetAdjustedCPUTime() const;
+
+ // Zero if not set by benchmark.
+ double bytes_per_second;
+ double items_per_second;
+
+ // This is set to 0.0 if memory tracing is not enabled.
+ double max_heapbytes_used;
+
+ // Keep track of arguments to compute asymptotic complexity
+ BigO complexity;
+ BigOFunc* complexity_lambda;
+ int complexity_n;
+
+ // what statistics to compute from the measurements
+ const std::vector<Statistics>* statistics;
+
+ // Inform print function whether the current run is a complexity report
+ bool report_big_o;
+ bool report_rms;
+
+ UserCounters counters;
+ };
+
+ // Construct a BenchmarkReporter with the output stream set to 'std::cout'
+ // and the error stream set to 'std::cerr'
+ BenchmarkReporter();
+
+ // Called once for every suite of benchmarks run.
+ // The parameter "context" contains information that the
+ // reporter may wish to use when generating its report, for example the
+ // platform under which the benchmarks are running. The benchmark run is
+ // never started if this function returns false, allowing the reporter
+ // to skip runs based on the context information.
+ virtual bool ReportContext(const Context& context) = 0;
+
+ // Called once for each group of benchmark runs, gives information about
+ // cpu-time and heap memory usage during the benchmark run. If the group
+ // of runs contained more than two entries then 'report' contains additional
+ // elements representing the mean and standard deviation of those runs.
+ // Additionally if this group of runs was the last in a family of benchmarks
+  // 'report' contains additional entries representing the asymptotic
+ // complexity and RMS of that benchmark family.
+ virtual void ReportRuns(const std::vector<Run>& report) = 0;
+
+  // Called once and only once after every group of benchmarks is run and
+ // reported.
+ virtual void Finalize() {}
+
+ // REQUIRES: The object referenced by 'out' is valid for the lifetime
+ // of the reporter.
+ void SetOutputStream(std::ostream* out) {
+ assert(out);
+ output_stream_ = out;
+ }
+
+ // REQUIRES: The object referenced by 'err' is valid for the lifetime
+ // of the reporter.
+ void SetErrorStream(std::ostream* err) {
+ assert(err);
+ error_stream_ = err;
+ }
+
+ std::ostream& GetOutputStream() const { return *output_stream_; }
+
+ std::ostream& GetErrorStream() const { return *error_stream_; }
+
+ virtual ~BenchmarkReporter();
+
+ // Write a human readable string to 'out' representing the specified
+ // 'context'.
+ // REQUIRES: 'out' is non-null.
+ static void PrintBasicContext(std::ostream* out, Context const& context);
+
+ private:
+ std::ostream* output_stream_;
+ std::ostream* error_stream_;
+};
+
+// Simple reporter that outputs benchmark data to the console. This is the
+// default reporter used by RunSpecifiedBenchmarks().
+class ConsoleReporter : public BenchmarkReporter {
+public:
+ enum OutputOptions {
+ OO_None = 0,
+ OO_Color = 1,
+ OO_Tabular = 2,
+ OO_ColorTabular = OO_Color|OO_Tabular,
+ OO_Defaults = OO_ColorTabular
+ };
+ explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
+ : output_options_(opts_), name_field_width_(0),
+ prev_counters_(), printed_header_(false) {}
+
+ virtual bool ReportContext(const Context& context);
+ virtual void ReportRuns(const std::vector<Run>& reports);
+
+ protected:
+ virtual void PrintRunData(const Run& report);
+ virtual void PrintHeader(const Run& report);
+
+ OutputOptions output_options_;
+ size_t name_field_width_;
+ UserCounters prev_counters_;
+ bool printed_header_;
+};
+
+class JSONReporter : public BenchmarkReporter {
+ public:
+ JSONReporter() : first_report_(true) {}
+ virtual bool ReportContext(const Context& context);
+ virtual void ReportRuns(const std::vector<Run>& reports);
+ virtual void Finalize();
+
+ private:
+ void PrintRunData(const Run& report);
+
+ bool first_report_;
+};
+
+class CSVReporter : public BenchmarkReporter {
+ public:
+ CSVReporter() : printed_header_(false) {}
+ virtual bool ReportContext(const Context& context);
+ virtual void ReportRuns(const std::vector<Run>& reports);
+
+ private:
+ void PrintRunData(const Run& report);
+
+ bool printed_header_;
+ std::set< std::string > user_counter_names_;
+};
+
+inline const char* GetTimeUnitString(TimeUnit unit) {
+ switch (unit) {
+ case kMillisecond:
+ return "ms";
+ case kMicrosecond:
+ return "us";
+ case kNanosecond:
+ default:
+ return "ns";
+ }
+}
+
+inline double GetTimeUnitMultiplier(TimeUnit unit) {
+ switch (unit) {
+ case kMillisecond:
+ return 1e3;
+ case kMicrosecond:
+ return 1e6;
+ case kNanosecond:
+ default:
+ return 1e9;
+ }
+}
+
+} // namespace benchmark
#endif // BENCHMARK_BENCHMARK_H_
diff --git a/libcxx/utils/google-benchmark/include/benchmark/benchmark_api.h b/libcxx/utils/google-benchmark/include/benchmark/benchmark_api.h
deleted file mode 100644
index 1e853e2cd4e..00000000000
--- a/libcxx/utils/google-benchmark/include/benchmark/benchmark_api.h
+++ /dev/null
@@ -1,915 +0,0 @@
-// Support for registering benchmarks for functions.
-
-/* Example usage:
-// Define a function that executes the code to be measured a
-// specified number of times:
-static void BM_StringCreation(benchmark::State& state) {
- while (state.KeepRunning())
- std::string empty_string;
-}
-
-// Register the function as a benchmark
-BENCHMARK(BM_StringCreation);
-
-// Define another benchmark
-static void BM_StringCopy(benchmark::State& state) {
- std::string x = "hello";
- while (state.KeepRunning())
- std::string copy(x);
-}
-BENCHMARK(BM_StringCopy);
-
-// Augment the main() program to invoke benchmarks if specified
-// via the --benchmarks command line flag. E.g.,
-// my_unittest --benchmark_filter=all
-// my_unittest --benchmark_filter=BM_StringCreation
-// my_unittest --benchmark_filter=String
-// my_unittest --benchmark_filter='Copy|Creation'
-int main(int argc, char** argv) {
- benchmark::Initialize(&argc, argv);
- benchmark::RunSpecifiedBenchmarks();
- return 0;
-}
-
-// Sometimes a family of microbenchmarks can be implemented with
-// just one routine that takes an extra argument to specify which
-// one of the family of benchmarks to run. For example, the following
-// code defines a family of microbenchmarks for measuring the speed
-// of memcpy() calls of different lengths:
-
-static void BM_memcpy(benchmark::State& state) {
- char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
- memset(src, 'x', state.range(0));
- while (state.KeepRunning())
- memcpy(dst, src, state.range(0));
- state.SetBytesProcessed(int64_t(state.iterations()) *
- int64_t(state.range(0)));
- delete[] src; delete[] dst;
-}
-BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
-
-// The preceding code is quite repetitive, and can be replaced with the
-// following short-hand. The following invocation will pick a few
-// appropriate arguments in the specified range and will generate a
-// microbenchmark for each such argument.
-BENCHMARK(BM_memcpy)->Range(8, 8<<10);
-
-// You might have a microbenchmark that depends on two inputs. For
-// example, the following code defines a family of microbenchmarks for
-// measuring the speed of set insertion.
-static void BM_SetInsert(benchmark::State& state) {
- while (state.KeepRunning()) {
- state.PauseTiming();
- set<int> data = ConstructRandomSet(state.range(0));
- state.ResumeTiming();
- for (int j = 0; j < state.range(1); ++j)
- data.insert(RandomNumber());
- }
-}
-BENCHMARK(BM_SetInsert)
- ->Args({1<<10, 1})
- ->Args({1<<10, 8})
- ->Args({1<<10, 64})
- ->Args({1<<10, 512})
- ->Args({8<<10, 1})
- ->Args({8<<10, 8})
- ->Args({8<<10, 64})
- ->Args({8<<10, 512});
-
-// The preceding code is quite repetitive, and can be replaced with
-// the following short-hand. The following macro will pick a few
-// appropriate arguments in the product of the two specified ranges
-// and will generate a microbenchmark for each such pair.
-BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}});
-
-// For more complex patterns of inputs, passing a custom function
-// to Apply allows programmatic specification of an
-// arbitrary set of arguments to run the microbenchmark on.
-// The following example enumerates a dense range on
-// one parameter, and a sparse range on the second.
-static void CustomArguments(benchmark::internal::Benchmark* b) {
- for (int i = 0; i <= 10; ++i)
- for (int j = 32; j <= 1024*1024; j *= 8)
- b->Args({i, j});
-}
-BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
-
-// Templated microbenchmarks work the same way:
-// Produce then consume 'size' messages 'iters' times
-// Measures throughput in the absence of multiprogramming.
-template <class Q> int BM_Sequential(benchmark::State& state) {
- Q q;
- typename Q::value_type v;
- while (state.KeepRunning()) {
- for (int i = state.range(0); i--; )
- q.push(v);
- for (int e = state.range(0); e--; )
- q.Wait(&v);
- }
- // actually messages, not bytes:
- state.SetBytesProcessed(
- static_cast<int64_t>(state.iterations())*state.range(0));
-}
-BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
-
-Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
-benchmark. This option overrides the `benchmark_min_time` flag.
-
-void BM_test(benchmark::State& state) {
- ... body ...
-}
-BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
-
-In a multithreaded test, it is guaranteed that none of the threads will start
-until all have called KeepRunning, and all will have finished before KeepRunning
-returns false. As such, any global setup or teardown you want to do can be
-wrapped in a check against the thread index:
-
-static void BM_MultiThreaded(benchmark::State& state) {
- if (state.thread_index == 0) {
- // Setup code here.
- }
- while (state.KeepRunning()) {
- // Run the test as normal.
- }
- if (state.thread_index == 0) {
- // Teardown code here.
- }
-}
-BENCHMARK(BM_MultiThreaded)->Threads(4);
-
-
-If a benchmark runs a few milliseconds it may be hard to visually compare the
-measured times, since the output data is given in nanoseconds per default. In
-order to manually set the time unit, you can specify it manually:
-
-BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
-*/
-
-#ifndef BENCHMARK_BENCHMARK_API_H_
-#define BENCHMARK_BENCHMARK_API_H_
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include <string>
-#include <vector>
-#include <map>
-
-#include "macros.h"
-
-#if defined(BENCHMARK_HAS_CXX11)
-#include <type_traits>
-#include <initializer_list>
-#include <utility>
-#endif
-
-#if defined(_MSC_VER)
-#include <intrin.h> // for _ReadWriteBarrier
-#endif
-
-namespace benchmark {
-class BenchmarkReporter;
-
-void Initialize(int* argc, char** argv);
-
-// Report to stdout all arguments in 'argv' as unrecognized except the first.
-// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
-bool ReportUnrecognizedArguments(int argc, char** argv);
-
-// Generate a list of benchmarks matching the specified --benchmark_filter flag
-// and if --benchmark_list_tests is specified return after printing the name
-// of each matching benchmark. Otherwise run each matching benchmark and
-// report the results.
-//
-// The second and third overloads use the specified 'console_reporter' and
-// 'file_reporter' respectively. 'file_reporter' will write to the file
-// specified by '--benchmark_out'. If '--benchmark_out' is not given the
-// 'file_reporter' is ignored.
-//
-// RETURNS: The number of matching benchmarks.
-size_t RunSpecifiedBenchmarks();
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter);
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
- BenchmarkReporter* file_reporter);
-
-// If this routine is called, peak memory allocation past this point in the
-// benchmark is reported at the end of the benchmark report line. (It is
-// computed by running the benchmark once with a single iteration and a memory
-// tracer.)
-// TODO(dominic)
-// void MemoryUsage();
-
-namespace internal {
-class Benchmark;
-class BenchmarkImp;
-class BenchmarkFamilies;
-
-void UseCharPointer(char const volatile*);
-
-// Take ownership of the pointer and register the benchmark. Return the
-// registered benchmark.
-Benchmark* RegisterBenchmarkInternal(Benchmark*);
-
-// Ensure that the standard streams are properly initialized in every TU.
-int InitializeStreams();
-BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
-
-} // end namespace internal
-
-
-#if !defined(__GNUC__) || defined(__pnacl__) || defined(EMSCRIPTEN)
-# define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
-#endif
-
-// The DoNotOptimize(...) function can be used to prevent a value or
-// expression from being optimized away by the compiler. This function is
-// intended to add little to no overhead.
-// See: https://youtu.be/nXaxk27zwlk?t=2441
-#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
-template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
- asm volatile("" : : "g"(value) : "memory");
-}
-// Force the compiler to flush pending writes to global memory. Acts as an
-// effective read/write barrier
-inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
- asm volatile("" : : : "memory");
-}
-#elif defined(_MSC_VER)
-template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
- internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
- _ReadWriteBarrier();
-}
-
-inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
- _ReadWriteBarrier();
-}
-#else
-template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
- internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
-}
-// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
-#endif
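-
-// For example (a minimal sketch; 'BM_VectorPushBack' is a hypothetical
-// benchmark and <vector> is assumed to be included):
-//
-//   static void BM_VectorPushBack(benchmark::State& state) {
-//     while (state.KeepRunning()) {
-//       std::vector<int> v;
-//       v.reserve(1);
-//       benchmark::DoNotOptimize(v.data());  // keep the allocation observable
-//       v.push_back(42);
-//       benchmark::ClobberMemory();  // make sure the store is not elided
-//     }
-//   }
-//   BENCHMARK(BM_VectorPushBack);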
-
-
-
-// This class is used for user-defined counters.
-class Counter {
-public:
-
- enum Flags {
- kDefaults = 0,
- // Mark the counter as a rate. It will be presented divided
- // by the duration of the benchmark.
- kIsRate = 1,
- // Mark the counter as a thread-average quantity. It will be
- // presented divided by the number of threads.
- kAvgThreads = 2,
- // Mark the counter as a thread-average rate. See above.
- kAvgThreadsRate = kIsRate|kAvgThreads
- };
-
- double value;
- Flags flags;
-
- BENCHMARK_ALWAYS_INLINE
- Counter(double v = 0., Flags f = kDefaults) : value(v), flags(f) {}
-
- BENCHMARK_ALWAYS_INLINE operator double const& () const { return value; }
- BENCHMARK_ALWAYS_INLINE operator double & () { return value; }
-
-};
-
-// This is the container for the user-defined counters.
-typedef std::map<std::string, Counter> UserCounters;
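-
-// For example (a sketch of typical usage; 'BM_Compress' and the
-// 'CompressOneBlock()' helper are hypothetical):
-//
-//   static void BM_Compress(benchmark::State& state) {
-//     size_t bytes = 0;
-//     while (state.KeepRunning())
-//       bytes += CompressOneBlock();  // returns the number of bytes produced
-//     // Reported as a rate, i.e. divided by the duration of the benchmark.
-//     state.counters["BytesOut"] =
-//         benchmark::Counter(static_cast<double>(bytes),
-//                            benchmark::Counter::kIsRate);
-//   }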
-
-
-// TimeUnit is passed to a benchmark in order to specify the order of magnitude
-// for the measured time.
-enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
-
-// BigO is passed to a benchmark in order to specify the asymptotic
-// computational complexity for the benchmark. If oAuto is selected, the
-// complexity is deduced automatically from the best-fitting curve.
-enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
-
-// BigOFunc is passed to a benchmark in order to specify the asymptotic
-// computational complexity for the benchmark.
-typedef double(BigOFunc)(int);
-
-namespace internal {
-class ThreadTimer;
-class ThreadManager;
-
-#if defined(BENCHMARK_HAS_CXX11)
-enum ReportMode : unsigned {
-#else
-enum ReportMode {
-#endif
- RM_Unspecified, // The mode has not been manually specified
- RM_Default, // The mode is user-specified as default.
- RM_ReportAggregatesOnly
-};
-}
-
-// State is passed to a running Benchmark and contains state for the
-// benchmark to use.
-class State {
- public:
- // Returns true if the benchmark should continue through another iteration.
- // NOTE: A benchmark may not return from the test until KeepRunning() has
- // returned false.
- bool KeepRunning() {
- if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
- StartKeepRunning();
- }
- bool const res = total_iterations_++ < max_iterations;
- if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
- FinishKeepRunning();
- }
- return res;
- }
-
- // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
- // by the current thread.
- // Stop the benchmark timer. If not called, the timer will be
- // automatically stopped after KeepRunning() returns false for the first time.
- //
- // For threaded benchmarks the PauseTiming() function only pauses the timing
- // for the current thread.
- //
- // NOTE: The "real time" measurement is per-thread. If different threads
- // report different measurements the largest one is reported.
- //
- // NOTE: PauseTiming()/ResumeTiming() are relatively
- // heavyweight, and so their use should generally be avoided
- // within each benchmark iteration, if possible.
- void PauseTiming();
-
- // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
- // by the current thread.
- // Start the benchmark timer. The timer is NOT running on entrance to the
- // benchmark function. It begins running after the first call to KeepRunning()
- //
- // NOTE: PauseTiming()/ResumeTiming() are relatively
- // heavyweight, and so their use should generally be avoided
- // within each benchmark iteration, if possible.
- void ResumeTiming();
-
- // REQUIRES: 'SkipWithError(...)' has not been called previously by the
- // current thread.
- // Skip any future iterations of the 'KeepRunning()' loop in the current
- // thread and report an error with the specified 'msg'. After this call
- // the user may explicitly 'return' from the benchmark.
- //
- // For threaded benchmarks only the current thread stops executing and future
- // calls to `KeepRunning()` will block until all threads have completed
- // the `KeepRunning()` loop. If multiple threads report an error only the
- // first error message is used.
- //
- // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
- // the current scope immediately. If the function is called from within
-  // the 'KeepRunning()' loop, the current iteration will finish. It is the
-  // user's responsibility to exit the scope as needed.
- void SkipWithError(const char* msg);
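-
-  // For example (a sketch; 'OpenTestInput()' and 'ParseOne()' are hypothetical
-  // helpers):
-  //
-  //   static void BM_Parse(benchmark::State& state) {
-  //     Input* in = OpenTestInput();
-  //     if (in == nullptr) {
-  //       state.SkipWithError("test input could not be opened");
-  //       return;  // explicit return is allowed after SkipWithError()
-  //     }
-  //     while (state.KeepRunning()) ParseOne(in);
-  //   }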
-
- // REQUIRES: called exactly once per iteration of the KeepRunning loop.
- // Set the manually measured time for this benchmark iteration, which
- // is used instead of automatically measured time if UseManualTime() was
- // specified.
- //
- // For threaded benchmarks the final value will be set to the largest
-  // reported value.
- void SetIterationTime(double seconds);
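-
-  // For example (a sketch of manual timing; 'LaunchKernel()' and
-  // 'WaitForKernel()' are hypothetical):
-  //
-  //   static void BM_Kernel(benchmark::State& state) {
-  //     while (state.KeepRunning()) {
-  //       auto start = std::chrono::high_resolution_clock::now();
-  //       LaunchKernel();
-  //       WaitForKernel();
-  //       auto end = std::chrono::high_resolution_clock::now();
-  //       state.SetIterationTime(
-  //           std::chrono::duration<double>(end - start).count());
-  //     }
-  //   }
-  //   BENCHMARK(BM_Kernel)->UseManualTime();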
-
- // Set the number of bytes processed by the current benchmark
- // execution. This routine is typically called once at the end of a
- // throughput oriented benchmark. If this routine is called with a
- // value > 0, the report is printed in MB/sec instead of nanoseconds
- // per iteration.
- //
- // REQUIRES: a benchmark has exited its KeepRunning loop.
- BENCHMARK_ALWAYS_INLINE
- void SetBytesProcessed(size_t bytes) { bytes_processed_ = bytes; }
-
- BENCHMARK_ALWAYS_INLINE
- size_t bytes_processed() const { return bytes_processed_; }
-
-  // If this routine is called with complexity_n > 0 and a complexity report is
-  // requested for the benchmark family, then the current benchmark will be
-  // part of the computation and complexity_n will represent the length of N.
- BENCHMARK_ALWAYS_INLINE
- void SetComplexityN(int complexity_n) { complexity_n_ = complexity_n; }
-
- BENCHMARK_ALWAYS_INLINE
- int complexity_length_n() { return complexity_n_; }
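-
-  // For example (a sketch; 'ConstructRandomVector()' is a hypothetical helper):
-  //
-  //   static void BM_Sort(benchmark::State& state) {
-  //     std::vector<int> v = ConstructRandomVector(state.range(0));
-  //     while (state.KeepRunning()) {
-  //       state.PauseTiming();
-  //       std::vector<int> copy = v;
-  //       state.ResumeTiming();
-  //       std::sort(copy.begin(), copy.end());
-  //     }
-  //     state.SetComplexityN(state.range(0));
-  //   }
-  //   BENCHMARK(BM_Sort)->Range(1<<10, 1<<16)->Complexity(benchmark::oNLogN);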
-
- // If this routine is called with items > 0, then an items/s
- // label is printed on the benchmark report line for the currently
- // executing benchmark. It is typically called at the end of a processing
-  // benchmark where an items/second throughput is desired.
- //
- // REQUIRES: a benchmark has exited its KeepRunning loop.
- BENCHMARK_ALWAYS_INLINE
- void SetItemsProcessed(size_t items) { items_processed_ = items; }
-
- BENCHMARK_ALWAYS_INLINE
- size_t items_processed() const { return items_processed_; }
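-
-  // For example (a sketch; the hashing work is just a stand-in for any
-  // byte-oriented workload):
-  //
-  //   static void BM_Hash(benchmark::State& state) {
-  //     std::string block(static_cast<size_t>(state.range(0)), 'x');
-  //     while (state.KeepRunning())
-  //       benchmark::DoNotOptimize(std::hash<std::string>()(block));
-  //     state.SetBytesProcessed(state.iterations() * state.range(0));
-  //     state.SetItemsProcessed(state.iterations());
-  //   }
-  //   BENCHMARK(BM_Hash)->Range(8, 8<<10);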
-
- // If this routine is called, the specified label is printed at the
- // end of the benchmark report line for the currently executing
- // benchmark. Example:
- // static void BM_Compress(benchmark::State& state) {
- // ...
-  //    double compression = input_size / output_size;
-  //    state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
- // }
- // Produces output that looks like:
- // BM_Compress 50 50 14115038 compress:27.3%
- //
- // REQUIRES: a benchmark has exited its KeepRunning loop.
- void SetLabel(const char* label);
-
- void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
- this->SetLabel(str.c_str());
- }
-
- // Range arguments for this run. CHECKs if the argument has been set.
- BENCHMARK_ALWAYS_INLINE
- int range(std::size_t pos = 0) const {
- assert(range_.size() > pos);
- return range_[pos];
- }
-
- BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
- int range_x() const { return range(0); }
-
- BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
- int range_y() const { return range(1); }
-
- BENCHMARK_ALWAYS_INLINE
- size_t iterations() const { return total_iterations_; }
-
- private:
- bool started_;
- bool finished_;
- size_t total_iterations_;
-
- std::vector<int> range_;
-
- size_t bytes_processed_;
- size_t items_processed_;
-
- int complexity_n_;
-
- bool error_occurred_;
-
- public:
- // Container for user-defined counters.
- UserCounters counters;
- // Index of the executing thread. Values from [0, threads).
- const int thread_index;
- // Number of threads concurrently executing the benchmark.
- const int threads;
- const size_t max_iterations;
-
- // TODO make me private
- State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
- int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager);
-
- private:
- void StartKeepRunning();
- void FinishKeepRunning();
- internal::ThreadTimer* timer_;
- internal::ThreadManager* manager_;
- BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
-};
-
-namespace internal {
-
-typedef void(Function)(State&);
-
-// ------------------------------------------------------
-// Benchmark registration object. The BENCHMARK() macro expands
-// into an internal::Benchmark* object. Various methods can
-// be called on this object to change the properties of the benchmark.
-// Each method returns "this" so that multiple method calls can be
-// chained into one expression.
-class Benchmark {
- public:
- virtual ~Benchmark();
-
- // Note: the following methods all return "this" so that multiple
- // method calls can be chained together in one expression.
-
- // Run this benchmark once with "x" as the extra argument passed
- // to the function.
- // REQUIRES: The function passed to the constructor must accept an arg1.
- Benchmark* Arg(int x);
-
- // Run this benchmark with the given time unit for the generated output report
- Benchmark* Unit(TimeUnit unit);
-
- // Run this benchmark once for a number of values picked from the
- // range [start..limit]. (start and limit are always picked.)
- // REQUIRES: The function passed to the constructor must accept an arg1.
- Benchmark* Range(int start, int limit);
-
-  // Run this benchmark once for all values in the range [start..limit] with
-  // a specific step.
- // REQUIRES: The function passed to the constructor must accept an arg1.
- Benchmark* DenseRange(int start, int limit, int step = 1);
-
- // Run this benchmark once with "args" as the extra arguments passed
- // to the function.
- // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
- Benchmark* Args(const std::vector<int>& args);
-
- // Equivalent to Args({x, y})
- // NOTE: This is a legacy C++03 interface provided for compatibility only.
- // New code should use 'Args'.
- Benchmark* ArgPair(int x, int y) {
- std::vector<int> args;
- args.push_back(x);
- args.push_back(y);
- return Args(args);
- }
-
- // Run this benchmark once for a number of values picked from the
- // ranges [start..limit]. (starts and limits are always picked.)
- // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
- Benchmark* Ranges(const std::vector<std::pair<int, int> >& ranges);
-
- // Equivalent to ArgNames({name})
- Benchmark* ArgName(const std::string& name);
-
- // Set the argument names to display in the benchmark name. If not called,
- // only argument values will be shown.
- Benchmark* ArgNames(const std::vector<std::string>& names);
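-
-  // For example (a sketch, reusing the BM_SetInsert family shown in the
-  // comment block at the top of this file):
-  //
-  //   BENCHMARK(BM_SetInsert)
-  //       ->Ranges({{1<<10, 8<<10}, {1, 512}})
-  //       ->ArgNames({"set_size", "inserts"});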
-
- // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
- // NOTE: This is a legacy C++03 interface provided for compatibility only.
- // New code should use 'Ranges'.
- Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2) {
- std::vector<std::pair<int, int> > ranges;
- ranges.push_back(std::make_pair(lo1, hi1));
- ranges.push_back(std::make_pair(lo2, hi2));
- return Ranges(ranges);
- }
-
- // Pass this benchmark object to *func, which can customize
- // the benchmark by calling various methods like Arg, Args,
- // Threads, etc.
- Benchmark* Apply(void (*func)(Benchmark* benchmark));
-
-  // Set the range multiplier for non-dense ranges. If not called, the range
- // multiplier kRangeMultiplier will be used.
- Benchmark* RangeMultiplier(int multiplier);
-
- // Set the minimum amount of time to use when running this benchmark. This
- // option overrides the `benchmark_min_time` flag.
- // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
- Benchmark* MinTime(double t);
-
-  // Specify the number of iterations that should be run by this benchmark.
- // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
- //
- // NOTE: This function should only be used when *exact* iteration control is
-  // needed and never to control or limit how long a benchmark runs; for that,
- // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
- Benchmark* Iterations(size_t n);
-
-  // Specify the number of times to repeat this benchmark. This option overrides
- // the `benchmark_repetitions` flag.
- // REQUIRES: `n > 0`
- Benchmark* Repetitions(int n);
-
- // Specify if each repetition of the benchmark should be reported separately
- // or if only the final statistics should be reported. If the benchmark
- // is not repeated then the single result is always reported.
- Benchmark* ReportAggregatesOnly(bool v = true);
-
-  // If a particular benchmark is I/O bound, runs multiple threads internally,
-  // or if for some reason CPU timings are not representative, call this method.
-  // If called, the elapsed time will be used to control how many iterations
-  // are run, and in the printing of items/second or MB/second values. If not
-  // called, the CPU time used by the benchmark will be used.
- Benchmark* UseRealTime();
-
-  // If a benchmark must measure time manually (e.g. if GPU execution time is
-  // being measured), call this method. If called, each benchmark iteration
-  // should call SetIterationTime(seconds) to report the measured time, which
-  // will be used to control how many iterations are run, and in the printing
-  // of items/second or MB/second values.
- Benchmark* UseManualTime();
-
- // Set the asymptotic computational complexity for the benchmark. If called
- // the asymptotic computational complexity will be shown on the output.
- Benchmark* Complexity(BigO complexity = benchmark::oAuto);
-
- // Set the asymptotic computational complexity for the benchmark. If called
- // the asymptotic computational complexity will be shown on the output.
- Benchmark* Complexity(BigOFunc* complexity);
-
- // Support for running multiple copies of the same benchmark concurrently
- // in multiple threads. This may be useful when measuring the scaling
- // of some piece of code.
-
- // Run one instance of this benchmark concurrently in t threads.
- Benchmark* Threads(int t);
-
- // Pick a set of values T from [min_threads,max_threads].
- // min_threads and max_threads are always included in T. Run this
- // benchmark once for each value in T. The benchmark run for a
- // particular value t consists of t threads running the benchmark
- // function concurrently. For example, consider:
- // BENCHMARK(Foo)->ThreadRange(1,16);
- // This will run the following benchmarks:
- // Foo in 1 thread
- // Foo in 2 threads
- // Foo in 4 threads
- // Foo in 8 threads
- // Foo in 16 threads
- Benchmark* ThreadRange(int min_threads, int max_threads);
-
- // For each value n in the range, run this benchmark once using n threads.
- // min_threads and max_threads are always included in the range.
- // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
- // a benchmark with 1, 4, 7 and 8 threads.
- Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
-
- // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
- Benchmark* ThreadPerCpu();
-
- virtual void Run(State& state) = 0;
-
- // Used inside the benchmark implementation
- struct Instance;
-
- protected:
- explicit Benchmark(const char* name);
- Benchmark(Benchmark const&);
- void SetName(const char* name);
-
- int ArgsCnt() const;
-
- static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
-
- private:
- friend class BenchmarkFamilies;
-
- std::string name_;
- ReportMode report_mode_;
- std::vector<std::string> arg_names_; // Args for all benchmark runs
- std::vector<std::vector<int> > args_; // Args for all benchmark runs
- TimeUnit time_unit_;
- int range_multiplier_;
- double min_time_;
- size_t iterations_;
- int repetitions_;
- bool use_real_time_;
- bool use_manual_time_;
- BigO complexity_;
- BigOFunc* complexity_lambda_;
- std::vector<int> thread_counts_;
-
- Benchmark& operator=(Benchmark const&);
-};
-
-} // namespace internal
-
-// Create and register a benchmark with the specified 'name' that invokes
-// the specified functor 'fn'.
-//
-// RETURNS: A pointer to the registered benchmark.
-internal::Benchmark* RegisterBenchmark(const char* name,
- internal::Function* fn);
-
-#if defined(BENCHMARK_HAS_CXX11)
-template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
-#endif
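-
-// For example (a sketch of programmatic registration, assuming C++11; the
-// benchmark names and sizes are arbitrary):
-//
-//   int main(int argc, char** argv) {
-//     benchmark::Initialize(&argc, argv);
-//     for (int size : {64, 512, 4096}) {
-//       std::string name = "BM_Fill/" + std::to_string(size);
-//       benchmark::RegisterBenchmark(name.c_str(),
-//                                    [size](benchmark::State& st) {
-//         std::vector<int> v;
-//         while (st.KeepRunning()) v.assign(size, 42);
-//       });
-//     }
-//     benchmark::RunSpecifiedBenchmarks();
-//   }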
-
-namespace internal {
-// The class used to hold all Benchmarks created from static functions
-// (i.e. those created using the BENCHMARK(...) macros).
-class FunctionBenchmark : public Benchmark {
- public:
- FunctionBenchmark(const char* name, Function* func)
- : Benchmark(name), func_(func) {}
-
- virtual void Run(State& st);
-
- private:
- Function* func_;
-};
-
-#ifdef BENCHMARK_HAS_CXX11
-template <class Lambda>
-class LambdaBenchmark : public Benchmark {
- public:
- virtual void Run(State& st) { lambda_(st); }
-
- private:
- template <class OLambda>
- LambdaBenchmark(const char* name, OLambda&& lam)
- : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
-
- LambdaBenchmark(LambdaBenchmark const&) = delete;
-
- private:
- template <class Lam>
- friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
-
- Lambda lambda_;
-};
-#endif
-
-} // end namespace internal
-
-inline internal::Benchmark* RegisterBenchmark(const char* name,
- internal::Function* fn) {
- return internal::RegisterBenchmarkInternal(
- ::new internal::FunctionBenchmark(name, fn));
-}
-
-#ifdef BENCHMARK_HAS_CXX11
-template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
- using BenchType =
- internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
- return internal::RegisterBenchmarkInternal(
- ::new BenchType(name, std::forward<Lambda>(fn)));
-}
-#endif
-
-#if defined(BENCHMARK_HAS_CXX11) && \
- (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
-template <class Lambda, class... Args>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
- Args&&... args) {
- return benchmark::RegisterBenchmark(
- name, [=](benchmark::State& st) { fn(st, args...); });
-}
-#else
-#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
-#endif
-
-// The base class for all fixture tests.
-class Fixture : public internal::Benchmark {
- public:
- Fixture() : internal::Benchmark("") {}
-
- virtual void Run(State& st) {
- this->SetUp(st);
- this->BenchmarkCase(st);
- this->TearDown(st);
- }
-
- // These will be deprecated ...
- virtual void SetUp(const State&) {}
- virtual void TearDown(const State&) {}
- // ... In favor of these.
- virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
- virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
-
- protected:
- virtual void BenchmarkCase(State&) = 0;
-};
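-
-// For example (a sketch; the fixture members are illustrative and the
-// registration macros are defined further below in this header):
-//
-//   class VectorFixture : public benchmark::Fixture {
-//    public:
-//     void SetUp(benchmark::State& st) { v.assign(st.range(0), 1); }
-//     void TearDown(benchmark::State&) { v.clear(); }
-//     std::vector<int> v;
-//   };
-//
-//   BENCHMARK_DEFINE_F(VectorFixture, Sum)(benchmark::State& st) {
-//     while (st.KeepRunning())
-//       benchmark::DoNotOptimize(std::accumulate(v.begin(), v.end(), 0));
-//   }
-//   BENCHMARK_REGISTER_F(VectorFixture, Sum)->Arg(1 << 10);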
-
-} // end namespace benchmark
-
-// ------------------------------------------------------
-// Macro to register benchmarks
-
-// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
-// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
-// empty. If X is empty the expression becomes (+1 == +0).
-#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
-#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
-#else
-#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
-#endif
-
-// Helpers for generating unique variable names
-#define BENCHMARK_PRIVATE_NAME(n) \
- BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
-#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
-#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
-
-#define BENCHMARK_PRIVATE_DECLARE(n) \
- static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
- BENCHMARK_UNUSED
-
-#define BENCHMARK(n) \
- BENCHMARK_PRIVATE_DECLARE(n) = \
- (::benchmark::internal::RegisterBenchmarkInternal( \
- new ::benchmark::internal::FunctionBenchmark(#n, n)))
-
-// Old-style macros
-#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
-#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
-#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
-#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
-#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
- BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
-
-#if __cplusplus >= 201103L
-
-// Register a benchmark which invokes the function specified by `func`
-// with the additional arguments specified by `...`.
-//
-// For example:
-//
-// template <class ...ExtraArgs>
-// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
-//   [...]
-// }
-// /* Registers a benchmark named "BM_takes_args/int_string_test" */
-// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
-#define BENCHMARK_CAPTURE(func, test_case_name, ...) \
- BENCHMARK_PRIVATE_DECLARE(func) = \
- (::benchmark::internal::RegisterBenchmarkInternal( \
- new ::benchmark::internal::FunctionBenchmark( \
- #func "/" #test_case_name, \
- [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
-
-#endif  // __cplusplus >= 201103L
-
-// This will register a benchmark for a templatized function. For example:
-//
-// template<int arg>
-// void BM_Foo(benchmark::State& state);
-//
-// BENCHMARK_TEMPLATE(BM_Foo, 1);
-//
-// will register BM_Foo<1> as a benchmark.
-#define BENCHMARK_TEMPLATE1(n, a) \
- BENCHMARK_PRIVATE_DECLARE(n) = \
- (::benchmark::internal::RegisterBenchmarkInternal( \
- new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
-
-#define BENCHMARK_TEMPLATE2(n, a, b) \
- BENCHMARK_PRIVATE_DECLARE(n) = \
- (::benchmark::internal::RegisterBenchmarkInternal( \
- new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
- n<a, b>)))
-
-#if __cplusplus >= 201103L
-#define BENCHMARK_TEMPLATE(n, ...) \
- BENCHMARK_PRIVATE_DECLARE(n) = \
- (::benchmark::internal::RegisterBenchmarkInternal( \
- new ::benchmark::internal::FunctionBenchmark( \
- #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
-#else
-#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
-#endif
-
-#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
- class BaseClass##_##Method##_Benchmark : public BaseClass { \
- public: \
- BaseClass##_##Method##_Benchmark() : BaseClass() { \
- this->SetName(#BaseClass "/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&); \
- };
-
-#define BENCHMARK_DEFINE_F(BaseClass, Method) \
- BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
- void BaseClass##_##Method##_Benchmark::BenchmarkCase
-
-#define BENCHMARK_REGISTER_F(BaseClass, Method) \
- BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)
-
-#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
- BENCHMARK_PRIVATE_DECLARE(TestName) = \
- (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
-
-// This macro will define and register a benchmark within a fixture class.
-#define BENCHMARK_F(BaseClass, Method) \
- BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
- BENCHMARK_REGISTER_F(BaseClass, Method); \
- void BaseClass##_##Method##_Benchmark::BenchmarkCase
-
-// Helper macro to create a main routine in a test that runs the benchmarks
-#define BENCHMARK_MAIN() \
- int main(int argc, char** argv) { \
- ::benchmark::Initialize(&argc, argv); \
- if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
- ::benchmark::RunSpecifiedBenchmarks(); \
- }
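-
-// For example, a complete benchmark binary (a minimal sketch) needs only a
-// benchmark function, its registration, and this macro:
-//
-//   static void BM_Noop(benchmark::State& state) {
-//     while (state.KeepRunning()) {
-//     }
-//   }
-//   BENCHMARK(BM_Noop);
-//
-//   BENCHMARK_MAIN();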
-
-#endif // BENCHMARK_BENCHMARK_API_H_
diff --git a/libcxx/utils/google-benchmark/include/benchmark/macros.h b/libcxx/utils/google-benchmark/include/benchmark/macros.h
deleted file mode 100644
index 2466fd3fad0..00000000000
--- a/libcxx/utils/google-benchmark/include/benchmark/macros.h
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2015 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef BENCHMARK_MACROS_H_
-#define BENCHMARK_MACROS_H_
-
-#if __cplusplus >= 201103L
-#define BENCHMARK_HAS_CXX11
-#endif
-
-#ifndef BENCHMARK_HAS_CXX11
-#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- TypeName& operator=(const TypeName&)
-#else
-#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&) = delete; \
- TypeName& operator=(const TypeName&) = delete
-#endif
-
-#if defined(__GNUC__)
-#define BENCHMARK_UNUSED __attribute__((unused))
-#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
-#define BENCHMARK_NOEXCEPT noexcept
-#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
-#elif defined(_MSC_VER) && !defined(__clang__)
-#define BENCHMARK_UNUSED
-#define BENCHMARK_ALWAYS_INLINE __forceinline
-#if _MSC_VER >= 1900
-#define BENCHMARK_NOEXCEPT noexcept
-#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
-#else
-#define BENCHMARK_NOEXCEPT
-#define BENCHMARK_NOEXCEPT_OP(x)
-#endif
-#define __func__ __FUNCTION__
-#else
-#define BENCHMARK_UNUSED
-#define BENCHMARK_ALWAYS_INLINE
-#define BENCHMARK_NOEXCEPT
-#define BENCHMARK_NOEXCEPT_OP(x)
-#endif
-
-#if defined(__GNUC__)
-#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
-#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
-#else
-#define BENCHMARK_BUILTIN_EXPECT(x, y) x
-#define BENCHMARK_DEPRECATED_MSG(msg)
-#endif
-
-#if defined(__GNUC__) && !defined(__clang__)
-#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#endif
-
-#endif // BENCHMARK_MACROS_H_
diff --git a/libcxx/utils/google-benchmark/include/benchmark/reporter.h b/libcxx/utils/google-benchmark/include/benchmark/reporter.h
deleted file mode 100644
index 789124ba813..00000000000
--- a/libcxx/utils/google-benchmark/include/benchmark/reporter.h
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright 2015 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef BENCHMARK_REPORTER_H_
-#define BENCHMARK_REPORTER_H_
-
-#include <cassert>
-#include <iosfwd>
-#include <string>
-#include <utility>
-#include <vector>
-#include <set>
-
-#include "benchmark_api.h" // For forward declaration of BenchmarkReporter
-
-namespace benchmark {
-
-// Interface for custom benchmark result printers.
-// By default, benchmark reports are printed to stdout. However, an application
-// can control the destination of the reports by calling
-// RunSpecifiedBenchmarks and passing it a custom reporter object.
-// The reporter object must implement the following interface.
-class BenchmarkReporter {
- public:
- struct Context {
- int num_cpus;
- double mhz_per_cpu;
- bool cpu_scaling_enabled;
-
- // The number of chars in the longest benchmark name.
- size_t name_field_width;
- };
-
- struct Run {
- Run()
- : error_occurred(false),
- iterations(1),
- time_unit(kNanosecond),
- real_accumulated_time(0),
- cpu_accumulated_time(0),
- bytes_per_second(0),
- items_per_second(0),
- max_heapbytes_used(0),
- complexity(oNone),
- complexity_lambda(),
- complexity_n(0),
- report_big_o(false),
- report_rms(false),
- counters() {}
-
- std::string benchmark_name;
- std::string report_label; // Empty if not set by benchmark.
- bool error_occurred;
- std::string error_message;
-
- int64_t iterations;
- TimeUnit time_unit;
- double real_accumulated_time;
- double cpu_accumulated_time;
-
- // Return a value representing the real time per iteration in the unit
- // specified by 'time_unit'.
- // NOTE: If 'iterations' is zero the returned value represents the
- // accumulated time.
- double GetAdjustedRealTime() const;
-
- // Return a value representing the cpu time per iteration in the unit
- // specified by 'time_unit'.
- // NOTE: If 'iterations' is zero the returned value represents the
- // accumulated time.
- double GetAdjustedCPUTime() const;
-
- // Zero if not set by benchmark.
- double bytes_per_second;
- double items_per_second;
-
- // This is set to 0.0 if memory tracing is not enabled.
- double max_heapbytes_used;
-
- // Keep track of arguments to compute asymptotic complexity
- BigO complexity;
- BigOFunc* complexity_lambda;
- int complexity_n;
-
- // Inform print function whether the current run is a complexity report
- bool report_big_o;
- bool report_rms;
-
- UserCounters counters;
- };
-
- // Construct a BenchmarkReporter with the output stream set to 'std::cout'
- // and the error stream set to 'std::cerr'
- BenchmarkReporter();
-
- // Called once for every suite of benchmarks run.
- // The parameter "context" contains information that the
- // reporter may wish to use when generating its report, for example the
- // platform under which the benchmarks are running. The benchmark run is
- // never started if this function returns false, allowing the reporter
- // to skip runs based on the context information.
- virtual bool ReportContext(const Context& context) = 0;
-
-  // Called once for each group of benchmark runs; gives information about
-  // CPU time and heap memory usage during the benchmark run. If the group
- // of runs contained more than two entries then 'report' contains additional
- // elements representing the mean and standard deviation of those runs.
- // Additionally if this group of runs was the last in a family of benchmarks
- // 'reports' contains additional entries representing the asymptotic
- // complexity and RMS of that benchmark family.
- virtual void ReportRuns(const std::vector<Run>& report) = 0;
-
-  // Called once and only once after every group of benchmarks is run and
-  // reported.
- virtual void Finalize() {}
-
- // REQUIRES: The object referenced by 'out' is valid for the lifetime
- // of the reporter.
- void SetOutputStream(std::ostream* out) {
- assert(out);
- output_stream_ = out;
- }
-
- // REQUIRES: The object referenced by 'err' is valid for the lifetime
- // of the reporter.
- void SetErrorStream(std::ostream* err) {
- assert(err);
- error_stream_ = err;
- }
-
- std::ostream& GetOutputStream() const { return *output_stream_; }
-
- std::ostream& GetErrorStream() const { return *error_stream_; }
-
- virtual ~BenchmarkReporter();
-
- // Write a human readable string to 'out' representing the specified
- // 'context'.
- // REQUIRES: 'out' is non-null.
- static void PrintBasicContext(std::ostream* out, Context const& context);
-
- private:
- std::ostream* output_stream_;
- std::ostream* error_stream_;
-};
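-
-// For example (a sketch): route all console output to stderr by handing a
-// reporter to RunSpecifiedBenchmarks(); any class implementing the interface
-// above can be substituted.
-//
-//   int main(int argc, char** argv) {
-//     benchmark::Initialize(&argc, argv);
-//     benchmark::ConsoleReporter reporter;
-//     reporter.SetOutputStream(&std::cerr);
-//     reporter.SetErrorStream(&std::cerr);
-//     benchmark::RunSpecifiedBenchmarks(&reporter);
-//   }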
-
-// Simple reporter that outputs benchmark data to the console. This is the
-// default reporter used by RunSpecifiedBenchmarks().
-class ConsoleReporter : public BenchmarkReporter {
- public:
- enum OutputOptions { OO_None, OO_Color };
- explicit ConsoleReporter(OutputOptions color_output = OO_Color)
- : name_field_width_(0), color_output_(color_output == OO_Color) {}
-
- virtual bool ReportContext(const Context& context);
- virtual void ReportRuns(const std::vector<Run>& reports);
-
- protected:
- virtual void PrintRunData(const Run& report);
- virtual void PrintHeader(const Run& report);
-
- size_t name_field_width_;
- bool printed_header_;
-
- private:
- bool color_output_;
-};
-
-class JSONReporter : public BenchmarkReporter {
- public:
- JSONReporter() : first_report_(true) {}
- virtual bool ReportContext(const Context& context);
- virtual void ReportRuns(const std::vector<Run>& reports);
- virtual void Finalize();
-
- private:
- void PrintRunData(const Run& report);
-
- bool first_report_;
-};
-
-class CSVReporter : public BenchmarkReporter {
- public:
- CSVReporter() : printed_header_(false) {}
- virtual bool ReportContext(const Context& context);
- virtual void ReportRuns(const std::vector<Run>& reports);
-
- private:
- void PrintRunData(const Run& report);
-
- bool printed_header_;
- std::set< std::string > user_counter_names_;
-};
-
-inline const char* GetTimeUnitString(TimeUnit unit) {
- switch (unit) {
- case kMillisecond:
- return "ms";
- case kMicrosecond:
- return "us";
- case kNanosecond:
- default:
- return "ns";
- }
-}
-
-inline double GetTimeUnitMultiplier(TimeUnit unit) {
- switch (unit) {
- case kMillisecond:
- return 1e3;
- case kMicrosecond:
- return 1e6;
- case kNanosecond:
- default:
- return 1e9;
- }
-}
-
-} // end namespace benchmark
-#endif // BENCHMARK_REPORTER_H_
diff --git a/libcxx/utils/google-benchmark/mingw.py b/libcxx/utils/google-benchmark/mingw.py
deleted file mode 100644
index 706ad559db9..00000000000
--- a/libcxx/utils/google-benchmark/mingw.py
+++ /dev/null
@@ -1,320 +0,0 @@
-#! /usr/bin/env python
-# encoding: utf-8
-
-import argparse
-import errno
-import logging
-import os
-import platform
-import re
-import sys
-import subprocess
-import tempfile
-
-try:
- import winreg
-except ImportError:
- import _winreg as winreg
-try:
- import urllib.request as request
-except ImportError:
- import urllib as request
-try:
- import urllib.parse as parse
-except ImportError:
- import urlparse as parse
-
-class EmptyLogger(object):
- '''
- Provides an implementation that performs no logging
- '''
- def debug(self, *k, **kw):
- pass
- def info(self, *k, **kw):
- pass
- def warn(self, *k, **kw):
- pass
- def error(self, *k, **kw):
- pass
- def critical(self, *k, **kw):
- pass
- def setLevel(self, *k, **kw):
- pass
-
-urls = (
- 'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20'
- 'targetting%20Win32/Personal%20Builds/mingw-builds/installer/'
- 'repository.txt',
- 'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/'
- 'repository.txt'
-)
-'''
-A list of mingw-build repositories
-'''
-
-def repository(urls = urls, log = EmptyLogger()):
- '''
-  Downloads mingw-build repository files and parses them
- '''
- log.info('getting mingw-builds repository')
- versions = {}
- re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files')
- re_sub = r'http://downloads.sourceforge.net/project/\1'
- for url in urls:
- log.debug(' - requesting: %s', url)
- socket = request.urlopen(url)
- repo = socket.read()
- if not isinstance(repo, str):
- repo = repo.decode();
- socket.close()
- for entry in repo.split('\n')[:-1]:
- value = entry.split('|')
- version = tuple([int(n) for n in value[0].strip().split('.')])
- version = versions.setdefault(version, {})
- arch = value[1].strip()
- if arch == 'x32':
- arch = 'i686'
- elif arch == 'x64':
- arch = 'x86_64'
- arch = version.setdefault(arch, {})
- threading = arch.setdefault(value[2].strip(), {})
- exceptions = threading.setdefault(value[3].strip(), {})
- revision = exceptions.setdefault(int(value[4].strip()[3:]),
- re_sourceforge.sub(re_sub, value[5].strip()))
- return versions
-
-def find_in_path(file, path=None):
- '''
- Attempts to find an executable in the path
- '''
- if platform.system() == 'Windows':
- file += '.exe'
- if path is None:
- path = os.environ.get('PATH', '')
- if type(path) is type(''):
- path = path.split(os.pathsep)
- return list(filter(os.path.exists,
- map(lambda dir, file=file: os.path.join(dir, file), path)))
-
-def find_7zip(log = EmptyLogger()):
- '''
- Attempts to find 7zip for unpacking the mingw-build archives
- '''
- log.info('finding 7zip')
- path = find_in_path('7z')
- if not path:
- key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip')
- path, _ = winreg.QueryValueEx(key, 'Path')
- path = [os.path.join(path, '7z.exe')]
- log.debug('found \'%s\'', path[0])
- return path[0]
-
-find_7zip()
-
-def unpack(archive, location, log = EmptyLogger()):
- '''
- Unpacks a mingw-builds archive
- '''
- sevenzip = find_7zip(log)
- log.info('unpacking %s', os.path.basename(archive))
- cmd = [sevenzip, 'x', archive, '-o' + location, '-y']
- log.debug(' - %r', cmd)
- with open(os.devnull, 'w') as devnull:
- subprocess.check_call(cmd, stdout = devnull)
-
-def download(url, location, log = EmptyLogger()):
- '''
- Downloads and unpacks a mingw-builds archive
- '''
- log.info('downloading MinGW')
- log.debug(' - url: %s', url)
- log.debug(' - location: %s', location)
-
- re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*')
-
- stream = request.urlopen(url)
- try:
- content = stream.getheader('Content-Disposition') or ''
- except AttributeError:
- content = stream.headers.getheader('Content-Disposition') or ''
- matches = re_content.match(content)
- if matches:
- filename = matches.group(2)
- else:
- parsed = parse.urlparse(stream.geturl())
- filename = os.path.basename(parsed.path)
-
- try:
- os.makedirs(location)
- except OSError as e:
- if e.errno == errno.EEXIST and os.path.isdir(location):
- pass
- else:
- raise
-
- archive = os.path.join(location, filename)
- with open(archive, 'wb') as out:
- while True:
- buf = stream.read(1024)
- if not buf:
- break
- out.write(buf)
- unpack(archive, location, log = log)
- os.remove(archive)
-
- possible = os.path.join(location, 'mingw64')
- if not os.path.exists(possible):
- possible = os.path.join(location, 'mingw32')
- if not os.path.exists(possible):
- raise ValueError('Failed to find unpacked MinGW: ' + possible)
- return possible
-
-def root(location = None, arch = None, version = None, threading = None,
- exceptions = None, revision = None, log = EmptyLogger()):
- '''
- Returns the root folder of a specific version of the mingw-builds variant
- of gcc. Will download the compiler if needed
- '''
-
- # Get the repository if we don't have all the information
- if not (arch and version and threading and exceptions and revision):
- versions = repository(log = log)
-
- # Determine some defaults
- version = version or max(versions.keys())
- if not arch:
- arch = platform.machine().lower()
- if arch == 'x86':
- arch = 'i686'
- elif arch == 'amd64':
- arch = 'x86_64'
- if not threading:
- keys = versions[version][arch].keys()
- if 'posix' in keys:
- threading = 'posix'
- elif 'win32' in keys:
- threading = 'win32'
- else:
- threading = keys[0]
- if not exceptions:
- keys = versions[version][arch][threading].keys()
- if 'seh' in keys:
- exceptions = 'seh'
- elif 'sjlj' in keys:
- exceptions = 'sjlj'
- else:
- exceptions = keys[0]
-    if revision is None:
- revision = max(versions[version][arch][threading][exceptions].keys())
- if not location:
- location = os.path.join(tempfile.gettempdir(), 'mingw-builds')
-
- # Get the download url
- url = versions[version][arch][threading][exceptions][revision]
-
- # Tell the user whatzzup
- log.info('finding MinGW %s', '.'.join(str(v) for v in version))
- log.debug(' - arch: %s', arch)
- log.debug(' - threading: %s', threading)
- log.debug(' - exceptions: %s', exceptions)
- log.debug(' - revision: %s', revision)
- log.debug(' - url: %s', url)
-
- # Store each specific revision differently
- slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}'
- slug = slug.format(
- version = '.'.join(str(v) for v in version),
- arch = arch,
- threading = threading,
- exceptions = exceptions,
- revision = revision
- )
- if arch == 'x86_64':
- root_dir = os.path.join(location, slug, 'mingw64')
- elif arch == 'i686':
- root_dir = os.path.join(location, slug, 'mingw32')
- else:
- raise ValueError('Unknown MinGW arch: ' + arch)
-
- # Download if needed
- if not os.path.exists(root_dir):
- downloaded = download(url, os.path.join(location, slug), log = log)
- if downloaded != root_dir:
- raise ValueError('The location of mingw did not match\n%s\n%s'
- % (downloaded, root_dir))
-
- return root_dir
-
-def str2ver(string):
- '''
- Converts a version string into a tuple
- '''
- try:
- version = tuple(int(v) for v in string.split('.'))
-    if len(version) != 3:
- raise ValueError()
- except ValueError:
- raise argparse.ArgumentTypeError(
- 'please provide a three digit version string')
- return version
-
-def main():
- '''
- Invoked when the script is run directly by the python interpreter
- '''
- parser = argparse.ArgumentParser(
- description = 'Downloads a specific version of MinGW',
- formatter_class = argparse.ArgumentDefaultsHelpFormatter
- )
- parser.add_argument('--location',
- help = 'the location to download the compiler to',
- default = os.path.join(tempfile.gettempdir(), 'mingw-builds'))
- parser.add_argument('--arch', required = True, choices = ['i686', 'x86_64'],
- help = 'the target MinGW architecture string')
- parser.add_argument('--version', type = str2ver,
- help = 'the version of GCC to download')
- parser.add_argument('--threading', choices = ['posix', 'win32'],
- help = 'the threading type of the compiler')
- parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'],
- help = 'the method to throw exceptions')
- parser.add_argument('--revision', type=int,
- help = 'the revision of the MinGW release')
- group = parser.add_mutually_exclusive_group()
- group.add_argument('-v', '--verbose', action='store_true',
- help='increase the script output verbosity')
- group.add_argument('-q', '--quiet', action='store_true',
-                     help='only print errors and warnings')
- args = parser.parse_args()
-
- # Create the logger
- logger = logging.getLogger('mingw')
- handler = logging.StreamHandler()
- formatter = logging.Formatter('%(message)s')
- handler.setFormatter(formatter)
- logger.addHandler(handler)
- logger.setLevel(logging.INFO)
- if args.quiet:
- logger.setLevel(logging.WARN)
- if args.verbose:
- logger.setLevel(logging.DEBUG)
-
- # Get MinGW
- root_dir = root(location = args.location, arch = args.arch,
- version = args.version, threading = args.threading,
- exceptions = args.exceptions, revision = args.revision,
- log = logger)
-
- sys.stdout.write('%s\n' % os.path.join(root_dir, 'bin'))
-
-if __name__ == '__main__':
- try:
- main()
- except IOError as e:
- sys.stderr.write('IO error: %s\n' % e)
- sys.exit(1)
- except OSError as e:
- sys.stderr.write('OS error: %s\n' % e)
- sys.exit(1)
- except KeyboardInterrupt as e:
- sys.stderr.write('Killed\n')
- sys.exit(1)
diff --git a/libcxx/utils/google-benchmark/src/CMakeLists.txt b/libcxx/utils/google-benchmark/src/CMakeLists.txt
index 7707773930c..e22620a7291 100644
--- a/libcxx/utils/google-benchmark/src/CMakeLists.txt
+++ b/libcxx/utils/google-benchmark/src/CMakeLists.txt
@@ -18,6 +18,9 @@ set_target_properties(benchmark PROPERTIES
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
)
+target_include_directories(benchmark PUBLIC
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
+ )
# Link threads.
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
@@ -35,11 +38,13 @@ set(include_install_dir "include")
set(lib_install_dir "lib/")
set(bin_install_dir "bin/")
set(config_install_dir "lib/cmake/${PROJECT_NAME}")
+set(pkgconfig_install_dir "lib/pkgconfig")
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
+set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc")
set(targets_export_name "${PROJECT_NAME}Targets")
set(namespace "${PROJECT_NAME}::")
@@ -50,26 +55,33 @@ write_basic_package_version_file(
)
configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
+configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY)
-# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
-install(
- TARGETS benchmark
- EXPORT ${targets_export_name}
- ARCHIVE DESTINATION ${lib_install_dir}
- LIBRARY DESTINATION ${lib_install_dir}
- RUNTIME DESTINATION ${bin_install_dir}
- INCLUDES DESTINATION ${include_install_dir})
-
-install(
- DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
- DESTINATION ${include_install_dir}
- FILES_MATCHING PATTERN "*.*h")
-
-install(
- FILES "${project_config}" "${version_config}"
- DESTINATION "${config_install_dir}")
-
-install(
- EXPORT "${targets_export_name}"
- NAMESPACE "${namespace}"
- DESTINATION "${config_install_dir}")
+if (BENCHMARK_ENABLE_INSTALL)
+ # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
+ install(
+ TARGETS benchmark
+ EXPORT ${targets_export_name}
+ ARCHIVE DESTINATION ${lib_install_dir}
+ LIBRARY DESTINATION ${lib_install_dir}
+ RUNTIME DESTINATION ${bin_install_dir}
+ INCLUDES DESTINATION ${include_install_dir})
+
+ install(
+ DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
+ DESTINATION ${include_install_dir}
+ FILES_MATCHING PATTERN "*.*h")
+
+ install(
+ FILES "${project_config}" "${version_config}"
+ DESTINATION "${config_install_dir}")
+
+ install(
+ FILES "${pkg_config}"
+ DESTINATION "${pkgconfig_install_dir}")
+
+ install(
+ EXPORT "${targets_export_name}"
+ NAMESPACE "${namespace}"
+ DESTINATION "${config_install_dir}")
+endif()
diff --git a/libcxx/utils/google-benchmark/src/benchmark.cc b/libcxx/utils/google-benchmark/src/benchmark.cc
index 00ffa07ff56..1a7d218283c 100644
--- a/libcxx/utils/google-benchmark/src/benchmark.cc
+++ b/libcxx/utils/google-benchmark/src/benchmark.cc
@@ -38,12 +38,12 @@
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
+#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "re.h"
-#include "stat.h"
+#include "statistics.h"
#include "string_util.h"
-#include "sysinfo.h"
#include "timers.h"
DEFINE_bool(benchmark_list_tests, false,
@@ -91,23 +91,23 @@ DEFINE_string(benchmark_color, "auto",
"environment variable is set to a terminal type that supports "
"colors.");
+DEFINE_bool(benchmark_counters_tabular, false,
+ "Whether to use tabular format when printing user counters to "
+            "the console. Valid values: 'true'/'yes'/1, 'false'/'no'/0. "
+ "Defaults to false.");
+
DEFINE_int32(v, 0, "The level of verbose logging to output");
namespace benchmark {
-namespace internal {
-
-void UseCharPointer(char const volatile*) {}
-
-} // end namespace internal
namespace {
-
static const size_t kMaxIterations = 1000000000;
-
} // end namespace
namespace internal {
+void UseCharPointer(char const volatile*) {}
+
class ThreadManager {
public:
ThreadManager(int num_threads)
@@ -175,7 +175,9 @@ class ThreadTimer {
CHECK(running_);
running_ = false;
real_time_used_ += ChronoClockNow() - start_real_time_;
- cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_;
+ // Floating point error can result in the subtraction producing a negative
+ // time. Guard against that.
+ cpu_time_used_ += std::max<double>(ThreadCPUUsage() - start_cpu_time_, 0);
}
// Called by each thread
@@ -251,7 +253,9 @@ BenchmarkReporter::Run CreateRunReport(
report.complexity_n = results.complexity_n;
report.complexity = b.complexity;
report.complexity_lambda = b.complexity_lambda;
+ report.statistics = b.statistics;
report.counters = results.counters;
+ internal::Finish(&report.counters, seconds, b.threads);
}
return report;
}
@@ -395,7 +399,7 @@ State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
internal::ThreadManager* manager)
: started_(false),
finished_(false),
- total_iterations_(0),
+ total_iterations_(max_iters + 1),
range_(ranges),
bytes_processed_(0),
items_processed_(0),
@@ -408,6 +412,7 @@ State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
timer_(timer),
manager_(manager) {
CHECK(max_iterations != 0) << "At least one iteration must be run";
+ CHECK(total_iterations_ != 0) << "max iterations wrapped around";
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
}
@@ -432,7 +437,7 @@ void State::SkipWithError(const char* msg) {
manager_->results.has_error_ = true;
}
}
- total_iterations_ = max_iterations;
+ total_iterations_ = 1;
if (timer_->running()) timer_->StopTimer();
}
@@ -457,8 +462,8 @@ void State::FinishKeepRunning() {
if (!error_occurred_) {
PauseTiming();
}
- // Total iterations now is one greater than max iterations. Fix this.
- total_iterations_ = max_iterations;
+ // Total iterations has now wrapped around zero. Fix this.
+ total_iterations_ = 1;
finished_ = true;
manager_->StartStopBarrier();
}
@@ -475,19 +480,19 @@ void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
// Determine the width of the name field using a minimum width of 10.
bool has_repetitions = FLAGS_benchmark_repetitions > 1;
size_t name_field_width = 10;
+ size_t stat_field_width = 0;
for (const Benchmark::Instance& benchmark : benchmarks) {
name_field_width =
std::max<size_t>(name_field_width, benchmark.name.size());
has_repetitions |= benchmark.repetitions > 1;
+
+ for(const auto& Stat : *benchmark.statistics)
+ stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
}
- if (has_repetitions) name_field_width += std::strlen("_stddev");
+ if (has_repetitions) name_field_width += 1 + stat_field_width;
// Print header here
BenchmarkReporter::Context context;
- context.num_cpus = NumCPUs();
- context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
-
- context.cpu_scaling_enabled = CpuScalingEnabled();
context.name_field_width = name_field_width;
// Keep track of runing times of all instances of current benchmark
@@ -521,10 +526,10 @@ void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
}
std::unique_ptr<BenchmarkReporter> CreateReporter(
- std::string const& name, ConsoleReporter::OutputOptions allow_color) {
+ std::string const& name, ConsoleReporter::OutputOptions output_opts) {
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (name == "console") {
- return PtrType(new ConsoleReporter(allow_color));
+ return PtrType(new ConsoleReporter(output_opts));
} else if (name == "json") {
return PtrType(new JSONReporter);
} else if (name == "csv") {
@@ -536,6 +541,30 @@ std::unique_ptr<BenchmarkReporter> CreateReporter(
}
} // end namespace
+
+bool IsZero(double n) {
+ return std::abs(n) < std::numeric_limits<double>::epsilon();
+}
+
+ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
+ int output_opts = ConsoleReporter::OO_Defaults;
+ if ((FLAGS_benchmark_color == "auto" && IsColorTerminal()) ||
+ IsTruthyFlagValue(FLAGS_benchmark_color)) {
+ output_opts |= ConsoleReporter::OO_Color;
+ } else {
+ output_opts &= ~ConsoleReporter::OO_Color;
+ }
+ if(force_no_color) {
+ output_opts &= ~ConsoleReporter::OO_Color;
+ }
+ if(FLAGS_benchmark_counters_tabular) {
+ output_opts |= ConsoleReporter::OO_Tabular;
+ } else {
+ output_opts &= ~ConsoleReporter::OO_Tabular;
+ }
+ return static_cast< ConsoleReporter::OutputOptions >(output_opts);
+}
+
} // end namespace internal
size_t RunSpecifiedBenchmarks() {
@@ -557,29 +586,21 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
std::unique_ptr<BenchmarkReporter> default_console_reporter;
std::unique_ptr<BenchmarkReporter> default_file_reporter;
if (!console_reporter) {
- auto output_opts = ConsoleReporter::OO_None;
- if (FLAGS_benchmark_color == "auto")
- output_opts = IsColorTerminal() ? ConsoleReporter::OO_Color
- : ConsoleReporter::OO_None;
- else
- output_opts = IsTruthyFlagValue(FLAGS_benchmark_color)
- ? ConsoleReporter::OO_Color
- : ConsoleReporter::OO_None;
- default_console_reporter =
- internal::CreateReporter(FLAGS_benchmark_format, output_opts);
+ default_console_reporter = internal::CreateReporter(
+ FLAGS_benchmark_format, internal::GetOutputOptions());
console_reporter = default_console_reporter.get();
}
auto& Out = console_reporter->GetOutputStream();
auto& Err = console_reporter->GetErrorStream();
std::string const& fname = FLAGS_benchmark_out;
- if (fname == "" && file_reporter) {
+ if (fname.empty() && file_reporter) {
Err << "A custom file reporter was provided but "
"--benchmark_out=<file> was not specified."
<< std::endl;
std::exit(1);
}
- if (fname != "") {
+ if (!fname.empty()) {
output_file.open(fname);
if (!output_file.is_open()) {
Err << "invalid file name: '" << fname << std::endl;
@@ -625,6 +646,7 @@ void PrintUsageAndExit() {
" [--benchmark_out=<filename>]\n"
" [--benchmark_out_format=<json|console|csv>]\n"
" [--benchmark_color={auto|true|false}]\n"
+ " [--benchmark_counters_tabular={true|false}]\n"
" [--v=<verbosity>]\n");
exit(0);
}
@@ -649,6 +671,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
// "color_print" is the deprecated name for "benchmark_color".
// TODO: Remove this.
ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
+ ParseBoolFlag(argv[i], "benchmark_counters_tabular",
+ &FLAGS_benchmark_counters_tabular) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
diff --git a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
index 828ed12165f..d481dc52864 100644
--- a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
+++ b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
@@ -1,7 +1,7 @@
#ifndef BENCHMARK_API_INTERNAL_H
#define BENCHMARK_API_INTERNAL_H
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include <cmath>
#include <iosfwd>
@@ -25,6 +25,7 @@ struct Benchmark::Instance {
BigO complexity;
BigOFunc* complexity_lambda;
UserCounters counters;
+ const std::vector<Statistics>* statistics;
bool last_benchmark_instance;
int repetitions;
double min_time;
@@ -36,13 +37,10 @@ bool FindBenchmarksInternal(const std::string& re,
std::vector<Benchmark::Instance>* benchmarks,
std::ostream* Err);
-namespace {
+bool IsZero(double n);
-bool IsZero(double n) {
- return std::abs(n) < std::numeric_limits<double>::epsilon();
-}
+ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
-} // end namespace
} // end namespace internal
} // end namespace benchmark
diff --git a/libcxx/utils/google-benchmark/src/benchmark_register.cc b/libcxx/utils/google-benchmark/src/benchmark_register.cc
index fe373204189..d5746a3632a 100644
--- a/libcxx/utils/google-benchmark/src/benchmark_register.cc
+++ b/libcxx/utils/google-benchmark/src/benchmark_register.cc
@@ -31,17 +31,17 @@
#include <fstream>
#include <iostream>
#include <memory>
+#include <sstream>
#include <thread>
#include "check.h"
#include "commandlineflags.h"
#include "complexity.h"
+#include "statistics.h"
#include "log.h"
#include "mutex.h"
#include "re.h"
-#include "stat.h"
#include "string_util.h"
-#include "sysinfo.h"
#include "timers.h"
namespace benchmark {
@@ -69,6 +69,9 @@ class BenchmarkFamilies {
// Registers a benchmark family and returns the index assigned to it.
size_t AddBenchmark(std::unique_ptr<Benchmark> family);
+ // Clear all registered benchmark families.
+ void ClearBenchmarks();
+
// Extract the list of benchmark instances that match the specified
// regular expression.
bool FindBenchmarks(const std::string& re,
@@ -94,6 +97,12 @@ size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
return index;
}
+void BenchmarkFamilies::ClearBenchmarks() {
+ MutexLock l(mutex_);
+ families_.clear();
+ families_.shrink_to_fit();
+}
+
bool BenchmarkFamilies::FindBenchmarks(
const std::string& spec, std::vector<Benchmark::Instance>* benchmarks,
std::ostream* ErrStream) {
@@ -149,6 +158,7 @@ bool BenchmarkFamilies::FindBenchmarks(
instance.use_manual_time = family->use_manual_time_;
instance.complexity = family->complexity_;
instance.complexity_lambda = family->complexity_lambda_;
+ instance.statistics = &family->statistics_;
instance.threads = num_threads;
// Add arguments to instance name
@@ -163,8 +173,8 @@ bool BenchmarkFamilies::FindBenchmarks(
StringPrintF("%s:", family->arg_names_[arg_i].c_str());
}
}
-
- instance.name += std::to_string(arg);
+
+ instance.name += StringPrintF("%d", arg);
++arg_i;
}
@@ -226,7 +236,11 @@ Benchmark::Benchmark(const char* name)
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
- complexity_lambda_(nullptr) {}
+ complexity_lambda_(nullptr) {
+ ComputeStatistics("mean", StatisticsMean);
+ ComputeStatistics("median", StatisticsMedian);
+ ComputeStatistics("stddev", StatisticsStdDev);
+}
Benchmark::~Benchmark() {}
@@ -399,6 +413,12 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
return this;
}
+Benchmark* Benchmark::ComputeStatistics(std::string name,
+ StatisticsFunc* statistics) {
+ statistics_.emplace_back(name, statistics);
+ return this;
+}
+
Benchmark* Benchmark::Threads(int t) {
CHECK_GT(t, 0);
thread_counts_.push_back(t);
@@ -427,8 +447,7 @@ Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
}
Benchmark* Benchmark::ThreadPerCpu() {
- static int num_cpus = NumCPUs();
- thread_counts_.push_back(num_cpus);
+ thread_counts_.push_back(CPUInfo::Get().num_cpus);
return this;
}
@@ -449,4 +468,9 @@ int Benchmark::ArgsCnt() const {
void FunctionBenchmark::Run(State& st) { func_(st); }
} // end namespace internal
+
+void ClearRegisteredBenchmarks() {
+ internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
+}
+
} // end namespace benchmark
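The ComputeStatistics() hook above (mean, median and stddev are pre-registered in the Benchmark constructor) also accepts user-supplied reducers, and the new benchmark::ClearRegisteredBenchmarks() empties the registry again. A rough usage sketch, with an illustrative statistic name and placeholder benchmark:

    #include <algorithm>
    #include <vector>
    #include "benchmark/benchmark.h"

    // Illustrative custom statistic: the largest value across repetitions.
    static double MyMax(const std::vector<double>& v) {
      return *std::max_element(v.begin(), v.end());
    }

    static void BM_Something(benchmark::State& state) {  // placeholder benchmark
      while (state.KeepRunning()) {
      }
    }
    BENCHMARK(BM_Something)->Repetitions(4)->ComputeStatistics("max", MyMax);

The aggregate would then be reported with a "_max" suffix alongside "_mean", "_median" and "_stddev", per the benchmark_name + "_" + Stat.name_ naming in statistics.cc.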
diff --git a/libcxx/utils/google-benchmark/src/check.h b/libcxx/utils/google-benchmark/src/check.h
index 6f1fe0cf860..73bead2fb55 100644
--- a/libcxx/utils/google-benchmark/src/check.h
+++ b/libcxx/utils/google-benchmark/src/check.h
@@ -3,6 +3,7 @@
#include <cstdlib>
#include <ostream>
+#include <cmath>
#include "internal_macros.h"
#include "log.h"
@@ -68,4 +69,11 @@ class CheckHandler {
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_LT(a, b) CHECK((a) < (b))
+#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps))
+#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps))
+#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps))
+#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps))
+#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps))
+#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps))
+
#endif // CHECK_H_
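The CHECK_FLOAT_* additions are epsilon-tolerant variants of the existing CHECK_* asserts; for example (tolerance chosen arbitrarily for illustration):

    // Passes: |(0.1 + 0.2) - 0.3| is about 5.6e-17, well below 1e-9.
    CHECK_FLOAT_EQ(0.1 + 0.2, 0.3, 1e-9);
    // An exact CHECK((0.1 + 0.2) == 0.3) would fail, since the sum is not
    // representable as exactly 0.3 in binary floating point.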
diff --git a/libcxx/utils/google-benchmark/src/colorprint.cc b/libcxx/utils/google-benchmark/src/colorprint.cc
index 513376b14b1..2dec4a8b28b 100644
--- a/libcxx/utils/google-benchmark/src/colorprint.cc
+++ b/libcxx/utils/google-benchmark/src/colorprint.cc
@@ -89,7 +89,7 @@ std::string FormatString(const char* msg, va_list args) {
std::size_t size = 256;
char local_buff[256];
- auto ret = std::vsnprintf(local_buff, size, msg, args_cp);
+ auto ret = vsnprintf(local_buff, size, msg, args_cp);
va_end(args_cp);
@@ -104,7 +104,7 @@ std::string FormatString(const char* msg, va_list args) {
// we did not provide a long enough buffer on our first attempt.
size = (size_t)ret + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
- ret = std::vsnprintf(buff.get(), size, msg, args);
+ ret = vsnprintf(buff.get(), size, msg, args);
CHECK(ret > 0 && ((size_t)ret) < size);
return buff.get();
}
diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.cc b/libcxx/utils/google-benchmark/src/commandlineflags.cc
index 72534e022a8..2fc92517a32 100644
--- a/libcxx/utils/google-benchmark/src/commandlineflags.cc
+++ b/libcxx/utils/google-benchmark/src/commandlineflags.cc
@@ -209,9 +209,9 @@ bool IsFlag(const char* str, const char* flag) {
return (ParseFlagValue(str, flag, true) != nullptr);
}
-bool IsTruthyFlagValue(const std::string& str) {
- if (str.empty()) return true;
- char ch = str[0];
+bool IsTruthyFlagValue(const std::string& value) {
+ if (value.empty()) return true;
+ char ch = value[0];
return isalnum(ch) &&
!(ch == '0' || ch == 'f' || ch == 'F' || ch == 'n' || ch == 'N');
}
diff --git a/libcxx/utils/google-benchmark/src/complexity.cc b/libcxx/utils/google-benchmark/src/complexity.cc
index 02adbef6292..88832698ef6 100644
--- a/libcxx/utils/google-benchmark/src/complexity.cc
+++ b/libcxx/utils/google-benchmark/src/complexity.cc
@@ -15,13 +15,12 @@
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include <algorithm>
#include <cmath>
#include "check.h"
#include "complexity.h"
-#include "stat.h"
namespace benchmark {
@@ -35,9 +34,9 @@ BigOFunc* FittingCurve(BigO complexity) {
case oNCubed:
return [](int n) -> double { return std::pow(n, 3); };
case oLogN:
- return [](int n) { return std::log2(n); };
+ return [](int n) { return log2(n); };
case oNLogN:
- return [](int n) { return n * std::log2(n); };
+ return [](int n) { return n * log2(n); };
case o1:
default:
return [](int) { return 1.0; };
@@ -150,109 +149,6 @@ LeastSq MinimalLeastSq(const std::vector<int>& n,
return best_fit;
}
-std::vector<BenchmarkReporter::Run> ComputeStats(
- const std::vector<BenchmarkReporter::Run>& reports) {
- typedef BenchmarkReporter::Run Run;
- std::vector<Run> results;
-
- auto error_count =
- std::count_if(reports.begin(), reports.end(),
- [](Run const& run) { return run.error_occurred; });
-
- if (reports.size() - error_count < 2) {
- // We don't report aggregated data if there was a single run.
- return results;
- }
- // Accumulators.
- Stat1_d real_accumulated_time_stat;
- Stat1_d cpu_accumulated_time_stat;
- Stat1_d bytes_per_second_stat;
- Stat1_d items_per_second_stat;
- // All repetitions should be run with the same number of iterations so we
- // can take this information from the first benchmark.
- int64_t const run_iterations = reports.front().iterations;
- // create stats for user counters
- struct CounterStat {
- Counter c;
- Stat1_d s;
- };
- std::map< std::string, CounterStat > counter_stats;
- for(Run const& r : reports) {
- for(auto const& cnt : r.counters) {
- auto it = counter_stats.find(cnt.first);
- if(it == counter_stats.end()) {
- counter_stats.insert({cnt.first, {cnt.second, Stat1_d{}}});
- } else {
- CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
- }
- }
- }
-
- // Populate the accumulators.
- for (Run const& run : reports) {
- CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
- CHECK_EQ(run_iterations, run.iterations);
- if (run.error_occurred) continue;
- real_accumulated_time_stat +=
- Stat1_d(run.real_accumulated_time / run.iterations, run.iterations);
- cpu_accumulated_time_stat +=
- Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations);
- items_per_second_stat += Stat1_d(run.items_per_second, run.iterations);
- bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations);
- // user counters
- for(auto const& cnt : run.counters) {
- auto it = counter_stats.find(cnt.first);
- CHECK_NE(it, counter_stats.end());
- it->second.s += Stat1_d(cnt.second, run.iterations);
- }
- }
-
- // Get the data from the accumulator to BenchmarkReporter::Run's.
- Run mean_data;
- mean_data.benchmark_name = reports[0].benchmark_name + "_mean";
- mean_data.iterations = run_iterations;
- mean_data.real_accumulated_time =
- real_accumulated_time_stat.Mean() * run_iterations;
- mean_data.cpu_accumulated_time =
- cpu_accumulated_time_stat.Mean() * run_iterations;
- mean_data.bytes_per_second = bytes_per_second_stat.Mean();
- mean_data.items_per_second = items_per_second_stat.Mean();
- mean_data.time_unit = reports[0].time_unit;
- // user counters
- for(auto const& kv : counter_stats) {
- auto c = Counter(kv.second.s.Mean(), counter_stats[kv.first].c.flags);
- mean_data.counters[kv.first] = c;
- }
-
- // Only add label to mean/stddev if it is same for all runs
- mean_data.report_label = reports[0].report_label;
- for (std::size_t i = 1; i < reports.size(); i++) {
- if (reports[i].report_label != reports[0].report_label) {
- mean_data.report_label = "";
- break;
- }
- }
-
- Run stddev_data;
- stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev";
- stddev_data.report_label = mean_data.report_label;
- stddev_data.iterations = 0;
- stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev();
- stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev();
- stddev_data.bytes_per_second = bytes_per_second_stat.StdDev();
- stddev_data.items_per_second = items_per_second_stat.StdDev();
- stddev_data.time_unit = reports[0].time_unit;
- // user counters
- for(auto const& kv : counter_stats) {
- auto c = Counter(kv.second.s.StdDev(), counter_stats[kv.first].c.flags);
- stddev_data.counters[kv.first] = c;
- }
-
- results.push_back(mean_data);
- results.push_back(stddev_data);
- return results;
-}
-
std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports) {
typedef BenchmarkReporter::Run Run;
diff --git a/libcxx/utils/google-benchmark/src/complexity.h b/libcxx/utils/google-benchmark/src/complexity.h
index 23cd9bbc8c7..df29b48d29b 100644
--- a/libcxx/utils/google-benchmark/src/complexity.h
+++ b/libcxx/utils/google-benchmark/src/complexity.h
@@ -21,17 +21,10 @@
#include <string>
#include <vector>
-#include "benchmark/benchmark_api.h"
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
namespace benchmark {
-// Return a vector containing the mean and standard devation information for
-// the specified list of reports. If 'reports' contains less than two
-// non-errored runs an empty vector is returned
-std::vector<BenchmarkReporter::Run> ComputeStats(
- const std::vector<BenchmarkReporter::Run>& reports);
-
// Return a vector containing the bigO and RMS information for the specified
// list of reports. If 'reports.size() < 2' an empty vector is returned.
std::vector<BenchmarkReporter::Run> ComputeBigO(
@@ -58,4 +51,5 @@ struct LeastSq {
std::string GetBigOString(BigO complexity);
} // end namespace benchmark
+
#endif // COMPLEXITY_H_
diff --git a/libcxx/utils/google-benchmark/src/console_reporter.cc b/libcxx/utils/google-benchmark/src/console_reporter.cc
index 3f3de02945a..48920ca7829 100644
--- a/libcxx/utils/google-benchmark/src/console_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/console_reporter.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
#include "complexity.h"
#include "counter.h"
@@ -36,15 +36,16 @@ namespace benchmark {
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
printed_header_ = false;
+ prev_counters_.clear();
PrintBasicContext(&GetErrorStream(), context);
#ifdef BENCHMARK_OS_WINDOWS
- if (color_output_ && &std::cout != &GetOutputStream()) {
+ if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
- color_output_ = false;
+ output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color);
}
#endif
@@ -52,25 +53,39 @@ bool ConsoleReporter::ReportContext(const Context& context) {
}
void ConsoleReporter::PrintHeader(const Run& run) {
- std::string str =
- FormatString("%-*s %13s %13s %10s\n", static_cast<int>(name_field_width_),
- "Benchmark", "Time", "CPU", "Iterations");
+ std::string str = FormatString("%-*s %13s %13s %10s", static_cast<int>(name_field_width_),
+ "Benchmark", "Time", "CPU", "Iterations");
if(!run.counters.empty()) {
- str += " UserCounters...";
+ if(output_options_ & OO_Tabular) {
+ for(auto const& c : run.counters) {
+ str += FormatString(" %10s", c.first.c_str());
+ }
+ } else {
+ str += " UserCounters...";
+ }
}
+ str += "\n";
std::string line = std::string(str.length(), '-');
GetOutputStream() << line << "\n" << str << line << "\n";
}
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& run : reports) {
- // print the header if none was printed yet
- if (!printed_header_) {
+ // print the header:
+ // --- if none was printed yet
+ bool print_header = !printed_header_;
+ // --- or if the format is tabular and this run
+ // has different fields from the prev header
+ print_header |= (output_options_ & OO_Tabular) &&
+ (!internal::SameNames(run.counters, prev_counters_));
+ if (print_header) {
printed_header_ = true;
+ prev_counters_ = run.counters;
PrintHeader(run);
}
// As an alternative to printing the headers like this, we could sort
- // the benchmarks by header and then print like that.
+ // the benchmarks by header and then print. But this would require
+ // waiting for the full results before printing, or printing twice.
PrintRunData(run);
}
}
@@ -86,8 +101,8 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
- PrinterFn* printer =
- color_output_ ? (PrinterFn*)ColorPrintf : IgnoreColorPrint;
+ PrinterFn* printer = (output_options_ & OO_Color) ?
+ (PrinterFn*)ColorPrintf : IgnoreColorPrint;
auto name_color =
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
printer(Out, name_color, "%-*s ", name_field_width_,
@@ -133,8 +148,20 @@ void ConsoleReporter::PrintRunData(const Run& result) {
}
for (auto& c : result.counters) {
- auto const& s = HumanReadableNumber(c.second.value);
- printer(Out, COLOR_DEFAULT, " %s=%s", c.first.c_str(), s.c_str());
+ const std::size_t cNameLen = std::max(std::string::size_type(10),
+ c.first.length());
+ auto const& s = HumanReadableNumber(c.second.value, 1000);
+ if (output_options_ & OO_Tabular) {
+ if (c.second.flags & Counter::kIsRate) {
+ printer(Out, COLOR_DEFAULT, " %*s/s", cNameLen - 2, s.c_str());
+ } else {
+ printer(Out, COLOR_DEFAULT, " %*s", cNameLen, s.c_str());
+ }
+ } else {
+ const char* unit = (c.second.flags & Counter::kIsRate) ? "/s" : "";
+ printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(),
+ unit);
+ }
}
if (!rate.empty()) {
diff --git a/libcxx/utils/google-benchmark/src/counter.cc b/libcxx/utils/google-benchmark/src/counter.cc
index 307863d3c4d..ed1aa044ee7 100644
--- a/libcxx/utils/google-benchmark/src/counter.cc
+++ b/libcxx/utils/google-benchmark/src/counter.cc
@@ -30,7 +30,7 @@ double Finish(Counter const& c, double cpu_time, double num_threads) {
void Finish(UserCounters *l, double cpu_time, double num_threads) {
for (auto &c : *l) {
- c.second = Finish(c.second, cpu_time, num_threads);
+ c.second.value = Finish(c.second, cpu_time, num_threads);
}
}
@@ -39,7 +39,7 @@ void Increment(UserCounters *l, UserCounters const& r) {
for (auto &c : *l) {
auto it = r.find(c.first);
if (it != r.end()) {
- c.second = c.second + it->second;
+ c.second.value = c.second + it->second;
}
}
// add counters present in r, but not in *l
@@ -57,7 +57,7 @@ bool SameNames(UserCounters const& l, UserCounters const& r) {
return false;
}
for (auto const& c : l) {
- if ( r.find(c.first) == r.end()) {
+ if (r.find(c.first) == r.end()) {
return false;
}
}
diff --git a/libcxx/utils/google-benchmark/src/counter.h b/libcxx/utils/google-benchmark/src/counter.h
index bbb92d9a298..dd6865a31d7 100644
--- a/libcxx/utils/google-benchmark/src/counter.h
+++ b/libcxx/utils/google-benchmark/src/counter.h
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
namespace benchmark {
diff --git a/libcxx/utils/google-benchmark/src/csv_reporter.cc b/libcxx/utils/google-benchmark/src/csv_reporter.cc
index 6779815b3c3..35510645b08 100644
--- a/libcxx/utils/google-benchmark/src/csv_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/csv_reporter.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
@@ -35,7 +35,7 @@ std::vector<std::string> elements = {
"name", "iterations", "real_time", "cpu_time",
"time_unit", "bytes_per_second", "items_per_second", "label",
"error_occurred", "error_message"};
-}
+} // namespace
bool CSVReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
@@ -137,8 +137,11 @@ void CSVReporter::PrintRunData(const Run & run) {
// Print user counters
for (const auto &ucn : user_counter_names_) {
auto it = run.counters.find(ucn);
- CHECK(it != run.counters.end());
- Out << "," << it->second;
+ if(it == run.counters.end()) {
+ Out << ",";
+ } else {
+ Out << "," << it->second;
+ }
}
Out << '\n';
}
diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h
index e0f9b01f9d3..4251fe4c32a 100644
--- a/libcxx/utils/google-benchmark/src/cycleclock.h
+++ b/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -23,7 +23,7 @@
#include <cstdint>
-#include "benchmark/macros.h"
+#include "benchmark/benchmark.h"
#include "internal_macros.h"
#if defined(BENCHMARK_OS_MACOSX)
diff --git a/libcxx/utils/google-benchmark/src/internal_macros.h b/libcxx/utils/google-benchmark/src/internal_macros.h
index ab9dd85c102..c34f5716e61 100644
--- a/libcxx/utils/google-benchmark/src/internal_macros.h
+++ b/libcxx/utils/google-benchmark/src/internal_macros.h
@@ -1,36 +1,45 @@
#ifndef BENCHMARK_INTERNAL_MACROS_H_
#define BENCHMARK_INTERNAL_MACROS_H_
-#include "benchmark/macros.h"
+#include "benchmark/benchmark.h"
#ifndef __has_feature
#define __has_feature(x) 0
#endif
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
#if defined(__clang__)
-#define COMPILER_CLANG
+ #if !defined(COMPILER_CLANG)
+ #define COMPILER_CLANG
+ #endif
#elif defined(_MSC_VER)
-#define COMPILER_MSVC
+ #if !defined(COMPILER_MSVC)
+ #define COMPILER_MSVC
+ #endif
#elif defined(__GNUC__)
-#define COMPILER_GCC
+ #if !defined(COMPILER_GCC)
+ #define COMPILER_GCC
+ #endif
#endif
#if __has_feature(cxx_attributes)
-#define BENCHMARK_NORETURN [[noreturn]]
+ #define BENCHMARK_NORETURN [[noreturn]]
#elif defined(__GNUC__)
-#define BENCHMARK_NORETURN __attribute__((noreturn))
+ #define BENCHMARK_NORETURN __attribute__((noreturn))
#elif defined(COMPILER_MSVC)
-#define BENCHMARK_NORETURN __declspec(noreturn)
+ #define BENCHMARK_NORETURN __declspec(noreturn)
#else
-#define BENCHMARK_NORETURN
+ #define BENCHMARK_NORETURN
#endif
#if defined(__CYGWIN__)
-#define BENCHMARK_OS_CYGWIN 1
+ #define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
-#define BENCHMARK_OS_WINDOWS 1
+ #define BENCHMARK_OS_WINDOWS 1
#elif defined(__APPLE__)
-#include "TargetConditionals.h"
+ #include "TargetConditionals.h"
#if defined(TARGET_OS_MAC)
#define BENCHMARK_OS_MACOSX 1
#if defined(TARGET_OS_IPHONE)
@@ -38,18 +47,36 @@
#endif
#endif
#elif defined(__FreeBSD__)
-#define BENCHMARK_OS_FREEBSD 1
+ #define BENCHMARK_OS_FREEBSD 1
+#elif defined(__NetBSD__)
+ #define BENCHMARK_OS_NETBSD 1
#elif defined(__linux__)
-#define BENCHMARK_OS_LINUX 1
+ #define BENCHMARK_OS_LINUX 1
#elif defined(__native_client__)
-#define BENCHMARK_OS_NACL 1
+ #define BENCHMARK_OS_NACL 1
#elif defined(EMSCRIPTEN)
-#define BENCHMARK_OS_EMSCRIPTEN 1
+ #define BENCHMARK_OS_EMSCRIPTEN 1
+#elif defined(__rtems__)
+ #define BENCHMARK_OS_RTEMS 1
#endif
#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \
&& !defined(__EXCEPTIONS)
-#define BENCHMARK_HAS_NO_EXCEPTIONS
+ #define BENCHMARK_HAS_NO_EXCEPTIONS
+#endif
+
+#if defined(COMPILER_CLANG) || defined(COMPILER_GCC)
+ #define BENCHMARK_MAYBE_UNUSED __attribute__((unused))
+#else
+ #define BENCHMARK_MAYBE_UNUSED
+#endif
+
+#if defined(COMPILER_GCC) || __has_builtin(__builtin_unreachable)
+ #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
+#elif defined(COMPILER_MSVC)
+ #define BENCHMARK_UNREACHABLE() __assume(false)
+#else
+ #define BENCHMARK_UNREACHABLE() ((void)0)
#endif
#endif // BENCHMARK_INTERNAL_MACROS_H_
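BENCHMARK_MAYBE_UNUSED and BENCHMARK_UNREACHABLE() are consumed by the rewritten sysinfo.cc further down; a typical pattern, with a hypothetical helper for illustration only:

    #include <cstddef>
    #include <cstdint>

    BENCHMARK_MAYBE_UNUSED  // suppress -Wunused if the helper is compiled out
    static int WidthOf(std::size_t Size) {  // hypothetical helper
      if (Size == sizeof(std::uint32_t)) return 32;
      if (Size == sizeof(std::uint64_t)) return 64;
      BENCHMARK_UNREACHABLE();  // documents that no other size can occur here
    }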
diff --git a/libcxx/utils/google-benchmark/src/json_reporter.cc b/libcxx/utils/google-benchmark/src/json_reporter.cc
index 5a653088e5b..b5ae302ad48 100644
--- a/libcxx/utils/google-benchmark/src/json_reporter.cc
+++ b/libcxx/utils/google-benchmark/src/json_reporter.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
@@ -21,6 +21,8 @@
#include <string>
#include <tuple>
#include <vector>
+#include <iomanip> // for setprecision
+#include <limits>
#include "string_util.h"
#include "timers.h"
@@ -48,7 +50,14 @@ std::string FormatKV(std::string const& key, int64_t value) {
}
std::string FormatKV(std::string const& key, double value) {
- return StringPrintF("\"%s\": %.2f", key.c_str(), value);
+ std::stringstream ss;
+ ss << '"' << key << "\": ";
+
+ const auto max_digits10 = std::numeric_limits<decltype (value)>::max_digits10;
+ const auto max_fractional_digits10 = max_digits10 - 1;
+
+ ss << std::scientific << std::setprecision(max_fractional_digits10) << value;
+ return ss.str();
}
int64_t RoundDouble(double v) { return static_cast<int64_t>(v + 0.5); }
@@ -68,13 +77,37 @@ bool JSONReporter::ReportContext(const Context& context) {
std::string walltime_value = LocalDateTimeString();
out << indent << FormatKV("date", walltime_value) << ",\n";
- out << indent << FormatKV("num_cpus", static_cast<int64_t>(context.num_cpus))
+ CPUInfo const& info = context.cpu_info;
+ out << indent << FormatKV("num_cpus", static_cast<int64_t>(info.num_cpus))
<< ",\n";
- out << indent << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu))
+ out << indent
+ << FormatKV("mhz_per_cpu",
+ RoundDouble(info.cycles_per_second / 1000000.0))
<< ",\n";
- out << indent << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled)
+ out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled)
<< ",\n";
+ out << indent << "\"caches\": [\n";
+ indent = std::string(6, ' ');
+ std::string cache_indent(8, ' ');
+ for (size_t i = 0; i < info.caches.size(); ++i) {
+ auto& CI = info.caches[i];
+ out << indent << "{\n";
+ out << cache_indent << FormatKV("type", CI.type) << ",\n";
+ out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
+ << ",\n";
+ out << cache_indent
+ << FormatKV("size", static_cast<int64_t>(CI.size) * 1000u) << ",\n";
+ out << cache_indent
+ << FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
+ << "\n";
+ out << indent << "}";
+ if (i != info.caches.size() - 1) out << ",";
+ out << "\n";
+ }
+ indent = std::string(4, ' ');
+ out << indent << "],\n";
+
#if defined(NDEBUG)
const char build_type[] = "release";
#else
@@ -125,18 +158,18 @@ void JSONReporter::PrintRunData(Run const& run) {
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
out << indent
- << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime()))
+ << FormatKV("real_time", run.GetAdjustedRealTime())
<< ",\n";
out << indent
- << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime()));
+ << FormatKV("cpu_time", run.GetAdjustedCPUTime());
out << ",\n"
<< indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if (run.report_big_o) {
out << indent
- << FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime()))
+ << FormatKV("cpu_coefficient", run.GetAdjustedCPUTime())
<< ",\n";
out << indent
- << FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime()))
+ << FormatKV("real_coefficient", run.GetAdjustedRealTime())
<< ",\n";
out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n";
out << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
@@ -147,17 +180,17 @@ void JSONReporter::PrintRunData(Run const& run) {
if (run.bytes_per_second > 0.0) {
out << ",\n"
<< indent
- << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second));
+ << FormatKV("bytes_per_second", run.bytes_per_second);
}
if (run.items_per_second > 0.0) {
out << ",\n"
<< indent
- << FormatKV("items_per_second", RoundDouble(run.items_per_second));
+ << FormatKV("items_per_second", run.items_per_second);
}
for(auto &c : run.counters) {
out << ",\n"
<< indent
- << FormatKV(c.first, RoundDouble(c.second));
+ << FormatKV(c.first, c.second);
}
if (!run.report_label.empty()) {
out << ",\n" << indent << FormatKV("label", run.report_label);
diff --git a/libcxx/utils/google-benchmark/src/log.h b/libcxx/utils/google-benchmark/src/log.h
index 978cb0b4c8c..d06e1031db1 100644
--- a/libcxx/utils/google-benchmark/src/log.h
+++ b/libcxx/utils/google-benchmark/src/log.h
@@ -4,7 +4,7 @@
#include <iostream>
#include <ostream>
-#include "benchmark/macros.h"
+#include "benchmark/benchmark.h"
namespace benchmark {
namespace internal {
@@ -70,4 +70,4 @@ inline LogType& GetLogInstanceForLevel(int level) {
(::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \
" ")
-#endif
\ No newline at end of file
+#endif
diff --git a/libcxx/utils/google-benchmark/src/reporter.cc b/libcxx/utils/google-benchmark/src/reporter.cc
index 64742426cd3..5d2fa05a2b2 100644
--- a/libcxx/utils/google-benchmark/src/reporter.cc
+++ b/libcxx/utils/google-benchmark/src/reporter.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/reporter.h"
+#include "benchmark/benchmark.h"
#include "timers.h"
#include <cstdlib>
@@ -22,7 +22,6 @@
#include <vector>
#include "check.h"
-#include "stat.h"
namespace benchmark {
@@ -31,17 +30,29 @@ BenchmarkReporter::BenchmarkReporter()
BenchmarkReporter::~BenchmarkReporter() {}
-void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr,
+void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Context const &context) {
- CHECK(out_ptr) << "cannot be null";
- auto &Out = *out_ptr;
-
- Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
- << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
+ CHECK(out) << "cannot be null";
+ auto &Out = *out;
Out << LocalDateTimeString() << "\n";
- if (context.cpu_scaling_enabled) {
+ const CPUInfo &info = context.cpu_info;
+ Out << "Run on (" << info.num_cpus << " X "
+ << (info.cycles_per_second / 1000000.0) << " MHz CPU "
+ << ((info.num_cpus > 1) ? "s" : "") << ")\n";
+ if (info.caches.size() != 0) {
+ Out << "CPU Caches:\n";
+ for (auto &CInfo : info.caches) {
+ Out << " L" << CInfo.level << " " << CInfo.type << " "
+ << (CInfo.size / 1000) << "K";
+ if (CInfo.num_sharing != 0)
+ Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")";
+ Out << "\n";
+ }
+ }
+
+ if (info.scaling_enabled) {
Out << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "
"overhead.\n";
@@ -53,6 +64,8 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr,
#endif
}
+BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {}
+
double BenchmarkReporter::Run::GetAdjustedRealTime() const {
double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) new_time /= static_cast<double>(iterations);
diff --git a/libcxx/utils/google-benchmark/src/stat.h b/libcxx/utils/google-benchmark/src/stat.h
deleted file mode 100644
index 136c3aa8d61..00000000000
--- a/libcxx/utils/google-benchmark/src/stat.h
+++ /dev/null
@@ -1,306 +0,0 @@
-#ifndef BENCHMARK_STAT_H_
-#define BENCHMARK_STAT_H_
-
-#include <cmath>
-#include <limits>
-#include <ostream>
-#include <type_traits>
-
-namespace benchmark {
-
-template <typename VType, typename NumType>
-class Stat1;
-
-template <typename VType, typename NumType>
-class Stat1MinMax;
-
-typedef Stat1<float, int64_t> Stat1_f;
-typedef Stat1<double, int64_t> Stat1_d;
-typedef Stat1MinMax<float, int64_t> Stat1MinMax_f;
-typedef Stat1MinMax<double, int64_t> Stat1MinMax_d;
-
-template <typename VType>
-class Vector2;
-template <typename VType>
-class Vector3;
-template <typename VType>
-class Vector4;
-
-template <typename VType, typename NumType>
-class Stat1 {
- public:
- typedef Stat1<VType, NumType> Self;
-
- Stat1() { Clear(); }
- // Create a sample of value dat and weight 1
- explicit Stat1(const VType &dat) {
- sum_ = dat;
- sum_squares_ = Sqr(dat);
- numsamples_ = 1;
- }
- // Create statistics for all the samples between begin (included)
- // and end(excluded)
- explicit Stat1(const VType *begin, const VType *end) {
- Clear();
- for (const VType *item = begin; item < end; ++item) {
- (*this) += Stat1(*item);
- }
- }
- // Create a sample of value dat and weight w
- Stat1(const VType &dat, const NumType &w) {
- sum_ = w * dat;
- sum_squares_ = w * Sqr(dat);
- numsamples_ = w;
- }
- // Copy operator
- Stat1(const Self &stat) {
- sum_ = stat.sum_;
- sum_squares_ = stat.sum_squares_;
- numsamples_ = stat.numsamples_;
- }
-
- void Clear() {
- numsamples_ = NumType();
- sum_squares_ = sum_ = VType();
- }
-
- Self &operator=(const Self &stat) {
- sum_ = stat.sum_;
- sum_squares_ = stat.sum_squares_;
- numsamples_ = stat.numsamples_;
- return (*this);
- }
- // Merge statistics from two sample sets.
- Self &operator+=(const Self &stat) {
- sum_ += stat.sum_;
- sum_squares_ += stat.sum_squares_;
- numsamples_ += stat.numsamples_;
- return (*this);
- }
- // The operation opposite to +=
- Self &operator-=(const Self &stat) {
- sum_ -= stat.sum_;
- sum_squares_ -= stat.sum_squares_;
- numsamples_ -= stat.numsamples_;
- return (*this);
- }
- // Multiply the weight of the set of samples by a factor k
- Self &operator*=(const VType &k) {
- sum_ *= k;
- sum_squares_ *= k;
- numsamples_ *= k;
- return (*this);
- }
-
- // Merge statistics from two sample sets.
- Self operator+(const Self &stat) const { return Self(*this) += stat; }
-
- // The operation opposite to +
- Self operator-(const Self &stat) const { return Self(*this) -= stat; }
-
- // Multiply the weight of the set of samples by a factor k
- Self operator*(const VType &k) const { return Self(*this) *= k; }
-
- // Return the total weight of this sample set
- NumType numSamples() const { return numsamples_; }
-
- // Return the sum of this sample set
- VType Sum() const { return sum_; }
-
- // Return the mean of this sample set
- VType Mean() const {
- if (numsamples_ == 0) return VType();
- return sum_ * (1.0 / numsamples_);
- }
-
- // Return the mean of this sample set and compute the standard deviation at
- // the same time.
- VType Mean(VType *stddev) const {
- if (numsamples_ == 0) return VType();
- VType mean = sum_ * (1.0 / numsamples_);
- if (stddev) {
- VType avg_squares = sum_squares_ * (1.0 / numsamples_);
- *stddev = Sqrt(avg_squares - Sqr(mean));
- }
- return mean;
- }
-
- // Return the standard deviation of the sample set
- VType StdDev() const {
- if (numsamples_ == 0) return VType();
- VType mean = Mean();
- VType avg_squares = sum_squares_ * (1.0 / numsamples_);
- return Sqrt(avg_squares - Sqr(mean));
- }
-
- private:
- static_assert(std::is_integral<NumType>::value &&
- !std::is_same<NumType, bool>::value,
- "NumType must be an integral type that is not bool.");
- // Let i be the index of the samples provided (using +=)
- // and weight[i],value[i] be the data of sample #i
- // then the variables have the following meaning:
- NumType numsamples_; // sum of weight[i];
- VType sum_; // sum of weight[i]*value[i];
- VType sum_squares_; // sum of weight[i]*value[i]^2;
-
- // Template function used to square a number.
- // For a vector we square all components
- template <typename SType>
- static inline SType Sqr(const SType &dat) {
- return dat * dat;
- }
-
- template <typename SType>
- static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
- return dat.MulComponents(dat);
- }
-
- template <typename SType>
- static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
- return dat.MulComponents(dat);
- }
-
- template <typename SType>
- static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
- return dat.MulComponents(dat);
- }
-
- // Template function used to take the square root of a number.
- // For a vector we square all components
- template <typename SType>
- static inline SType Sqrt(const SType &dat) {
- // Avoid NaN due to imprecision in the calculations
- if (dat < 0) return 0;
- return sqrt(dat);
- }
-
- template <typename SType>
- static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
- // Avoid NaN due to imprecision in the calculations
- return Max(dat, Vector2<SType>()).Sqrt();
- }
-
- template <typename SType>
- static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
- // Avoid NaN due to imprecision in the calculations
- return Max(dat, Vector3<SType>()).Sqrt();
- }
-
- template <typename SType>
- static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
- // Avoid NaN due to imprecision in the calculations
- return Max(dat, Vector4<SType>()).Sqrt();
- }
-};
-
-// Useful printing function
-template <typename VType, typename NumType>
-std::ostream &operator<<(std::ostream &out, const Stat1<VType, NumType> &s) {
- out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
- << " nsamples = " << s.NumSamples() << "}";
- return out;
-}
-
-// Stat1MinMax: same as Stat1, but it also
-// keeps the Min and Max values; the "-"
-// operator is disabled because it cannot be implemented
-// efficiently
-template <typename VType, typename NumType>
-class Stat1MinMax : public Stat1<VType, NumType> {
- public:
- typedef Stat1MinMax<VType, NumType> Self;
-
- Stat1MinMax() { Clear(); }
- // Create a sample of value dat and weight 1
- explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
- max_ = dat;
- min_ = dat;
- }
- // Create statistics for all the samples between begin (included)
- // and end(excluded)
- explicit Stat1MinMax(const VType *begin, const VType *end) {
- Clear();
- for (const VType *item = begin; item < end; ++item) {
- (*this) += Stat1MinMax(*item);
- }
- }
- // Create a sample of value dat and weight w
- Stat1MinMax(const VType &dat, const NumType &w)
- : Stat1<VType, NumType>(dat, w) {
- max_ = dat;
- min_ = dat;
- }
- // Copy operator
- Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
- max_ = stat.max_;
- min_ = stat.min_;
- }
-
- void Clear() {
- Stat1<VType, NumType>::Clear();
- if (std::numeric_limits<VType>::has_infinity) {
- min_ = std::numeric_limits<VType>::infinity();
- max_ = -std::numeric_limits<VType>::infinity();
- } else {
- min_ = std::numeric_limits<VType>::max();
- max_ = std::numeric_limits<VType>::min();
- }
- }
-
- Self &operator=(const Self &stat) {
- this->Stat1<VType, NumType>::operator=(stat);
- max_ = stat.max_;
- min_ = stat.min_;
- return (*this);
- }
- // Merge statistics from two sample sets.
- Self &operator+=(const Self &stat) {
- this->Stat1<VType, NumType>::operator+=(stat);
- if (stat.max_ > max_) max_ = stat.max_;
- if (stat.min_ < min_) min_ = stat.min_;
- return (*this);
- }
- // Multiply the weight of the set of samples by a factor k
- Self &operator*=(const VType &stat) {
- this->Stat1<VType, NumType>::operator*=(stat);
- return (*this);
- }
- // Merge statistics from two sample sets.
- Self operator+(const Self &stat) const { return Self(*this) += stat; }
- // Multiply the weight of the set of samples by a factor k
- Self operator*(const VType &k) const { return Self(*this) *= k; }
-
- // Return the maximal value in this sample set
- VType Max() const { return max_; }
- // Return the minimal value in this sample set
- VType Min() const { return min_; }
-
- private:
- // The - operation makes no sense with Min/Max
- // unless we keep the full list of values (but we don't)
- // make it private, and let it undefined so nobody can call it
- Self &operator-=(const Self &stat); // senseless. let it undefined.
-
- // The operation opposite to -
- Self operator-(const Self &stat) const; // senseless. let it undefined.
-
- // Let i be the index of the samples provided (using +=)
- // and weight[i],value[i] be the data of sample #i
- // then the variables have the following meaning:
- VType max_; // max of value[i]
- VType min_; // min of value[i]
-};
-
-// Useful printing function
-template <typename VType, typename NumType>
-std::ostream &operator<<(std::ostream &out,
- const Stat1MinMax<VType, NumType> &s) {
- out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
- << " nsamples = " << s.NumSamples() << " min = " << s.Min()
- << " max = " << s.Max() << "}";
- return out;
-}
-} // end namespace benchmark
-
-#endif // BENCHMARK_STAT_H_
diff --git a/libcxx/utils/google-benchmark/src/statistics.cc b/libcxx/utils/google-benchmark/src/statistics.cc
new file mode 100644
index 00000000000..5932ad43860
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/statistics.cc
@@ -0,0 +1,175 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+// Copyright 2017 Roman Lebedev. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <numeric>
+#include "check.h"
+#include "statistics.h"
+
+namespace benchmark {
+
+auto StatisticsSum = [](const std::vector<double>& v) {
+ return std::accumulate(v.begin(), v.end(), 0.0);
+};
+
+double StatisticsMean(const std::vector<double>& v) {
+ if (v.size() == 0) return 0.0;
+ return StatisticsSum(v) * (1.0 / v.size());
+}
+
+double StatisticsMedian(const std::vector<double>& v) {
+ if (v.size() < 3) return StatisticsMean(v);
+ std::vector<double> partial;
+ // we need roundDown(count/2)+1 slots
+ partial.resize(1 + (v.size() / 2));
+ std::partial_sort_copy(v.begin(), v.end(), partial.begin(), partial.end());
+ // did we have an odd number of samples?
+ // if yes, then the last element of the partially-sorted vector is the median
+ // if no, then the average of the last two elements is the median
+ if(v.size() % 2 == 1)
+ return partial.back();
+ return (partial[partial.size() - 2] + partial[partial.size() - 1]) / 2.0;
+}
+
+// Return the sum of the squares of this sample set
+auto SumSquares = [](const std::vector<double>& v) {
+ return std::inner_product(v.begin(), v.end(), v.begin(), 0.0);
+};
+
+auto Sqr = [](const double dat) { return dat * dat; };
+auto Sqrt = [](const double dat) {
+ // Avoid NaN due to imprecision in the calculations
+ if (dat < 0.0) return 0.0;
+ return std::sqrt(dat);
+};
+
+double StatisticsStdDev(const std::vector<double>& v) {
+ const auto mean = StatisticsMean(v);
+ if (v.size() == 0) return mean;
+
+ // Sample standard deviation is undefined for n = 1
+ if (v.size() == 1)
+ return 0.0;
+
+ const double avg_squares = SumSquares(v) * (1.0 / v.size());
+ return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean)));
+}
+
+std::vector<BenchmarkReporter::Run> ComputeStats(
+ const std::vector<BenchmarkReporter::Run>& reports) {
+ typedef BenchmarkReporter::Run Run;
+ std::vector<Run> results;
+
+ auto error_count =
+ std::count_if(reports.begin(), reports.end(),
+ [](Run const& run) { return run.error_occurred; });
+
+ if (reports.size() - error_count < 2) {
+ // We don't report aggregated data if there was a single run.
+ return results;
+ }
+
+ // Accumulators.
+ std::vector<double> real_accumulated_time_stat;
+ std::vector<double> cpu_accumulated_time_stat;
+ std::vector<double> bytes_per_second_stat;
+ std::vector<double> items_per_second_stat;
+
+ real_accumulated_time_stat.reserve(reports.size());
+ cpu_accumulated_time_stat.reserve(reports.size());
+ bytes_per_second_stat.reserve(reports.size());
+ items_per_second_stat.reserve(reports.size());
+
+ // All repetitions should be run with the same number of iterations so we
+ // can take this information from the first benchmark.
+ int64_t const run_iterations = reports.front().iterations;
+ // create stats for user counters
+ struct CounterStat {
+ Counter c;
+ std::vector<double> s;
+ };
+ std::map< std::string, CounterStat > counter_stats;
+ for(Run const& r : reports) {
+ for(auto const& cnt : r.counters) {
+ auto it = counter_stats.find(cnt.first);
+ if(it == counter_stats.end()) {
+ counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
+ it = counter_stats.find(cnt.first);
+ it->second.s.reserve(reports.size());
+ } else {
+ CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
+ }
+ }
+ }
+
+ // Populate the accumulators.
+ for (Run const& run : reports) {
+ CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
+ CHECK_EQ(run_iterations, run.iterations);
+ if (run.error_occurred) continue;
+ real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
+ cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
+ items_per_second_stat.emplace_back(run.items_per_second);
+ bytes_per_second_stat.emplace_back(run.bytes_per_second);
+ // user counters
+ for(auto const& cnt : run.counters) {
+ auto it = counter_stats.find(cnt.first);
+ CHECK_NE(it, counter_stats.end());
+ it->second.s.emplace_back(cnt.second);
+ }
+ }
+
+ // Only add label if it is the same for all runs
+ std::string report_label = reports[0].report_label;
+ for (std::size_t i = 1; i < reports.size(); i++) {
+ if (reports[i].report_label != report_label) {
+ report_label = "";
+ break;
+ }
+ }
+
+ for(const auto& Stat : *reports[0].statistics) {
+ // Get the data from the accumulator to BenchmarkReporter::Run's.
+ Run data;
+ data.benchmark_name = reports[0].benchmark_name + "_" + Stat.name_;
+ data.report_label = report_label;
+ data.iterations = run_iterations;
+
+ data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
+ data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
+ data.bytes_per_second = Stat.compute_(bytes_per_second_stat);
+ data.items_per_second = Stat.compute_(items_per_second_stat);
+
+ data.time_unit = reports[0].time_unit;
+
+ // user counters
+ for(auto const& kv : counter_stats) {
+ const auto uc_stat = Stat.compute_(kv.second.s);
+ auto c = Counter(uc_stat, counter_stats[kv.first].c.flags);
+ data.counters[kv.first] = c;
+ }
+
+ results.push_back(data);
+ }
+
+ return results;
+}
+
+} // end namespace benchmark
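A quick worked check of the helpers above (hand-computed, assuming the declarations from statistics.h are visible):

    std::vector<double> v = {1.0, 2.0, 3.0, 4.0};
    double mean   = benchmark::StatisticsMean(v);    // (1+2+3+4)/4             = 2.5
    double median = benchmark::StatisticsMedian(v);  // even count: (2+3)/2     = 2.5
    double stddev = benchmark::StatisticsStdDev(v);  // sqrt(4/3*(7.5 - 6.25)) ~= 1.291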
diff --git a/libcxx/utils/google-benchmark/src/statistics.h b/libcxx/utils/google-benchmark/src/statistics.h
new file mode 100644
index 00000000000..7eccc85536a
--- /dev/null
+++ b/libcxx/utils/google-benchmark/src/statistics.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+// Copyright 2017 Roman Lebedev. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef STATISTICS_H_
+#define STATISTICS_H_
+
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+namespace benchmark {
+
+// Return a vector containing the mean, median and standard deviation information
+// (and any user-specified info) for the specified list of reports. If 'reports'
+// contains fewer than two non-errored runs, an empty vector is returned.
+std::vector<BenchmarkReporter::Run> ComputeStats(
+ const std::vector<BenchmarkReporter::Run>& reports);
+
+double StatisticsMean(const std::vector<double>& v);
+double StatisticsMedian(const std::vector<double>& v);
+double StatisticsStdDev(const std::vector<double>& v);
+
+} // end namespace benchmark
+
+#endif // STATISTICS_H_
diff --git a/libcxx/utils/google-benchmark/src/string_util.cc b/libcxx/utils/google-benchmark/src/string_util.cc
index cd4e7cfde57..29edb2a4683 100644
--- a/libcxx/utils/google-benchmark/src/string_util.cc
+++ b/libcxx/utils/google-benchmark/src/string_util.cc
@@ -27,8 +27,6 @@ static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
static const int64_t kUnitsSize = arraysize(kBigSIUnits);
-} // end anonymous namespace
-
void ToExponentAndMantissa(double val, double thresh, int precision,
double one_k, std::string* mantissa,
int64_t* exponent) {
@@ -100,14 +98,16 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) {
}
std::string ToBinaryStringFullySpecified(double value, double threshold,
- int precision) {
+ int precision, double one_k = 1024.0) {
std::string mantissa;
int64_t exponent;
- ToExponentAndMantissa(value, threshold, precision, 1024.0, &mantissa,
+ ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa,
&exponent);
return mantissa + ExponentToPrefix(exponent, false);
}
+} // end namespace
+
void AppendHumanReadable(int n, std::string* str) {
std::stringstream ss;
// Round down to the nearest SI prefix.
@@ -115,11 +115,11 @@ void AppendHumanReadable(int n, std::string* str) {
*str += ss.str();
}
-std::string HumanReadableNumber(double n) {
+std::string HumanReadableNumber(double n, double one_k) {
// 1.1 means that figures up to 1.1k should be shown with the next unit down;
// this softens edge effects.
// 1 means that we should show one decimal place of precision.
- return ToBinaryStringFullySpecified(n, 1.1, 1);
+ return ToBinaryStringFullySpecified(n, 1.1, 1, one_k);
}
std::string StringPrintFImp(const char* msg, va_list args) {
diff --git a/libcxx/utils/google-benchmark/src/string_util.h b/libcxx/utils/google-benchmark/src/string_util.h
index 0b190b91a16..c3d53bfd334 100644
--- a/libcxx/utils/google-benchmark/src/string_util.h
+++ b/libcxx/utils/google-benchmark/src/string_util.h
@@ -10,7 +10,7 @@ namespace benchmark {
void AppendHumanReadable(int n, std::string* str);
-std::string HumanReadableNumber(double n);
+std::string HumanReadableNumber(double n, double one_k = 1024.0);
std::string StringPrintF(const char* format, ...);
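HumanReadableNumber() gains a one_k parameter defaulting to the old 1024 base; the console reporter passes 1000 so user counters get metric prefixes. Illustrative call sites (exact output strings are not taken from the patch):

    // User-counter style: scale with metric (1000-based) prefixes.
    std::string metric = benchmark::HumanReadableNumber(8.0e6, 1000.0);
    // Existing call sites are unchanged and keep the 1024-based scaling.
    std::string binary = benchmark::HumanReadableNumber(8.0e6);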
diff --git a/libcxx/utils/google-benchmark/src/sysinfo.cc b/libcxx/utils/google-benchmark/src/sysinfo.cc
index 7feb79e65f2..2520ad5aeda 100644
--- a/libcxx/utils/google-benchmark/src/sysinfo.cc
+++ b/libcxx/utils/google-benchmark/src/sysinfo.cc
@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "sysinfo.h"
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
@@ -25,21 +24,29 @@
#include <sys/time.h>
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
#include <unistd.h>
-#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
+#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
+ defined BENCHMARK_OS_NETBSD
+#define BENCHMARK_HAS_SYSCTL
#include <sys/sysctl.h>
#endif
#endif
+#include <algorithm>
+#include <array>
+#include <bitset>
#include <cerrno>
+#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <fstream>
#include <iostream>
+#include <iterator>
#include <limits>
-#include <mutex>
+#include <memory>
+#include <sstream>
-#include "arraysize.h"
#include "check.h"
#include "cycleclock.h"
#include "internal_macros.h"
@@ -49,214 +56,431 @@
namespace benchmark {
namespace {
-std::once_flag cpuinfo_init;
-double cpuinfo_cycles_per_second = 1.0;
-int cpuinfo_num_cpus = 1; // Conservative guess
-#if !defined BENCHMARK_OS_MACOSX
-const int64_t estimate_time_ms = 1000;
+void PrintImp(std::ostream& out) { out << std::endl; }
-// Helper function estimates cycles/sec by observing cycles elapsed during
-// sleep(). Using small sleep time decreases accuracy significantly.
-int64_t EstimateCyclesPerSecond() {
- const int64_t start_ticks = cycleclock::Now();
- SleepForMilliseconds(estimate_time_ms);
- return cycleclock::Now() - start_ticks;
+template <class First, class... Rest>
+void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
+ out << std::forward<First>(f);
+ PrintImp(out, std::forward<Rest>(rest)...);
+}
+
+template <class... Args>
+BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
+ PrintImp(std::cerr, std::forward<Args>(args)...);
+ std::exit(EXIT_FAILURE);
+}
+
+#ifdef BENCHMARK_HAS_SYSCTL
+
+/// ValueUnion - A type used to correctly alias the byte-for-byte output of
+/// `sysctl` with the result type it's to be interpreted as.
+struct ValueUnion {
+ union DataT {
+ uint32_t uint32_value;
+ uint64_t uint64_value;
+ // For correct aliasing of union members from bytes.
+ char bytes[8];
+ };
+ using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
+
+ // The size of the data union member + its trailing array size.
+ size_t Size;
+ DataPtr Buff;
+
+ public:
+ ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
+
+ explicit ValueUnion(size_t BuffSize)
+ : Size(sizeof(DataT) + BuffSize),
+ Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
+
+ ValueUnion(ValueUnion&& other) = default;
+
+ explicit operator bool() const { return bool(Buff); }
+
+ char* data() const { return Buff->bytes; }
+
+ std::string GetAsString() const { return std::string(data()); }
+
+ int64_t GetAsInteger() const {
+ if (Size == sizeof(Buff->uint32_value))
+ return static_cast<int32_t>(Buff->uint32_value);
+ else if (Size == sizeof(Buff->uint64_value))
+ return static_cast<int64_t>(Buff->uint64_value);
+ BENCHMARK_UNREACHABLE();
+ }
+
+ uint64_t GetAsUnsigned() const {
+ if (Size == sizeof(Buff->uint32_value))
+ return Buff->uint32_value;
+ else if (Size == sizeof(Buff->uint64_value))
+ return Buff->uint64_value;
+ BENCHMARK_UNREACHABLE();
+ }
+
+ template <class T, int N>
+ std::array<T, N> GetAsArray() {
+ const int ArrSize = sizeof(T) * N;
+ CHECK_LE(ArrSize, Size);
+ std::array<T, N> Arr;
+ std::memcpy(Arr.data(), data(), ArrSize);
+ return Arr;
+ }
+};
+
+ValueUnion GetSysctlImp(std::string const& Name) {
+ size_t CurBuffSize = 0;
+ if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
+ return ValueUnion();
+
+ ValueUnion buff(CurBuffSize);
+ if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
+ return buff;
+ return ValueUnion();
+}
+
+BENCHMARK_MAYBE_UNUSED
+bool GetSysctl(std::string const& Name, std::string* Out) {
+ Out->clear();
+ auto Buff = GetSysctlImp(Name);
+ if (!Buff) return false;
+ Out->assign(Buff.data());
+ return true;
+}
+
+template <class Tp,
+ class = typename std::enable_if<std::is_integral<Tp>::value>::type>
+bool GetSysctl(std::string const& Name, Tp* Out) {
+ *Out = 0;
+ auto Buff = GetSysctlImp(Name);
+ if (!Buff) return false;
+ *Out = static_cast<Tp>(Buff.GetAsUnsigned());
+ return true;
+}
+
+template <class Tp, size_t N>
+bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
+ auto Buff = GetSysctlImp(Name);
+ if (!Buff) return false;
+ *Out = Buff.GetAsArray<Tp, N>();
+ return true;
}
#endif
-#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
-// Helper function for reading an int from a file. Returns true if successful
-// and the memory location pointed to by value is set to the value read.
-bool ReadIntFromFile(const char* file, long* value) {
- bool ret = false;
- int fd = open(file, O_RDONLY);
- if (fd != -1) {
- char line[1024];
- char* err;
- memset(line, '\0', sizeof(line));
- ssize_t read_err = read(fd, line, sizeof(line) - 1);
- ((void)read_err); // prevent unused warning
- CHECK(read_err >= 0);
- const long temp_value = strtol(line, &err, 10);
- if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
- *value = temp_value;
- ret = true;
+template <class ArgT>
+bool ReadFromFile(std::string const& fname, ArgT* arg) {
+ *arg = ArgT();
+ std::ifstream f(fname.c_str());
+ if (!f.is_open()) return false;
+ f >> *arg;
+ return f.good();
+}
+
+bool CpuScalingEnabled(int num_cpus) {
+ // We don't have a valid CPU count, so don't even bother.
+ if (num_cpus <= 0) return false;
+#ifndef BENCHMARK_OS_WINDOWS
+ // On Linux, the CPUfreq subsystem exposes CPU information as files on the
+ // local file system. If reading the exported files fails, then we may not be
+ // running on Linux, so we silently ignore all the read errors.
+ std::string res;
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ std::string governor_file =
+ StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
+ if (ReadFromFile(governor_file, &res) && res != "performance") return true;
+ }
+#endif
+ return false;
+}
+
+int CountSetBitsInCPUMap(std::string Val) {
+ auto CountBits = [](std::string Part) {
+ using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
+ Part = "0x" + Part;
+ CPUMask Mask(std::stoul(Part, nullptr, 16));
+ return static_cast<int>(Mask.count());
+ };
+ size_t Pos;
+ int total = 0;
+ while ((Pos = Val.find(',')) != std::string::npos) {
+ total += CountBits(Val.substr(0, Pos));
+ Val = Val.substr(Pos + 1);
+ }
+ if (!Val.empty()) {
+ total += CountBits(Val);
+ }
+ return total;
+}
+
+BENCHMARK_MAYBE_UNUSED
+std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
+ std::vector<CPUInfo::CacheInfo> res;
+ std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
+ int Idx = 0;
+ while (true) {
+ CPUInfo::CacheInfo info;
+ std::string FPath = StrCat(dir, "index", Idx++, "/");
+ std::ifstream f(StrCat(FPath, "size").c_str());
+ if (!f.is_open()) break;
+ std::string suffix;
+ f >> info.size;
+ if (f.fail())
+ PrintErrorAndDie("Failed while reading file '", FPath, "size'");
+ if (f.good()) {
+ f >> suffix;
+ if (f.bad())
+ PrintErrorAndDie(
+ "Invalid cache size format: failed to read size suffix");
+ else if (f && suffix != "K")
+ PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
+ else if (suffix == "K")
+ info.size *= 1000;
}
- close(fd);
+ if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
+ PrintErrorAndDie("Failed to read from file ", FPath, "type");
+ if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
+ PrintErrorAndDie("Failed to read from file ", FPath, "level");
+ std::string map_str;
+ if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
+ PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
+ info.num_sharing = CountSetBitsInCPUMap(map_str);
+ res.push_back(info);
}
- return ret;
+
+ return res;
}
-#endif
-#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
-static std::string convertToLowerCase(std::string s) {
- for (auto& ch : s)
- ch = std::tolower(ch);
- return s;
+#ifdef BENCHMARK_OS_MACOSX
+std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
+ std::vector<CPUInfo::CacheInfo> res;
+ std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
+ GetSysctl("hw.cacheconfig", &CacheCounts);
+
+ struct {
+ std::string name;
+ std::string type;
+ int level;
+ size_t num_sharing;
+ } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
+ {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
+ {"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
+ {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
+ for (auto& C : Cases) {
+ int val;
+ if (!GetSysctl(C.name, &val)) continue;
+ CPUInfo::CacheInfo info;
+ info.type = C.type;
+ info.level = C.level;
+ info.size = val;
+ info.num_sharing = static_cast<int>(C.num_sharing);
+ res.push_back(std::move(info));
+ }
+ return res;
}
-static bool startsWithKey(std::string Value, std::string Key,
- bool IgnoreCase = true) {
- if (IgnoreCase) {
- Key = convertToLowerCase(std::move(Key));
- Value = convertToLowerCase(std::move(Value));
+#elif defined(BENCHMARK_OS_WINDOWS)
+std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
+ std::vector<CPUInfo::CacheInfo> res;
+ DWORD buffer_size = 0;
+ using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
+ using CInfo = CACHE_DESCRIPTOR;
+
+ using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
+ GetLogicalProcessorInformation(nullptr, &buffer_size);
+ UPtr buff((PInfo*)malloc(buffer_size), &std::free);
+ if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
+ PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
+ GetLastError());
+
+ PInfo* it = buff.get();
+ PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
+
+ for (; it != end; ++it) {
+ if (it->Relationship != RelationCache) continue;
+ using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
+ BitSet B(it->ProcessorMask);
+ // To prevent duplicates, only consider caches where CPU 0 is specified
+ if (!B.test(0)) continue;
+ CInfo* Cache = &it->Cache;
+ CPUInfo::CacheInfo C;
+ C.num_sharing = B.count();
+ C.level = Cache->Level;
+ C.size = Cache->Size;
+ switch (Cache->Type) {
+ case CacheUnified:
+ C.type = "Unified";
+ break;
+ case CacheInstruction:
+ C.type = "Instruction";
+ break;
+ case CacheData:
+ C.type = "Data";
+ break;
+ case CacheTrace:
+ C.type = "Trace";
+ break;
+ default:
+ C.type = "Unknown";
+ break;
+ }
+ res.push_back(C);
}
- return Value.compare(0, Key.size(), Key) == 0;
+ return res;
}
#endif
-void InitializeSystemInfo() {
+std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
+#ifdef BENCHMARK_OS_MACOSX
+ return GetCacheSizesMacOSX();
+#elif defined(BENCHMARK_OS_WINDOWS)
+ return GetCacheSizesWindows();
+#else
+ return GetCacheSizesFromKVFS();
+#endif
+}
+
+int GetNumCPUs() {
+#ifdef BENCHMARK_HAS_SYSCTL
+ int NumCPU = -1;
+ if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
+ fprintf(stderr, "Err: %s\n", strerror(errno));
+ std::exit(EXIT_FAILURE);
+#elif defined(BENCHMARK_OS_WINDOWS)
+ SYSTEM_INFO sysinfo;
+ // Use memset as opposed to = {} to avoid GCC missing initializer false
+ // positives.
+ std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
+ GetSystemInfo(&sysinfo);
+ return sysinfo.dwNumberOfProcessors; // number of logical
+ // processors in the current
+ // group
+#else
+ int NumCPUs = 0;
+ int MaxID = -1;
+ std::ifstream f("/proc/cpuinfo");
+ if (!f.is_open()) {
+ std::cerr << "failed to open /proc/cpuinfo\n";
+ return -1;
+ }
+ const std::string Key = "processor";
+ std::string ln;
+ while (std::getline(f, ln)) {
+ if (ln.empty()) continue;
+ size_t SplitIdx = ln.find(':');
+ std::string value;
+ if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+ if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
+ NumCPUs++;
+ if (!value.empty()) {
+ int CurID = std::stoi(value);
+ MaxID = std::max(CurID, MaxID);
+ }
+ }
+ }
+ if (f.bad()) {
+ std::cerr << "Failure reading /proc/cpuinfo\n";
+ return -1;
+ }
+ if (!f.eof()) {
+ std::cerr << "Failed to read to end of /proc/cpuinfo\n";
+ return -1;
+ }
+ f.close();
+
+ if ((MaxID + 1) != NumCPUs) {
+ fprintf(stderr,
+ "CPU ID assignments in /proc/cpuinfo seem messed up."
+ " This is usually caused by a bad BIOS.\n");
+ }
+ return NumCPUs;
+#endif
+ BENCHMARK_UNREACHABLE();
+}
+
+double GetCPUCyclesPerSecond() {
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
- char line[1024];
- char* err;
long freq;
- bool saw_mhz = false;
-
// If the kernel is exporting the tsc frequency use that. There are issues
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
// exporting an invalid p-state (on x86) or p-states may be used to put the
// processor in a new mode (turbo mode). Essentially, those frequencies
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
// well.
- if (!saw_mhz &&
- ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
+ if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
+ // If CPU scaling is in effect, we want to use the *maximum* frequency,
+ // not whatever CPU speed some random processor happens to be using now.
+ || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &freq)) {
// The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000".
- cpuinfo_cycles_per_second = freq * 1000.0;
- saw_mhz = true;
+ return freq * 1000.0;
}
- // If CPU scaling is in effect, we want to use the *maximum* frequency,
- // not whatever CPU speed some random processor happens to be using now.
- if (!saw_mhz &&
- ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
- &freq)) {
- // The value is in kHz. For example, on a 2GHz warpstation, the file
- // contains the value "2000000".
- cpuinfo_cycles_per_second = freq * 1000.0;
- saw_mhz = true;
- }
+ const double error_value = -1;
+ double bogo_clock = error_value;
- // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
- const char* pname = "/proc/cpuinfo";
- int fd = open(pname, O_RDONLY);
- if (fd == -1) {
- perror(pname);
- if (!saw_mhz) {
- cpuinfo_cycles_per_second =
- static_cast<double>(EstimateCyclesPerSecond());
- }
- return;
+ std::ifstream f("/proc/cpuinfo");
+ if (!f.is_open()) {
+ std::cerr << "failed to open /proc/cpuinfo\n";
+ return error_value;
}
- double bogo_clock = 1.0;
- bool saw_bogo = false;
- long max_cpu_id = 0;
- int num_cpus = 0;
- line[0] = line[1] = '\0';
- size_t chars_read = 0;
- do { // we'll exit when the last read didn't read anything
- // Move the next line to the beginning of the buffer
- const size_t oldlinelen = strlen(line);
- if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
- line[0] = '\0';
- else // still other lines left to save
- memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
- // Terminate the new line, reading more if we can't find the newline
- char* newline = strchr(line, '\n');
- if (newline == nullptr) {
- const size_t linelen = strlen(line);
- const size_t bytes_to_read = sizeof(line) - 1 - linelen;
- CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
- chars_read = read(fd, line + linelen, bytes_to_read);
- line[linelen + chars_read] = '\0';
- newline = strchr(line, '\n');
- }
- if (newline != nullptr) *newline = '\0';
+ auto startsWithKey = [](std::string const& Value, std::string const& Key) {
+ if (Key.size() > Value.size()) return false;
+ auto Cmp = [&](char X, char Y) {
+ return std::tolower(X) == std::tolower(Y);
+ };
+ return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
+ };
+ std::string ln;
+ while (std::getline(f, ln)) {
+ if (ln.empty()) continue;
+ size_t SplitIdx = ln.find(':');
+ std::string value;
+ if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept positive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
- if (!saw_mhz && startsWithKey(line, "cpu MHz")) {
- const char* freqstr = strchr(line, ':');
- if (freqstr) {
- cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
- if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
- saw_mhz = true;
+ if (startsWithKey(ln, "cpu MHz")) {
+ if (!value.empty()) {
+ double cycles_per_second = std::stod(value) * 1000000.0;
+ if (cycles_per_second > 0) return cycles_per_second;
}
- } else if (startsWithKey(line, "bogomips")) {
- const char* freqstr = strchr(line, ':');
- if (freqstr) {
- bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
- if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
- saw_bogo = true;
+ } else if (startsWithKey(ln, "bogomips")) {
+ if (!value.empty()) {
+ bogo_clock = std::stod(value) * 1000000.0;
+ if (bogo_clock < 0.0) bogo_clock = error_value;
}
- } else if (startsWithKey(line, "processor", /*IgnoreCase*/false)) {
- // The above comparison is case-sensitive because ARM kernels often
- // include a "Processor" line that tells you about the CPU, distinct
- // from the usual "processor" lines that give you CPU ids. No current
- // Linux architecture is using "Processor" for CPU ids.
- num_cpus++; // count up every time we see an "processor :" entry
- const char* id_str = strchr(line, ':');
- if (id_str) {
- const long cpu_id = strtol(id_str + 1, &err, 10);
- if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
- max_cpu_id = cpu_id;
- }
- }
- } while (chars_read > 0);
- close(fd);
-
- if (!saw_mhz) {
- if (saw_bogo) {
- // If we didn't find anything better, we'll use bogomips, but
- // we're not happy about it.
- cpuinfo_cycles_per_second = bogo_clock;
- } else {
- // If we don't even have bogomips, we'll use the slow estimation.
- cpuinfo_cycles_per_second =
- static_cast<double>(EstimateCyclesPerSecond());
}
}
- if (num_cpus == 0) {
- fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
- } else {
- if ((max_cpu_id + 1) != num_cpus) {
- fprintf(stderr,
- "CPU ID assignments in /proc/cpuinfo seem messed up."
- " This is usually caused by a bad BIOS.\n");
- }
- cpuinfo_num_cpus = num_cpus;
+ if (f.bad()) {
+ std::cerr << "Failure reading /proc/cpuinfo\n";
+ return error_value;
}
+ if (!f.eof()) {
+ std::cerr << "Failed to read to end of /proc/cpuinfo\n";
+ return error_value;
+ }
+ f.close();
+ // If we found the bogomips clock, but nothing better, we'll use it (but
+ // we're not happy about it); otherwise, fallback to the rough estimation
+ // below.
+ if (bogo_clock >= 0.0) return bogo_clock;
-#elif defined BENCHMARK_OS_FREEBSD
-// For this sysctl to work, the machine must be configured without
-// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
-// and later. Before that, it's a 32-bit quantity (and gives the
-// wrong answer on machines faster than 2^32 Hz). See
-// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
-// But also compare FreeBSD 7.0:
-// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
-// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
-// To FreeBSD 6.3 (it's the same in 6-STABLE):
-// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
-// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
-#if __FreeBSD__ >= 7
- uint64_t hz = 0;
+#elif defined BENCHMARK_HAS_SYSCTL
+ constexpr auto* FreqStr =
+#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
+ "machdep.tsc_freq";
#else
- unsigned int hz = 0;
+ "hw.cpufrequency";
#endif
- size_t sz = sizeof(hz);
- const char* sysctl_path = "machdep.tsc_freq";
- if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
- fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
- sysctl_path, strerror(errno));
- cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
- } else {
- cpuinfo_cycles_per_second = hz;
- }
-// TODO: also figure out cpuinfo_num_cpus
+ unsigned long long hz = 0;
+ if (GetSysctl(FreqStr, &hz)) return hz;
+
+ fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
+ FreqStr, strerror(errno));
#elif defined BENCHMARK_OS_WINDOWS
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
@@ -267,89 +491,27 @@ void InitializeSystemInfo() {
SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", nullptr, &data, &data_size)))
- cpuinfo_cycles_per_second =
- static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz
- else
- cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
-
- SYSTEM_INFO sysinfo;
- // Use memset as opposed to = {} to avoid GCC missing initializer false
- // positives.
- std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
- GetSystemInfo(&sysinfo);
- cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical
- // processors in the current
- // group
-
-#elif defined BENCHMARK_OS_MACOSX
- int32_t num_cpus = 0;
- size_t size = sizeof(num_cpus);
- if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 &&
- (size == sizeof(num_cpus))) {
- cpuinfo_num_cpus = num_cpus;
- } else {
- fprintf(stderr, "%s\n", strerror(errno));
- std::exit(EXIT_FAILURE);
- }
- int64_t cpu_freq = 0;
- size = sizeof(cpu_freq);
- if (::sysctlbyname("hw.cpufrequency", &cpu_freq, &size, nullptr, 0) == 0 &&
- (size == sizeof(cpu_freq))) {
- cpuinfo_cycles_per_second = cpu_freq;
- } else {
- #if defined BENCHMARK_OS_IOS
- fprintf(stderr, "CPU frequency cannot be detected. \n");
- cpuinfo_cycles_per_second = 0;
- #else
- fprintf(stderr, "%s\n", strerror(errno));
- std::exit(EXIT_FAILURE);
- #endif
- }
-#else
- // Generic cycles per second counter
- cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
+ return static_cast<double>((int64_t)data *
+ (int64_t)(1000 * 1000)); // was mhz
#endif
+ // If we've fallen through, attempt to roughly estimate the CPU clock rate.
+ const int estimate_time_ms = 1000;
+ const auto start_ticks = cycleclock::Now();
+ SleepForMilliseconds(estimate_time_ms);
+ return static_cast<double>(cycleclock::Now() - start_ticks);
}
} // end namespace
-double CyclesPerSecond(void) {
- std::call_once(cpuinfo_init, InitializeSystemInfo);
- return cpuinfo_cycles_per_second;
+const CPUInfo& CPUInfo::Get() {
+ static const CPUInfo* info = new CPUInfo();
+ return *info;
}
-int NumCPUs(void) {
- std::call_once(cpuinfo_init, InitializeSystemInfo);
- return cpuinfo_num_cpus;
-}
-
-// The ""'s catch people who don't pass in a literal for "str"
-#define strliterallen(str) (sizeof("" str "") - 1)
-
-// Must use a string literal for prefix.
-#define memprefix(str, len, prefix) \
- ((((len) >= strliterallen(prefix)) && \
- std::memcmp(str, prefix, strliterallen(prefix)) == 0) \
- ? str + strliterallen(prefix) \
- : nullptr)
-
-bool CpuScalingEnabled() {
-#ifndef BENCHMARK_OS_WINDOWS
- // On Linux, the CPUfreq subsystem exposes CPU information as files on the
- // local file system. If reading the exported files fails, then we may not be
- // running on Linux, so we silently ignore all the read errors.
- for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
- std::string governor_file =
- StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
- FILE* file = fopen(governor_file.c_str(), "r");
- if (!file) break;
- char buff[16];
- size_t bytes_read = fread(buff, 1, sizeof(buff), file);
- fclose(file);
- if (memprefix(buff, bytes_read, "performance") == nullptr) return true;
- }
-#endif
- return false;
-}
+CPUInfo::CPUInfo()
+ : num_cpus(GetNumCPUs()),
+ cycles_per_second(GetCPUCyclesPerSecond()),
+ caches(GetCacheSizes()),
+ scaling_enabled(CpuScalingEnabled(num_cpus)) {}
} // end namespace benchmark
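
For orientation, a minimal sketch (not part of the patch, illustrative names only) of how callers consume the new CPUInfo singleton that replaces the removed NumCPUs()/CyclesPerSecond()/CpuScalingEnabled() free functions; it assumes CPUInfo and its fields are visible through the library's public header in this version:

#include <iostream>
#include "benchmark/benchmark.h"

// Sketch: read the lazily constructed, cached CPU description.
void PrintCpuSummary() {
  const benchmark::CPUInfo& info = benchmark::CPUInfo::Get();  // built once on first use
  std::cout << info.num_cpus << " CPUs @ " << info.cycles_per_second << " Hz, scaling "
            << (info.scaling_enabled ? "enabled" : "disabled") << "\n";
  for (const auto& c : info.caches)  // one entry per cache level/type seen by CPU 0
    std::cout << "  L" << c.level << " " << c.type << " cache: " << c.size
              << " bytes, shared by " << c.num_sharing << " CPU(s)\n";
}
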
diff --git a/libcxx/utils/google-benchmark/src/sysinfo.h b/libcxx/utils/google-benchmark/src/sysinfo.h
deleted file mode 100644
index c5d9916d2dd..00000000000
--- a/libcxx/utils/google-benchmark/src/sysinfo.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef BENCHMARK_SYSINFO_H_
-#define BENCHMARK_SYSINFO_H_
-
-namespace benchmark {
-int NumCPUs();
-double CyclesPerSecond();
-bool CpuScalingEnabled();
-} // end namespace benchmark
-
-#endif // BENCHMARK_SYSINFO_H_
diff --git a/libcxx/utils/google-benchmark/src/timers.cc b/libcxx/utils/google-benchmark/src/timers.cc
index 8d56e8adf8d..817272d00bc 100644
--- a/libcxx/utils/google-benchmark/src/timers.cc
+++ b/libcxx/utils/google-benchmark/src/timers.cc
@@ -158,6 +158,10 @@ double ThreadCPUUsage() {
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// Emscripten doesn't support traditional threads
return ProcessCPUUsage();
+#elif defined(BENCHMARK_OS_RTEMS)
+ // RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. See
+ // https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c
+ return ProcessCPUUsage();
#elif defined(CLOCK_THREAD_CPUTIME_ID)
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
diff --git a/libcxx/utils/google-benchmark/test/CMakeLists.txt b/libcxx/utils/google-benchmark/test/CMakeLists.txt
index 14ba7a6e2da..efce3ba524a 100644
--- a/libcxx/utils/google-benchmark/test/CMakeLists.txt
+++ b/libcxx/utils/google-benchmark/test/CMakeLists.txt
@@ -1,6 +1,7 @@
# Enable the tests
find_package(Threads REQUIRED)
+include(CheckCXXCompilerFlag)
# NOTE: Some tests use `<cassert>` to perform the test. Therefore we must
# strip -DNDEBUG from the default CMake flags in DEBUG mode.
@@ -27,7 +28,7 @@ if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
list(APPEND CMAKE_EXE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
endif()
-add_library(output_test_helper STATIC output_test_helper.cc)
+add_library(output_test_helper STATIC output_test_helper.cc output_test.h)
macro(compile_benchmark_test name)
add_executable(${name} "${name}.cc")
@@ -41,7 +42,6 @@ macro(compile_output_test name)
${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endmacro(compile_output_test)
-
# Demonstration executable
compile_benchmark_test(benchmark_test)
add_test(benchmark benchmark_test --benchmark_min_time=0.01)
@@ -75,6 +75,11 @@ compile_benchmark_test(skip_with_error_test)
add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01)
compile_benchmark_test(donotoptimize_test)
+# Some of the issues with DoNotOptimize only occur when optimization is enabled
+check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
+if (BENCHMARK_HAS_O3_FLAG)
+ set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3")
+endif()
add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01)
compile_benchmark_test(fixture_test)
@@ -92,15 +97,31 @@ add_test(multiple_ranges_test multiple_ranges_test --benchmark_min_time=0.01)
compile_output_test(reporter_output_test)
add_test(reporter_output_test reporter_output_test --benchmark_min_time=0.01)
+compile_output_test(templated_fixture_test)
+add_test(templated_fixture_test templated_fixture_test --benchmark_min_time=0.01)
+
+compile_output_test(user_counters_test)
+add_test(user_counters_test user_counters_test --benchmark_min_time=0.01)
+
+compile_output_test(user_counters_tabular_test)
+add_test(user_counters_tabular_test user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01)
+
check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG)
if (BENCHMARK_HAS_CXX03_FLAG)
- set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}")
- string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
- string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
-
compile_benchmark_test(cxx03_test)
set_target_properties(cxx03_test
- PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}")
+ PROPERTIES
+ COMPILE_FLAGS "-std=c++03")
+ # libstdc++ provides different definitions within <map> between dialects. When
+ # LTO is enabled and -Werror is specified GCC diagnoses this ODR violation
+ # causing the test to fail to compile. To prevent this we explicitly disable
+ # the warning.
+ check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR)
+ if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR)
+ set_target_properties(cxx03_test
+ PROPERTIES
+ LINK_FLAGS "-Wno-odr")
+ endif()
add_test(cxx03 cxx03_test --benchmark_min_time=0.01)
endif()
@@ -113,6 +134,29 @@ endif()
compile_output_test(complexity_test)
add_test(complexity_benchmark complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME})
+###############################################################################
+# GoogleTest Unit Tests
+###############################################################################
+
+if (BENCHMARK_ENABLE_GTEST_TESTS)
+ macro(compile_gtest name)
+ add_executable(${name} "${name}.cc")
+ if (TARGET googletest)
+ add_dependencies(${name} googletest)
+ endif()
+ target_link_libraries(${name} benchmark
+ "${GTEST_BOTH_LIBRARIES}" ${CMAKE_THREAD_LIBS_INIT})
+ endmacro(compile_gtest)
+
+ macro(add_gtest name)
+ compile_gtest(${name})
+ add_test(${name} ${name})
+ endmacro()
+
+ add_gtest(statistics_test)
+endif(BENCHMARK_ENABLE_GTEST_TESTS)
+
+
# Add the coverage command(s)
if(CMAKE_BUILD_TYPE)
string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER)
diff --git a/libcxx/utils/google-benchmark/test/basic_test.cc b/libcxx/utils/google-benchmark/test/basic_test.cc
index 22de007cb6d..3348781ce72 100644
--- a/libcxx/utils/google-benchmark/test/basic_test.cc
+++ b/libcxx/utils/google-benchmark/test/basic_test.cc
@@ -1,10 +1,10 @@
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192)
void BM_empty(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
benchmark::DoNotOptimize(state.iterations());
}
}
@@ -12,7 +12,7 @@ BENCHMARK(BM_empty);
BENCHMARK(BM_empty)->ThreadPerCpu();
void BM_spin_empty(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int x = 0; x < state.range(0); ++x) {
benchmark::DoNotOptimize(x);
}
@@ -25,7 +25,7 @@ void BM_spin_pause_before(benchmark::State& state) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
@@ -35,7 +35,7 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before);
BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();
void BM_spin_pause_during(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
state.PauseTiming();
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
@@ -50,7 +50,7 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_during);
BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu();
void BM_pause_during(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
state.PauseTiming();
state.ResumeTiming();
}
@@ -61,7 +61,7 @@ BENCHMARK(BM_pause_during)->UseRealTime();
BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();
void BM_spin_pause_after(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
@@ -77,7 +77,7 @@ void BM_spin_pause_before_and_after(benchmark::State& state) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
@@ -90,10 +90,29 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after);
BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu();
void BM_empty_stop_start(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_empty_stop_start);
BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();
-BENCHMARK_MAIN()
+
+void BM_KeepRunning(benchmark::State& state) {
+ size_t iter_count = 0;
+ while (state.KeepRunning()) {
+ ++iter_count;
+ }
+ assert(iter_count == state.max_iterations);
+}
+BENCHMARK(BM_KeepRunning);
+
+void BM_RangedFor(benchmark::State& state) {
+ size_t iter_count = 0;
+ for (auto _ : state) {
+ ++iter_count;
+ }
+ assert(iter_count == state.max_iterations);
+}
+BENCHMARK(BM_RangedFor);
+
+BENCHMARK_MAIN();
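
A minimal sketch (hypothetical benchmark name, not part of the patch) of the idiom the updated tests above exercise: the ranged-for loop over State replaces the explicit KeepRunning() loop, and BENCHMARK_MAIN() now ends with a semicolon:

#include <string>
#include "benchmark/benchmark.h"

static void BM_StringCopy(benchmark::State& state) {
  std::string src(64, 'x');
  for (auto _ : state) {              // one pass per measured iteration
    std::string copy(src);
    benchmark::DoNotOptimize(copy);   // keep the copy from being optimized away
  }
}
BENCHMARK(BM_StringCopy);

BENCHMARK_MAIN();
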
diff --git a/libcxx/utils/google-benchmark/test/benchmark_test.cc b/libcxx/utils/google-benchmark/test/benchmark_test.cc
index 57731331e6d..78802c8da60 100644
--- a/libcxx/utils/google-benchmark/test/benchmark_test.cc
+++ b/libcxx/utils/google-benchmark/test/benchmark_test.cc
@@ -42,7 +42,7 @@ double CalculatePi(int depth) {
std::set<int> ConstructRandomSet(int size) {
std::set<int> s;
- for (int i = 0; i < size; ++i) s.insert(i);
+ for (int i = 0; i < size; ++i) s.insert(s.end(), i);
return s;
}
@@ -53,7 +53,7 @@ std::vector<int>* test_vector = nullptr;
static void BM_Factorial(benchmark::State& state) {
int fac_42 = 0;
- while (state.KeepRunning()) fac_42 = Factorial(8);
+ for (auto _ : state) fac_42 = Factorial(8);
// Prevent compiler optimizations
std::stringstream ss;
ss << fac_42;
@@ -64,7 +64,7 @@ BENCHMARK(BM_Factorial)->UseRealTime();
static void BM_CalculatePiRange(benchmark::State& state) {
double pi = 0.0;
- while (state.KeepRunning()) pi = CalculatePi(state.range(0));
+ for (auto _ : state) pi = CalculatePi(state.range(0));
std::stringstream ss;
ss << pi;
state.SetLabel(ss.str());
@@ -73,7 +73,7 @@ BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
static void BM_CalculatePi(benchmark::State& state) {
static const int depth = 1024;
- while (state.KeepRunning()) {
+ for (auto _ : state) {
benchmark::DoNotOptimize(CalculatePi(depth));
}
}
@@ -82,22 +82,26 @@ BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
static void BM_SetInsert(benchmark::State& state) {
- while (state.KeepRunning()) {
+ std::set<int> data;
+ for (auto _ : state) {
state.PauseTiming();
- std::set<int> data = ConstructRandomSet(state.range(0));
+ data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range(1); ++j) data.insert(rand());
}
state.SetItemsProcessed(state.iterations() * state.range(1));
state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int));
}
-BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {1, 10}});
+
+// Test many inserts at once to reduce the total iterations needed. Otherwise,
+// the slower, non-timed part of each iteration will make the benchmark take forever.
+BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}});
template <typename Container,
typename ValueType = typename Container::value_type>
static void BM_Sequential(benchmark::State& state) {
ValueType v = 42;
- while (state.KeepRunning()) {
+ for (auto _ : state) {
Container c;
for (int i = state.range(0); --i;) c.push_back(v);
}
@@ -109,14 +113,14 @@ BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)
->Range(1 << 0, 1 << 10);
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
-#if __cplusplus >= 201103L
+#ifdef BENCHMARK_HAS_CXX11
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
#endif
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range(0), '-');
std::string s2(state.range(0), '-');
- while (state.KeepRunning()) benchmark::DoNotOptimize(s1.compare(s2));
+ for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2));
}
BENCHMARK(BM_StringCompare)->Range(1, 1 << 20);
@@ -126,7 +130,7 @@ static void BM_SetupTeardown(benchmark::State& state) {
test_vector = new std::vector<int>();
}
int i = 0;
- while (state.KeepRunning()) {
+ for (auto _ : state) {
std::lock_guard<std::mutex> l(test_vector_mu);
if (i % 2 == 0)
test_vector->push_back(i);
@@ -142,7 +146,7 @@ BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
static void BM_LongTest(benchmark::State& state) {
double tracker = 0.0;
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = 0; i < state.range(0); ++i)
benchmark::DoNotOptimize(tracker += i);
}
@@ -159,7 +163,7 @@ static void BM_ParallelMemset(benchmark::State& state) {
test_vector = new std::vector<int>(size);
}
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = from; i < to; i++) {
// No need to lock test_vector_mu as ranges
// do not overlap between threads.
@@ -179,7 +183,7 @@ static void BM_ManualTiming(benchmark::State& state) {
std::chrono::duration<double, std::micro> sleep_duration{
static_cast<double>(microseconds)};
- while (state.KeepRunning()) {
+ for (auto _ : state) {
auto start = std::chrono::high_resolution_clock::now();
// Simulate some useful workload with a sleep
std::this_thread::sleep_for(
@@ -197,11 +201,11 @@ static void BM_ManualTiming(benchmark::State& state) {
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
-#if __cplusplus >= 201103L
+#ifdef BENCHMARK_HAS_CXX11
template <class... Args>
void BM_with_args(benchmark::State& state, Args&&...) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44);
@@ -213,24 +217,7 @@ void BM_non_template_args(benchmark::State& state, int, double) {
}
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
-static void BM_UserCounter(benchmark::State& state) {
- static const int depth = 1024;
- while (state.KeepRunning()) {
- benchmark::DoNotOptimize(CalculatePi(depth));
- }
- state.counters["Foo"] = 1;
- state.counters["Bar"] = 2;
- state.counters["Baz"] = 3;
- state.counters["Bat"] = 5;
-#ifdef BENCHMARK_HAS_CXX11
- state.counters.insert({{"Foo", 2}, {"Bar", 3}, {"Baz", 5}, {"Bat", 6}});
-#endif
-}
-BENCHMARK(BM_UserCounter)->Threads(8);
-BENCHMARK(BM_UserCounter)->ThreadRange(1, 32);
-BENCHMARK(BM_UserCounter)->ThreadPerCpu();
-
-#endif // __cplusplus >= 201103L
+#endif // BENCHMARK_HAS_CXX11
static void BM_DenseThreadRanges(benchmark::State& st) {
switch (st.range(0)) {
@@ -254,4 +241,4 @@ BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3);
BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2);
BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
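
A sketch (assumed names, not part of the patch) of the pattern BM_SetInsert adopts above: per-iteration setup is excluded from the measurement with PauseTiming()/ResumeTiming(), and the container is hoisted out of the loop so many insertions can be timed per iteration:

#include <cstdlib>
#include <set>
#include "benchmark/benchmark.h"

static void BM_InsertAfterSetup(benchmark::State& state) {
  std::set<int> data;                        // reused across iterations
  for (auto _ : state) {
    state.PauseTiming();                     // setup is not charged to the benchmark
    data.clear();
    for (int i = 0; i < state.range(0); ++i) data.insert(i);
    state.ResumeTiming();
    for (int j = 0; j < state.range(1); ++j) data.insert(std::rand());
  }
}
BENCHMARK(BM_InsertAfterSetup)->Ranges({{1 << 10, 8 << 10}, {128, 512}});
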
diff --git a/libcxx/utils/google-benchmark/test/complexity_test.cc b/libcxx/utils/google-benchmark/test/complexity_test.cc
index 14e03b06eb1..89dfa580e6b 100644
--- a/libcxx/utils/google-benchmark/test/complexity_test.cc
+++ b/libcxx/utils/google-benchmark/test/complexity_test.cc
@@ -25,8 +25,8 @@ int AddComplexityTest(std::string big_o_test_name, std::string rms_test_name,
{"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name.
{"^%rms_name %rms %rms[ ]*$", MR_Next}});
AddCases(TC_JSONOut, {{"\"name\": \"%bigo_name\",$"},
- {"\"cpu_coefficient\": [0-9]+,$", MR_Next},
- {"\"real_coefficient\": [0-9]{1,5},$", MR_Next},
+ {"\"cpu_coefficient\": %float,$", MR_Next},
+ {"\"real_coefficient\": %float,$", MR_Next},
{"\"big_o\": \"%bigo\",$", MR_Next},
{"\"time_unit\": \"ns\"$", MR_Next},
{"}", MR_Next},
@@ -46,7 +46,7 @@ int AddComplexityTest(std::string big_o_test_name, std::string rms_test_name,
// ========================================================================= //
void BM_Complexity_O1(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = 0; i < 1024; ++i) {
benchmark::DoNotOptimize(&i);
}
@@ -94,7 +94,7 @@ void BM_Complexity_O_N(benchmark::State& state) {
auto v = ConstructRandomVector(state.range(0));
const int item_not_in_vector =
state.range(0) * 2; // Test worst case scenario (item not in vector)
- while (state.KeepRunning()) {
+ for (auto _ : state) {
benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
}
state.SetComplexityN(state.range(0));
@@ -129,7 +129,7 @@ ADD_COMPLEXITY_CASES(big_o_n_test_name, rms_o_n_test_name, lambda_big_o_n);
static void BM_Complexity_O_N_log_N(benchmark::State& state) {
auto v = ConstructRandomVector(state.range(0));
- while (state.KeepRunning()) {
+ for (auto _ : state) {
std::sort(v.begin(), v.end());
}
state.SetComplexityN(state.range(0));
@@ -141,7 +141,7 @@ BENCHMARK(BM_Complexity_O_N_log_N)
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16)
- ->Complexity([](int n) { return n * std::log2(n); });
+ ->Complexity([](int n) { return n * log2(n); });
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16)
diff --git a/libcxx/utils/google-benchmark/test/cxx03_test.cc b/libcxx/utils/google-benchmark/test/cxx03_test.cc
index a79d964e17b..baa9ed9262b 100644
--- a/libcxx/utils/google-benchmark/test/cxx03_test.cc
+++ b/libcxx/utils/google-benchmark/test/cxx03_test.cc
@@ -8,6 +8,10 @@
#error C++11 or greater detected. Should be C++03.
#endif
+#ifdef BENCHMARK_HAS_CXX11
+#error C++11 or greater detected by the library. BENCHMARK_HAS_CXX11 is defined.
+#endif
+
void BM_empty(benchmark::State& state) {
while (state.KeepRunning()) {
volatile std::size_t x = state.iterations();
@@ -39,10 +43,21 @@ void BM_template1(benchmark::State& state) {
BENCHMARK_TEMPLATE(BM_template1, long);
BENCHMARK_TEMPLATE1(BM_template1, int);
+template <class T>
+struct BM_Fixture : public ::benchmark::Fixture {
+};
+
+BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) {
+ BM_empty(state);
+}
+BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) {
+ BM_empty(state);
+}
+
void BM_counters(benchmark::State& state) {
BM_empty(state);
state.counters["Foo"] = 2;
}
BENCHMARK(BM_counters);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/utils/google-benchmark/test/diagnostics_test.cc b/libcxx/utils/google-benchmark/test/diagnostics_test.cc
index 1046730b0fc..dd64a336553 100644
--- a/libcxx/utils/google-benchmark/test/diagnostics_test.cc
+++ b/libcxx/utils/google-benchmark/test/diagnostics_test.cc
@@ -11,7 +11,7 @@
#include <stdexcept>
#include "../src/check.h"
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#if defined(__GNUC__) && !defined(__EXCEPTIONS)
#define TEST_HAS_NO_EXCEPTIONS
@@ -47,7 +47,7 @@ void BM_diagnostic_test(benchmark::State& state) {
if (called_once == false) try_invalid_pause_resume(state);
- while (state.KeepRunning()) {
+ for (auto _ : state) {
benchmark::DoNotOptimize(state.iterations());
}
@@ -57,6 +57,22 @@ void BM_diagnostic_test(benchmark::State& state) {
}
BENCHMARK(BM_diagnostic_test);
+
+void BM_diagnostic_test_keep_running(benchmark::State& state) {
+ static bool called_once = false;
+
+ if (called_once == false) try_invalid_pause_resume(state);
+
+ while(state.KeepRunning()) {
+ benchmark::DoNotOptimize(state.iterations());
+ }
+
+ if (called_once == false) try_invalid_pause_resume(state);
+
+ called_once = true;
+}
+BENCHMARK(BM_diagnostic_test_keep_running);
+
int main(int argc, char* argv[]) {
benchmark::internal::GetAbortHandler() = &TestHandler;
benchmark::Initialize(&argc, argv);
diff --git a/libcxx/utils/google-benchmark/test/donotoptimize_test.cc b/libcxx/utils/google-benchmark/test/donotoptimize_test.cc
index b21187aadc2..a705654a269 100644
--- a/libcxx/utils/google-benchmark/test/donotoptimize_test.cc
+++ b/libcxx/utils/google-benchmark/test/donotoptimize_test.cc
@@ -9,6 +9,22 @@ std::uint64_t double_up(const std::uint64_t x) __attribute__((const));
std::uint64_t double_up(const std::uint64_t x) { return x * 2; }
}
+// Using DoNotOptimize on types like BitRef seems to cause a lot of problems
+// with the inline assembly on both GCC and Clang.
+struct BitRef {
+ int index;
+ unsigned char &byte;
+
+public:
+ static BitRef Make() {
+ static unsigned char arr[2] = {};
+ BitRef b(1, arr[0]);
+ return b;
+ }
+private:
+ BitRef(int i, unsigned char& b) : index(i), byte(b) {}
+};
+
int main(int, char*[]) {
// this test verifies compilation of DoNotOptimize() for some types
@@ -29,5 +45,8 @@ int main(int, char*[]) {
benchmark::DoNotOptimize(double_up(x));
- return 0;
+ // These tests are to ensure that DoNotOptimize compiles for both rvalues
+ // and lvalues of awkward types like BitRef.
+ benchmark::DoNotOptimize(BitRef::Make());
+ BitRef lval = BitRef::Make();
+ benchmark::DoNotOptimize(lval);
}
diff --git a/libcxx/utils/google-benchmark/test/filter_test.cc b/libcxx/utils/google-benchmark/test/filter_test.cc
index 3a205295f09..0e27065c155 100644
--- a/libcxx/utils/google-benchmark/test/filter_test.cc
+++ b/libcxx/utils/google-benchmark/test/filter_test.cc
@@ -36,31 +36,31 @@ class TestReporter : public benchmark::ConsoleReporter {
} // end namespace
static void NoPrefix(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(NoPrefix);
static void BM_Foo(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_Foo);
static void BM_Bar(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_Bar);
static void BM_FooBar(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_FooBar);
static void BM_FooBa(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_FooBa);
diff --git a/libcxx/utils/google-benchmark/test/fixture_test.cc b/libcxx/utils/google-benchmark/test/fixture_test.cc
index bbc2f957902..1462b10f02f 100644
--- a/libcxx/utils/google-benchmark/test/fixture_test.cc
+++ b/libcxx/utils/google-benchmark/test/fixture_test.cc
@@ -28,7 +28,7 @@ class MyFixture : public ::benchmark::Fixture {
BENCHMARK_F(MyFixture, Foo)(benchmark::State &st) {
assert(data.get() != nullptr);
assert(*data == 42);
- while (st.KeepRunning()) {
+ for (auto _ : st) {
}
}
@@ -37,7 +37,7 @@ BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) {
assert(data.get() != nullptr);
assert(*data == 42);
}
- while (st.KeepRunning()) {
+ for (auto _ : st) {
assert(data.get() != nullptr);
assert(*data == 42);
}
@@ -46,4 +46,4 @@ BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) {
BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42);
BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42)->ThreadPerCpu();
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/utils/google-benchmark/test/map_test.cc b/libcxx/utils/google-benchmark/test/map_test.cc
index 83457c9981c..311d2d22b80 100644
--- a/libcxx/utils/google-benchmark/test/map_test.cc
+++ b/libcxx/utils/google-benchmark/test/map_test.cc
@@ -18,9 +18,10 @@ std::map<int, int> ConstructRandomMap(int size) {
// Basic version.
static void BM_MapLookup(benchmark::State& state) {
const int size = state.range(0);
- while (state.KeepRunning()) {
+ std::map<int, int> m;
+ for (auto _ : state) {
state.PauseTiming();
- std::map<int, int> m = ConstructRandomMap(size);
+ m = ConstructRandomMap(size);
state.ResumeTiming();
for (int i = 0; i < size; ++i) {
benchmark::DoNotOptimize(m.find(rand() % size));
@@ -44,7 +45,7 @@ class MapFixture : public ::benchmark::Fixture {
BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) {
const int size = state.range(0);
- while (state.KeepRunning()) {
+ for (auto _ : state) {
for (int i = 0; i < size; ++i) {
benchmark::DoNotOptimize(m.find(rand() % size));
}
@@ -53,4 +54,4 @@ BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) {
}
BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1 << 3, 1 << 12);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc b/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc
index 8e67b3b2a99..0a82382f3ca 100644
--- a/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc
+++ b/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc
@@ -43,7 +43,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture {
};
BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
int product = state.range(0) * state.range(1) * state.range(2);
for (int x = 0; x < product; x++) {
benchmark::DoNotOptimize(x);
@@ -60,15 +60,15 @@ void BM_CheckDefaultArgument(benchmark::State& state) {
// Test that the 'range()' without an argument is the same as 'range(0)'.
assert(state.range() == state.range(0));
assert(state.range() != state.range(1));
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_CheckDefaultArgument)->Ranges({{1, 5}, {6, 10}});
static void BM_MultipleRanges(benchmark::State& st) {
- while (st.KeepRunning()) {
+ for (auto _ : st) {
}
}
BENCHMARK(BM_MultipleRanges)->Ranges({{5, 5}, {6, 6}});
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
diff --git a/libcxx/utils/google-benchmark/test/options_test.cc b/libcxx/utils/google-benchmark/test/options_test.cc
index bbbed288398..fdec69174ee 100644
--- a/libcxx/utils/google-benchmark/test/options_test.cc
+++ b/libcxx/utils/google-benchmark/test/options_test.cc
@@ -1,4 +1,4 @@
-#include "benchmark/benchmark_api.h"
+#include "benchmark/benchmark.h"
#include <chrono>
#include <thread>
@@ -8,13 +8,13 @@
#include <cassert>
void BM_basic(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
void BM_basic_slow(benchmark::State& state) {
std::chrono::milliseconds sleep_duration(state.range(0));
- while (state.KeepRunning()) {
+ for (auto _ : state) {
std::this_thread::sleep_for(
std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration));
}
@@ -44,7 +44,7 @@ void CustomArgs(benchmark::internal::Benchmark* b) {
BENCHMARK(BM_basic)->Apply(CustomArgs);
-void BM_explicit_iteration_count(benchmark::State& st) {
+void BM_explicit_iteration_count(benchmark::State& state) {
// Test that benchmarks specified with an explicit iteration count are
// only run once.
static bool invoked_before = false;
@@ -52,14 +52,14 @@ void BM_explicit_iteration_count(benchmark::State& st) {
invoked_before = true;
// Test that the requested iteration count is respected.
- assert(st.max_iterations == 42);
+ assert(state.max_iterations == 42);
size_t actual_iterations = 0;
- while (st.KeepRunning())
+ for (auto _ : state)
++actual_iterations;
- assert(st.iterations() == st.max_iterations);
- assert(st.iterations() == 42);
+ assert(state.iterations() == state.max_iterations);
+ assert(state.iterations() == 42);
}
BENCHMARK(BM_explicit_iteration_count)->Iterations(42);
-BENCHMARK_MAIN()
+BENCHMARK_MAIN();
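
A sketch (hypothetical name) of the explicit-iteration-count feature the test above checks: Iterations(N) bypasses the usual adaptive iteration search and runs the body exactly N times:

#include "benchmark/benchmark.h"

static void BM_FixedIterations(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_FixedIterations)->Iterations(42);  // exactly 42 iterations, single run
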
diff --git a/libcxx/utils/google-benchmark/test/output_test.h b/libcxx/utils/google-benchmark/test/output_test.h
index 57d4397ad5d..897a13866ba 100644
--- a/libcxx/utils/google-benchmark/test/output_test.h
+++ b/libcxx/utils/google-benchmark/test/output_test.h
@@ -7,6 +7,8 @@
#include <string>
#include <utility>
#include <vector>
+#include <functional>
+#include <sstream>
#include "../src/re.h"
#include "benchmark/benchmark.h"
@@ -59,6 +61,134 @@ int SetSubstitutions(
void RunOutputTests(int argc, char* argv[]);
// ========================================================================= //
+// ------------------------- Results checking ------------------------------ //
+// ========================================================================= //
+
+// Call this macro to register a benchmark for checking its results. This
+// should be all that's needed. It subscribes a function to check the (CSV)
+// results of a benchmark. This is done only after verifying that the output
+// strings are really as expected.
+// bm_name_pattern: a name or a regex pattern which will be matched against
+// all the benchmark names. Matching benchmarks
+// will be the subject of a call to checker_function
+// checker_function: should be of type ResultsCheckFn (see below)
+#define CHECK_BENCHMARK_RESULTS(bm_name_pattern, checker_function) \
+ size_t CONCAT(dummy, __LINE__) = AddChecker(bm_name_pattern, checker_function)
+
+struct Results;
+typedef std::function< void(Results const&) > ResultsCheckFn;
+
+size_t AddChecker(const char* bm_name_pattern, ResultsCheckFn fn);
+
+// Class holding the results of a benchmark.
+// It is passed in calls to checker functions.
+struct Results {
+
+ // the benchmark name
+ std::string name;
+ // the benchmark fields
+ std::map< std::string, std::string > values;
+
+ Results(const std::string& n) : name(n) {}
+
+ int NumThreads() const;
+
+ typedef enum { kCpuTime, kRealTime } BenchmarkTime;
+
+ // get cpu_time or real_time in seconds
+ double GetTime(BenchmarkTime which) const;
+
+ // get the real_time duration of the benchmark in seconds.
+ // it is better to use fuzzy float checks for this, as the float
+ // ASCII formatting is lossy.
+ double DurationRealTime() const {
+ return GetAs< double >("iterations") * GetTime(kRealTime);
+ }
+ // get the cpu_time duration of the benchmark in seconds
+ double DurationCPUTime() const {
+ return GetAs< double >("iterations") * GetTime(kCpuTime);
+ }
+
+ // get the string for a result by name, or nullptr if the name
+ // is not found
+ const std::string* Get(const char* entry_name) const {
+ auto it = values.find(entry_name);
+ if(it == values.end()) return nullptr;
+ return &it->second;
+ }
+
+ // get a result by name, parsed as a specific type.
+ // NOTE: for counters, use GetCounterAs instead.
+ template <class T>
+ T GetAs(const char* entry_name) const;
+
+ // counters are written as doubles, so they have to be read first
+ // as a double, and only then converted to the asked type.
+ template <class T>
+ T GetCounterAs(const char* entry_name) const {
+ double dval = GetAs< double >(entry_name);
+ T tval = static_cast< T >(dval);
+ return tval;
+ }
+};
+
+template <class T>
+T Results::GetAs(const char* entry_name) const {
+ auto *sv = Get(entry_name);
+ CHECK(sv != nullptr && !sv->empty());
+ std::stringstream ss;
+ ss << *sv;
+ T out;
+ ss >> out;
+ CHECK(!ss.fail());
+ return out;
+}
+
+//----------------------------------
+// Macros to help in result checking. Do not use them with arguments causing
+// side-effects.
+
+#define _CHECK_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value) \
+ CONCAT(CHECK_, relationship) \
+ (entry.getfn< var_type >(var_name), (value)) << "\n" \
+ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \
+ << __FILE__ << ":" << __LINE__ << ": " \
+ << "expected (" << #var_type << ")" << (var_name) \
+ << "=" << (entry).getfn< var_type >(var_name) \
+ << " to be " #relationship " to " << (value) << "\n"
+
+// check with tolerance. eps_factor is the tolerance window, which is
+// interpreted relative to value (e.g., 0.1 means 10% of value).
+#define _CHECK_FLOAT_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value, eps_factor) \
+ CONCAT(CHECK_FLOAT_, relationship) \
+ (entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \
+ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \
+ << __FILE__ << ":" << __LINE__ << ": " \
+ << "expected (" << #var_type << ")" << (var_name) \
+ << "=" << (entry).getfn< var_type >(var_name) \
+ << " to be " #relationship " to " << (value) << "\n" \
+ << __FILE__ << ":" << __LINE__ << ": " \
+ << "with tolerance of " << (eps_factor) * (value) \
+ << " (" << (eps_factor)*100. << "%), " \
+ << "but delta was " << ((entry).getfn< var_type >(var_name) - (value)) \
+ << " (" << (((entry).getfn< var_type >(var_name) - (value)) \
+ / \
+ ((value) > 1.e-5 || value < -1.e-5 ? value : 1.e-5)*100.) \
+ << "%)"
+
+#define CHECK_RESULT_VALUE(entry, var_type, var_name, relationship, value) \
+ _CHECK_RESULT_VALUE(entry, GetAs, var_type, var_name, relationship, value)
+
+#define CHECK_COUNTER_VALUE(entry, var_type, var_name, relationship, value) \
+ _CHECK_RESULT_VALUE(entry, GetCounterAs, var_type, var_name, relationship, value)
+
+#define CHECK_FLOAT_RESULT_VALUE(entry, var_name, relationship, value, eps_factor) \
+ _CHECK_FLOAT_RESULT_VALUE(entry, GetAs, double, var_name, relationship, value, eps_factor)
+
+#define CHECK_FLOAT_COUNTER_VALUE(entry, var_name, relationship, value, eps_factor) \
+ _CHECK_FLOAT_RESULT_VALUE(entry, GetCounterAs, double, var_name, relationship, value, eps_factor)
+
+// ========================================================================= //
// --------------------------- Misc Utilities ------------------------------ //
// ========================================================================= //
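
The results-checking hooks declared above are consumed roughly as follows; a sketch with assumed benchmark and counter names, modeled on the user_counters tests this patch adds:

#include "benchmark/benchmark.h"
#include "output_test.h"

void BM_Counters_Example(benchmark::State& state) {
  for (auto _ : state) {
  }
  state.counters["foo"] = 1;
}
BENCHMARK(BM_Counters_Example);

// Runs only after the textual output checks pass; values come from the CSV row.
void CheckExample(Results const& e) {
  CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1);
  CHECK_RESULT_VALUE(e, int, "iterations", GE, 1);
}
CHECK_BENCHMARK_RESULTS("BM_Counters_Example", &CheckExample);

int main(int argc, char* argv[]) { RunOutputTests(argc, argv); }
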
diff --git a/libcxx/utils/google-benchmark/test/output_test_helper.cc b/libcxx/utils/google-benchmark/test/output_test_helper.cc
index 54c028a67ba..24746f6d27f 100644
--- a/libcxx/utils/google-benchmark/test/output_test_helper.cc
+++ b/libcxx/utils/google-benchmark/test/output_test_helper.cc
@@ -2,10 +2,12 @@
#include <map>
#include <memory>
#include <sstream>
+#include <cstring>
#include "../src/check.h" // NOTE: check.h is for internal use only!
#include "../src/re.h" // NOTE: re.h is for internal use only
#include "output_test.h"
+#include "../src/benchmark_api_internal.h"
// ========================================================================= //
// ------------------------------ Internals -------------------------------- //
@@ -34,17 +36,25 @@ SubMap& GetSubstitutions() {
static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
static SubMap map = {
{"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"},
+ // human-readable float
+ {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"},
{"%int", "[ ]*[0-9]+"},
{" %s ", "[ ]+"},
{"%time", "[ ]*[0-9]{1,5} ns"},
{"%console_report", "[ ]*[0-9]{1,5} ns [ ]*[0-9]{1,5} ns [ ]*[0-9]+"},
{"%console_us_report", "[ ]*[0-9] us [ ]*[0-9] us [ ]*[0-9]+"},
+ {"%csv_header",
+ "name,iterations,real_time,cpu_time,time_unit,bytes_per_second,"
+ "items_per_second,label,error_occurred,error_message"},
{"%csv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,,,"},
{"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"},
{"%csv_bytes_report",
"[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"},
{"%csv_items_report",
"[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,," + safe_dec_re + ",,,"},
+ {"%csv_bytes_items_report",
+ "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re +
+ "," + safe_dec_re + ",,,"},
{"%csv_label_report_begin", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,"},
{"%csv_label_report_end", ",,"}};
return map;
@@ -140,8 +150,179 @@ class TestReporter : public benchmark::BenchmarkReporter {
std::vector<benchmark::BenchmarkReporter *> reporters_;
};
}
+
+} // end namespace internal
+
+// ========================================================================= //
+// -------------------------- Results checking ----------------------------- //
+// ========================================================================= //
+
+namespace internal {
+
+// Utility class to manage subscribers for checking benchmark results.
+// It works by parsing the CSV output to read the results.
+class ResultsChecker {
+ public:
+
+ struct PatternAndFn : public TestCase { // reusing TestCase for its regexes
+ PatternAndFn(const std::string& rx, ResultsCheckFn fn_)
+ : TestCase(rx), fn(fn_) {}
+ ResultsCheckFn fn;
+ };
+
+ std::vector< PatternAndFn > check_patterns;
+ std::vector< Results > results;
+ std::vector< std::string > field_names;
+
+ void Add(const std::string& entry_pattern, ResultsCheckFn fn);
+
+ void CheckResults(std::stringstream& output);
+
+ private:
+
+ void SetHeader_(const std::string& csv_header);
+ void SetValues_(const std::string& entry_csv_line);
+
+ std::vector< std::string > SplitCsv_(const std::string& line);
+
+};
+
+// store the static ResultsChecker in a function to prevent initialization
+// order problems
+ResultsChecker& GetResultsChecker() {
+ static ResultsChecker rc;
+ return rc;
+}
+
+// add a results checker for a benchmark
+void ResultsChecker::Add(const std::string& entry_pattern, ResultsCheckFn fn) {
+ check_patterns.emplace_back(entry_pattern, fn);
+}
+
+// check the results of all subscribed benchmarks
+void ResultsChecker::CheckResults(std::stringstream& output) {
+ // first reset the stream to the start
+ {
+ auto start = std::ios::streampos(0);
+ // clear before calling tellg()
+ output.clear();
+ // seek to zero only when needed
+ if(output.tellg() > start) output.seekg(start);
+ // and just in case
+ output.clear();
+ }
+ // now go over every line and publish it to the ResultsChecker
+ std::string line;
+ bool on_first = true;
+ while (output.eof() == false) {
+ CHECK(output.good());
+ std::getline(output, line);
+ if (on_first) {
+ SetHeader_(line); // this is important
+ on_first = false;
+ continue;
+ }
+ SetValues_(line);
+ }
+ // finally we can call the subscribed check functions
+ for(const auto& p : check_patterns) {
+ VLOG(2) << "--------------------------------\n";
+ VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n";
+ for(const auto& r : results) {
+ if(!p.regex->Match(r.name)) {
+ VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n";
+ continue;
+ } else {
+ VLOG(2) << p.regex_str << " is matched by " << r.name << "\n";
+ }
+ VLOG(1) << "Checking results of " << r.name << ": ... \n";
+ p.fn(r);
+ VLOG(1) << "Checking results of " << r.name << ": OK.\n";
+ }
+ }
+}
+
+// prepare for the names in this header
+void ResultsChecker::SetHeader_(const std::string& csv_header) {
+ field_names = SplitCsv_(csv_header);
+}
+
+// set the values for a benchmark
+void ResultsChecker::SetValues_(const std::string& entry_csv_line) {
+ if(entry_csv_line.empty()) return; // some lines are empty
+ CHECK(!field_names.empty());
+ auto vals = SplitCsv_(entry_csv_line);
+ CHECK_EQ(vals.size(), field_names.size());
+ results.emplace_back(vals[0]); // vals[0] is the benchmark name
+ auto &entry = results.back();
+ for (size_t i = 1, e = vals.size(); i < e; ++i) {
+ entry.values[field_names[i]] = vals[i];
+ }
+}
+
+// a quick'n'dirty csv splitter (eliminating quotes)
+std::vector< std::string > ResultsChecker::SplitCsv_(const std::string& line) {
+ std::vector< std::string > out;
+ if(line.empty()) return out;
+ if(!field_names.empty()) out.reserve(field_names.size());
+ size_t prev = 0, pos = line.find_first_of(','), curr = pos;
+ while(pos != line.npos) {
+ CHECK(curr > 0);
+ if(line[prev] == '"') ++prev;
+ if(line[curr-1] == '"') --curr;
+ out.push_back(line.substr(prev, curr-prev));
+ prev = pos + 1;
+ pos = line.find_first_of(',', pos + 1);
+ curr = pos;
+ }
+ curr = line.size();
+ if(line[prev] == '"') ++prev;
+ if(line[curr-1] == '"') --curr;
+ out.push_back(line.substr(prev, curr-prev));
+ return out;
+}
+
} // end namespace internal
+size_t AddChecker(const char* bm_name, ResultsCheckFn fn)
+{
+ auto &rc = internal::GetResultsChecker();
+ rc.Add(bm_name, fn);
+ return rc.results.size();
+}
+
+int Results::NumThreads() const {
+ auto pos = name.find("/threads:");
+ if(pos == name.npos) return 1;
+ auto end = name.find('/', pos + 9);
+ std::stringstream ss;
+ ss << name.substr(pos + 9, end);
+ int num = 1;
+ ss >> num;
+ CHECK(!ss.fail());
+ return num;
+}
+
+double Results::GetTime(BenchmarkTime which) const {
+ CHECK(which == kCpuTime || which == kRealTime);
+ const char *which_str = which == kCpuTime ? "cpu_time" : "real_time";
+ double val = GetAs< double >(which_str);
+ auto unit = Get("time_unit");
+ CHECK(unit);
+ if(*unit == "ns") {
+ return val * 1.e-9;
+ } else if(*unit == "us") {
+ return val * 1.e-6;
+ } else if(*unit == "ms") {
+ return val * 1.e-3;
+ } else if(*unit == "s") {
+ return val;
+ } else {
+ CHECK(1 == 0) << "unknown time unit: " << *unit;
+ return 0;
+ }
+}
+
// ========================================================================= //
// -------------------------- Public API Definitions------------------------ //
// ========================================================================= //
@@ -186,7 +367,8 @@ int SetSubstitutions(
void RunOutputTests(int argc, char* argv[]) {
using internal::GetTestCaseList;
benchmark::Initialize(&argc, argv);
- benchmark::ConsoleReporter CR(benchmark::ConsoleReporter::OO_None);
+ auto options = benchmark::internal::GetOutputOptions(/*force_no_color*/true);
+ benchmark::ConsoleReporter CR(options);
benchmark::JSONReporter JR;
benchmark::CSVReporter CSVR;
struct ReporterTest {
@@ -231,4 +413,11 @@ void RunOutputTests(int argc, char* argv[]) {
std::cout << "\n";
}
+
+ // now that we know the output is as expected, we can dispatch
+ // the checks to the subscribed checkers.
+ auto &csv = TestCases[2];
+ // would use == but gcc spits a warning
+ CHECK(std::strcmp(csv.name, "CSVReporter") == 0);
+ internal::GetResultsChecker().CheckResults(csv.out_stream);
}
diff --git a/libcxx/utils/google-benchmark/test/register_benchmark_test.cc b/libcxx/utils/google-benchmark/test/register_benchmark_test.cc
index e9f8ea530c1..8ab2c299393 100644
--- a/libcxx/utils/google-benchmark/test/register_benchmark_test.cc
+++ b/libcxx/utils/google-benchmark/test/register_benchmark_test.cc
@@ -61,7 +61,7 @@ typedef benchmark::internal::Benchmark* ReturnVal;
// Test RegisterBenchmark with no additional arguments
//----------------------------------------------------------------------------//
void BM_function(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_function);
@@ -77,7 +77,7 @@ ADD_CASES({"BM_function"}, {"BM_function_manual_registration"});
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
void BM_extra_args(benchmark::State& st, const char* label) {
- while (st.KeepRunning()) {
+ for (auto _ : st) {
}
st.SetLabel(label);
}
@@ -99,7 +99,7 @@ ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"});
struct CustomFixture {
void operator()(benchmark::State& st) {
- while (st.KeepRunning()) {
+ for (auto _ : st) {
}
}
};
@@ -114,23 +114,23 @@ void TestRegistrationAtRuntime() {
#endif
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
{
- int x = 42;
+ const char* x = "42";
auto capturing_lam = [=](benchmark::State& st) {
- while (st.KeepRunning()) {
+ for (auto _ : st) {
}
- st.SetLabel(std::to_string(x));
+ st.SetLabel(x);
};
benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam);
- AddCases({{"lambda_benchmark", "42"}});
+ AddCases({{"lambda_benchmark", x}});
}
#endif
}
-int main(int argc, char* argv[]) {
+// Test that all benchmarks, registered either during static init or at runtime,
+// are run and that the results are passed to the reporter.
+void RunTestOne() {
TestRegistrationAtRuntime();
- benchmark::Initialize(&argc, argv);
-
TestReporter test_reporter;
benchmark::RunSpecifiedBenchmarks(&test_reporter);
@@ -143,6 +143,40 @@ int main(int argc, char* argv[]) {
++EB;
}
assert(EB == ExpectedResults.end());
+}
- return 0;
+// Test that ClearRegisteredBenchmarks() clears all previously registered
+// benchmarks.
+// Also test that new benchmarks can be registered and run afterwards.
+void RunTestTwo() {
+ assert(ExpectedResults.size() != 0 &&
+ "must have at least one registered benchmark");
+ ExpectedResults.clear();
+ benchmark::ClearRegisteredBenchmarks();
+
+ TestReporter test_reporter;
+ size_t num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter);
+ assert(num_ran == 0);
+ assert(test_reporter.all_runs_.begin() == test_reporter.all_runs_.end());
+
+ TestRegistrationAtRuntime();
+ num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter);
+ assert(num_ran == ExpectedResults.size());
+
+ typedef benchmark::BenchmarkReporter::Run Run;
+ auto EB = ExpectedResults.begin();
+
+ for (Run const& run : test_reporter.all_runs_) {
+ assert(EB != ExpectedResults.end());
+ EB->CheckRun(run);
+ ++EB;
+ }
+ assert(EB == ExpectedResults.end());
+}
+
+int main(int argc, char* argv[]) {
+ benchmark::Initialize(&argc, argv);
+
+ RunTestOne();
+ RunTestTwo();
}
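A minimal sketch of the user-facing pattern exercised by RunTestTwo above, assuming no other benchmarks are registered (not part of the patch; the benchmark name and lambda body are illustrative):

  benchmark::RegisterBenchmark("bm_example", [](benchmark::State& st) {
    for (auto _ : st) {
    }
  });
  benchmark::RunSpecifiedBenchmarks();     // runs whatever is currently registered
  benchmark::ClearRegisteredBenchmarks();  // drops every registration made so far
  benchmark::RunSpecifiedBenchmarks();     // runs nothing until benchmarks are re-registered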
diff --git a/libcxx/utils/google-benchmark/test/reporter_output_test.cc b/libcxx/utils/google-benchmark/test/reporter_output_test.cc
index cb52aec0c08..1620b31396f 100644
--- a/libcxx/utils/google-benchmark/test/reporter_output_test.cc
+++ b/libcxx/utils/google-benchmark/test/reporter_output_test.cc
@@ -13,16 +13,49 @@ ADD_CASES(TC_ConsoleOut,
{{"^[-]+$", MR_Next},
{"^Benchmark %s Time %s CPU %s Iterations$", MR_Next},
{"^[-]+$", MR_Next}});
-ADD_CASES(TC_CSVOut,
- {{"name,iterations,real_time,cpu_time,time_unit,bytes_per_second,"
- "items_per_second,label,error_occurred,error_message"}});
+static int AddContextCases() {
+ AddCases(TC_ConsoleErr,
+ {
+ {"%int[-/]%int[-/]%int %int:%int:%int$", MR_Default},
+ {"Run on \\(%int X %float MHz CPU s\\)", MR_Next},
+ });
+ AddCases(TC_JSONOut, {{"^\\{", MR_Default},
+ {"\"context\":", MR_Next},
+ {"\"date\": \"", MR_Next},
+ {"\"num_cpus\": %int,$", MR_Next},
+ {"\"mhz_per_cpu\": %float,$", MR_Next},
+ {"\"cpu_scaling_enabled\": ", MR_Next},
+ {"\"caches\": \\[$", MR_Next}});
+ auto const& Caches = benchmark::CPUInfo::Get().caches;
+ if (!Caches.empty()) {
+ AddCases(TC_ConsoleErr, {{"CPU Caches:$", MR_Next}});
+ }
+ for (size_t I = 0; I < Caches.size(); ++I) {
+ std::string num_caches_str =
+ Caches[I].num_sharing != 0 ? " \\(x%int\\)$" : "$";
+ AddCases(
+ TC_ConsoleErr,
+ {{"L%int (Data|Instruction|Unified) %intK" + num_caches_str, MR_Next}});
+ AddCases(TC_JSONOut, {{"\\{$", MR_Next},
+ {"\"type\": \"", MR_Next},
+ {"\"level\": %int,$", MR_Next},
+ {"\"size\": %int,$", MR_Next},
+ {"\"num_sharing\": %int$", MR_Next},
+ {"}[,]{0,1}$", MR_Next}});
+ }
+
+ AddCases(TC_JSONOut, {{"],$"}});
+ return 0;
+}
+int dummy_register = AddContextCases();
+ADD_CASES(TC_CSVOut, {{"%csv_header"}});
// ========================================================================= //
// ------------------------ Testing Basic Output --------------------------- //
// ========================================================================= //
void BM_basic(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_basic);
@@ -30,8 +63,8 @@ BENCHMARK(BM_basic);
ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"},
{"\"iterations\": %int,$", MR_Next},
- {"\"real_time\": %int,$", MR_Next},
- {"\"cpu_time\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
{"\"time_unit\": \"ns\"$", MR_Next},
{"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}});
@@ -41,20 +74,20 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}});
// ========================================================================= //
void BM_bytes_per_second(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
state.SetBytesProcessed(1);
}
BENCHMARK(BM_bytes_per_second);
ADD_CASES(TC_ConsoleOut,
- {{"^BM_bytes_per_second %console_report +%floatB/s$"}});
+ {{"^BM_bytes_per_second %console_report +%float[kM]{0,1}B/s$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"},
{"\"iterations\": %int,$", MR_Next},
- {"\"real_time\": %int,$", MR_Next},
- {"\"cpu_time\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
{"\"time_unit\": \"ns\",$", MR_Next},
- {"\"bytes_per_second\": %int$", MR_Next},
+ {"\"bytes_per_second\": %float$", MR_Next},
{"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}});
@@ -63,20 +96,20 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}});
// ========================================================================= //
void BM_items_per_second(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
state.SetItemsProcessed(1);
}
BENCHMARK(BM_items_per_second);
ADD_CASES(TC_ConsoleOut,
- {{"^BM_items_per_second %console_report +%float items/s$"}});
+ {{"^BM_items_per_second %console_report +%float[kM]{0,1} items/s$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"},
{"\"iterations\": %int,$", MR_Next},
- {"\"real_time\": %int,$", MR_Next},
- {"\"cpu_time\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
{"\"time_unit\": \"ns\",$", MR_Next},
- {"\"items_per_second\": %int$", MR_Next},
+ {"\"items_per_second\": %float$", MR_Next},
{"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_items_per_second\",%csv_items_report$"}});
@@ -85,7 +118,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_items_per_second\",%csv_items_report$"}});
// ========================================================================= //
void BM_label(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
state.SetLabel("some label");
}
@@ -94,8 +127,8 @@ BENCHMARK(BM_label);
ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"},
{"\"iterations\": %int,$", MR_Next},
- {"\"real_time\": %int,$", MR_Next},
- {"\"cpu_time\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
{"\"time_unit\": \"ns\",$", MR_Next},
{"\"label\": \"some label\"$", MR_Next},
{"}", MR_Next}});
@@ -108,7 +141,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_label\",%csv_label_report_begin\"some "
void BM_error(benchmark::State& state) {
state.SkipWithError("message");
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_error);
@@ -125,7 +158,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_error\",,,,,,,,true,\"message\"$"}});
// ========================================================================= //
void BM_no_arg_name(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_no_arg_name)->Arg(3);
@@ -138,7 +171,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}});
// ========================================================================= //
void BM_arg_name(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3);
@@ -151,7 +184,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}});
// ========================================================================= //
void BM_arg_names(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_arg_names)->Args({2, 5, 4})->ArgNames({"first", "", "third"});
@@ -165,7 +198,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}});
// ========================================================================= //
void BM_Complexity_O1(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
state.SetComplexityN(state.range(0));
}
@@ -181,30 +214,74 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Complexity_O1_BigO %bigOStr %bigOStr[ ]*$"},
// Test that non-aggregate data is printed by default
void BM_Repeat(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
+// at least two repetitions are needed before any aggregate output is produced
+BENCHMARK(BM_Repeat)->Repetitions(2);
+ADD_CASES(TC_ConsoleOut, {{"^BM_Repeat/repeats:2 %console_report$"},
+ {"^BM_Repeat/repeats:2 %console_report$"},
+ {"^BM_Repeat/repeats:2_mean %console_report$"},
+ {"^BM_Repeat/repeats:2_median %console_report$"},
+ {"^BM_Repeat/repeats:2_stddev %console_report$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"},
+ {"\"name\": \"BM_Repeat/repeats:2\",$"},
+ {"\"name\": \"BM_Repeat/repeats:2_mean\",$"},
+ {"\"name\": \"BM_Repeat/repeats:2_median\",$"},
+ {"\"name\": \"BM_Repeat/repeats:2_stddev\",$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:2\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:2_mean\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:2_median\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:2_stddev\",%csv_report$"}});
+// but with two repetitions {a, b} the median (a+b)/2 equals the mean, so repeat with three
BENCHMARK(BM_Repeat)->Repetitions(3);
ADD_CASES(TC_ConsoleOut, {{"^BM_Repeat/repeats:3 %console_report$"},
{"^BM_Repeat/repeats:3 %console_report$"},
{"^BM_Repeat/repeats:3 %console_report$"},
{"^BM_Repeat/repeats:3_mean %console_report$"},
+ {"^BM_Repeat/repeats:3_median %console_report$"},
{"^BM_Repeat/repeats:3_stddev %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"},
{"\"name\": \"BM_Repeat/repeats:3\",$"},
{"\"name\": \"BM_Repeat/repeats:3\",$"},
{"\"name\": \"BM_Repeat/repeats:3_mean\",$"},
+ {"\"name\": \"BM_Repeat/repeats:3_median\",$"},
{"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"},
{"^\"BM_Repeat/repeats:3\",%csv_report$"},
{"^\"BM_Repeat/repeats:3\",%csv_report$"},
{"^\"BM_Repeat/repeats:3_mean\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:3_median\",%csv_report$"},
{"^\"BM_Repeat/repeats:3_stddev\",%csv_report$"}});
+// the median averages the two middle samples for an even repetition count, so also cover four
+BENCHMARK(BM_Repeat)->Repetitions(4);
+ADD_CASES(TC_ConsoleOut, {{"^BM_Repeat/repeats:4 %console_report$"},
+ {"^BM_Repeat/repeats:4 %console_report$"},
+ {"^BM_Repeat/repeats:4 %console_report$"},
+ {"^BM_Repeat/repeats:4 %console_report$"},
+ {"^BM_Repeat/repeats:4_mean %console_report$"},
+ {"^BM_Repeat/repeats:4_median %console_report$"},
+ {"^BM_Repeat/repeats:4_stddev %console_report$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"name\": \"BM_Repeat/repeats:4_mean\",$"},
+ {"\"name\": \"BM_Repeat/repeats:4_median\",$"},
+ {"\"name\": \"BM_Repeat/repeats:4_stddev\",$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:4\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:4\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:4\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:4_mean\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:4_median\",%csv_report$"},
+ {"^\"BM_Repeat/repeats:4_stddev\",%csv_report$"}});
// Test that a non-repeated test still prints non-aggregate results even when
// only-aggregate reports have been requested
void BM_RepeatOnce(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly();
@@ -214,23 +291,26 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_RepeatOnce/repeats:1\",%csv_report$"}});
// Test that non-aggregate data is not reported
void BM_SummaryRepeat(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly();
ADD_CASES(TC_ConsoleOut,
{{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"^BM_SummaryRepeat/repeats:3_mean %console_report$"},
+ {"^BM_SummaryRepeat/repeats:3_median %console_report$"},
{"^BM_SummaryRepeat/repeats:3_stddev %console_report$"}});
ADD_CASES(TC_JSONOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"},
+ {"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"},
{"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}});
ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"},
+ {"^\"BM_SummaryRepeat/repeats:3_median\",%csv_report$"},
{"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"}});
void BM_RepeatTimeUnit(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
}
}
BENCHMARK(BM_RepeatTimeUnit)
@@ -240,18 +320,60 @@ BENCHMARK(BM_RepeatTimeUnit)
ADD_CASES(TC_ConsoleOut,
{{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not},
{"^BM_RepeatTimeUnit/repeats:3_mean %console_us_report$"},
+ {"^BM_RepeatTimeUnit/repeats:3_median %console_us_report$"},
{"^BM_RepeatTimeUnit/repeats:3_stddev %console_us_report$"}});
ADD_CASES(TC_JSONOut, {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not},
{"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"},
{"\"time_unit\": \"us\",?$"},
+ {"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"},
+ {"\"time_unit\": \"us\",?$"},
{"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"},
{"\"time_unit\": \"us\",?$"}});
ADD_CASES(TC_CSVOut,
{{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not},
{"^\"BM_RepeatTimeUnit/repeats:3_mean\",%csv_us_report$"},
+ {"^\"BM_RepeatTimeUnit/repeats:3_median\",%csv_us_report$"},
{"^\"BM_RepeatTimeUnit/repeats:3_stddev\",%csv_us_report$"}});
// ========================================================================= //
+// -------------------- Testing user-provided statistics ------------------- //
+// ========================================================================= //
+
+const auto UserStatistics = [](const std::vector<double>& v) {
+ return v.back();
+};
+void BM_UserStats(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_UserStats)
+ ->Repetitions(3)
+ ->ComputeStatistics("", UserStatistics);
+// check that the user-provided statistic is computed and reported after the default ones;
+// the empty-string name is intentional: alphabetically it would sort before anything else
+ADD_CASES(TC_ConsoleOut, {{"^BM_UserStats/repeats:3 %console_report$"},
+ {"^BM_UserStats/repeats:3 %console_report$"},
+ {"^BM_UserStats/repeats:3 %console_report$"},
+ {"^BM_UserStats/repeats:3_mean %console_report$"},
+ {"^BM_UserStats/repeats:3_median %console_report$"},
+ {"^BM_UserStats/repeats:3_stddev %console_report$"},
+ {"^BM_UserStats/repeats:3_ %console_report$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_UserStats/repeats:3\",$"},
+ {"\"name\": \"BM_UserStats/repeats:3\",$"},
+ {"\"name\": \"BM_UserStats/repeats:3\",$"},
+ {"\"name\": \"BM_UserStats/repeats:3_mean\",$"},
+ {"\"name\": \"BM_UserStats/repeats:3_median\",$"},
+ {"\"name\": \"BM_UserStats/repeats:3_stddev\",$"},
+ {"\"name\": \"BM_UserStats/repeats:3_\",$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_UserStats/repeats:3\",%csv_report$"},
+ {"^\"BM_UserStats/repeats:3\",%csv_report$"},
+ {"^\"BM_UserStats/repeats:3\",%csv_report$"},
+ {"^\"BM_UserStats/repeats:3_mean\",%csv_report$"},
+ {"^\"BM_UserStats/repeats:3_median\",%csv_report$"},
+ {"^\"BM_UserStats/repeats:3_stddev\",%csv_report$"},
+ {"^\"BM_UserStats/repeats:3_\",%csv_report$"}});
+
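For contrast with the empty-named statistic above, a hypothetical registration of a named user statistic (not part of the patch; the benchmark name and the "max" label are illustrative, and std::max_element needs <algorithm>):

  void BM_UserStatsMax(benchmark::State& state) {
    for (auto _ : state) {
    }
  }
  BENCHMARK(BM_UserStatsMax)
      ->Repetitions(3)
      ->ComputeStatistics("max", [](const std::vector<double>& v) {
        return *std::max_element(v.begin(), v.end());
      });
  // expected to be reported as BM_UserStatsMax/repeats:3_max,
  // after the default _mean/_median/_stddev aggregates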
+// ========================================================================= //
// --------------------------- TEST CASES END ------------------------------ //
// ========================================================================= //
diff --git a/libcxx/utils/google-benchmark/test/skip_with_error_test.cc b/libcxx/utils/google-benchmark/test/skip_with_error_test.cc
index b74d33c5899..0c2f3481a85 100644
--- a/libcxx/utils/google-benchmark/test/skip_with_error_test.cc
+++ b/libcxx/utils/google-benchmark/test/skip_with_error_test.cc
@@ -70,6 +70,15 @@ void BM_error_before_running(benchmark::State& state) {
BENCHMARK(BM_error_before_running);
ADD_CASES("BM_error_before_running", {{"", true, "error message"}});
+void BM_error_before_running_range_for(benchmark::State& state) {
+ state.SkipWithError("error message");
+ for (auto _ : state) {
+ assert(false);
+ }
+}
+BENCHMARK(BM_error_before_running_range_for);
+ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}});
+
void BM_error_during_running(benchmark::State& state) {
int first_iter = true;
while (state.KeepRunning()) {
@@ -93,8 +102,31 @@ ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"},
{"/2/threads:4", false, ""},
{"/2/threads:8", false, ""}});
+void BM_error_during_running_ranged_for(benchmark::State& state) {
+ assert(state.max_iterations > 3 && "test requires at least a few iterations");
+ int first_iter = true;
+ // NOTE: Users should not write the for loop explicitly.
+ for (auto It = state.begin(), End = state.end(); It != End; ++It) {
+ if (state.range(0) == 1) {
+ assert(first_iter);
+ first_iter = false;
+ state.SkipWithError("error message");
+ // Test the unfortunate but documented behavior that the ranged-for loop
+ // doesn't automatically terminate when SkipWithError is set.
+ assert(++It != End);
+ break; // Required behavior
+ }
+ }
+}
+BENCHMARK(BM_error_during_running_ranged_for)->Arg(1)->Arg(2)->Iterations(5);
+ADD_CASES("BM_error_during_running_ranged_for",
+ {{"/1/iterations:5", true, "error message"},
+ {"/2/iterations:5", false, ""}});
+
void BM_error_after_running(benchmark::State& state) {
- while (state.KeepRunning()) {
+ for (auto _ : state) {
benchmark::DoNotOptimize(state.iterations());
}
if (state.thread_index <= (state.threads / 2))
diff --git a/libcxx/utils/google-benchmark/test/statistics_test.cc b/libcxx/utils/google-benchmark/test/statistics_test.cc
new file mode 100644
index 00000000000..b4d6abbb578
--- /dev/null
+++ b/libcxx/utils/google-benchmark/test/statistics_test.cc
@@ -0,0 +1,61 @@
+//===---------------------------------------------------------------------===//
+// statistics_test - Unit tests for src/statistics.cc
+//===---------------------------------------------------------------------===//
+
+#include "../src/statistics.h"
+#include "gtest/gtest.h"
+
+namespace {
+TEST(StatisticsTest, Mean) {
+ std::vector<double> Inputs;
+ {
+ Inputs = {42, 42, 42, 42};
+ double Res = benchmark::StatisticsMean(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 42.0);
+ }
+ {
+ Inputs = {1, 2, 3, 4};
+ double Res = benchmark::StatisticsMean(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 2.5);
+ }
+ {
+ Inputs = {1, 2, 5, 10, 10, 14};
+ double Res = benchmark::StatisticsMean(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 7.0);
+ }
+}
+
+TEST(StatisticsTest, Median) {
+ std::vector<double> Inputs;
+ {
+ Inputs = {42, 42, 42, 42};
+ double Res = benchmark::StatisticsMedian(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 42.0);
+ }
+ {
+ Inputs = {1, 2, 3, 4};
+ double Res = benchmark::StatisticsMedian(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 2.5);
+ }
+ {
+ Inputs = {1, 2, 5, 10, 10};
+ double Res = benchmark::StatisticsMedian(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 5.0);
+ }
+}
+
+TEST(StatisticsTest, StdDev) {
+ std::vector<double> Inputs;
+ {
+ Inputs = {101, 101, 101, 101};
+ double Res = benchmark::StatisticsStdDev(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 0.0);
+ }
+ {
+ Inputs = {1, 2, 3};
+ double Res = benchmark::StatisticsStdDev(Inputs);
+ EXPECT_DOUBLE_EQ(Res, 1.0);
+ }
+}
+
+} // end namespace
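A quick hand check of the expected values above: for {1, 2, 3} the mean is 2 and the squared deviations are 1, 0 and 1, summing to 2; with the sample (n - 1) normalization the variance is 2 / 2 = 1, so the standard deviation is 1.0 as the test expects (the population form would give sqrt(2/3) ~ 0.816, so the test implies the sample definition).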
diff --git a/libcxx/utils/google-benchmark/test/templated_fixture_test.cc b/libcxx/utils/google-benchmark/test/templated_fixture_test.cc
new file mode 100644
index 00000000000..ec5b4c0cc07
--- /dev/null
+++ b/libcxx/utils/google-benchmark/test/templated_fixture_test.cc
@@ -0,0 +1,28 @@
+
+#include "benchmark/benchmark.h"
+
+#include <cassert>
+#include <memory>
+
+template<typename T>
+class MyFixture : public ::benchmark::Fixture {
+public:
+ MyFixture() : data(0) {}
+
+ T data;
+};
+
+BENCHMARK_TEMPLATE_F(MyFixture, Foo, int)(benchmark::State &st) {
+ for (auto _ : st) {
+ data += 1;
+ }
+}
+
+BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, Bar, double)(benchmark::State& st) {
+ for (auto _ : st) {
+ data += 1.0;
+ }
+}
+BENCHMARK_REGISTER_F(MyFixture, Bar);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc b/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc
new file mode 100644
index 00000000000..9b8a6132e6d
--- /dev/null
+++ b/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc
@@ -0,0 +1,250 @@
+
+#undef NDEBUG
+
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+// @todo: <jpmag> this checks the full output at once; the rule for
+// CounterSet1 was failing because it was not matching "^[-]+$".
+// @todo: <jpmag> check that the counters are vertically aligned.
+ADD_CASES(TC_ConsoleOut, {
+// keeping these lines long improves readability, so:
+// clang-format off
+ {"^[-]+$", MR_Next},
+ {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Bat %s Baz %s Foo %s Frob %s Lob$", MR_Next},
+ {"^[-]+$", MR_Next},
+ {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
+ {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
+ {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
+ {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
+ {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
+ {"^[-]+$", MR_Next},
+ {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Baz %s Foo$", MR_Next},
+ {"^[-]+$", MR_Next},
+ {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^[-]+$", MR_Next},
+ {"^Benchmark %s Time %s CPU %s Iterations %s Bat %s Baz %s Foo$", MR_Next},
+ {"^[-]+$", MR_Next},
+ {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$"},
+// clang-format on
+});
+ADD_CASES(TC_CSVOut, {{"%csv_header,"
+ "\"Bar\",\"Bat\",\"Baz\",\"Foo\",\"Frob\",\"Lob\""}});
+
+// ========================================================================= //
+// ------------------------- Tabular Counters Output ----------------------- //
+// ========================================================================= //
+
+void BM_Counters_Tabular(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters.insert({
+ {"Foo", { 1, bm::Counter::kAvgThreads}},
+ {"Bar", { 2, bm::Counter::kAvgThreads}},
+ {"Baz", { 4, bm::Counter::kAvgThreads}},
+ {"Bat", { 8, bm::Counter::kAvgThreads}},
+ {"Frob", {16, bm::Counter::kAvgThreads}},
+ {"Lob", {32, bm::Counter::kAvgThreads}},
+ });
+}
+BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 16);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Tabular/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/threads:%int\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckTabular(Results const& e) {
+ CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 1);
+ CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 2);
+ CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 4);
+ CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 8);
+ CHECK_COUNTER_VALUE(e, int, "Frob", EQ, 16);
+ CHECK_COUNTER_VALUE(e, int, "Lob", EQ, 32);
+}
+CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/threads:%int", &CheckTabular);
+
+// ========================================================================= //
+// -------------------- Tabular+Rate Counters Output ----------------------- //
+// ========================================================================= //
+
+void BM_CounterRates_Tabular(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters.insert({
+ {"Foo", { 1, bm::Counter::kAvgThreadsRate}},
+ {"Bar", { 2, bm::Counter::kAvgThreadsRate}},
+ {"Baz", { 4, bm::Counter::kAvgThreadsRate}},
+ {"Bat", { 8, bm::Counter::kAvgThreadsRate}},
+ {"Frob", {16, bm::Counter::kAvgThreadsRate}},
+ {"Lob", {32, bm::Counter::kAvgThreadsRate}},
+ });
+}
+BENCHMARK(BM_CounterRates_Tabular)->ThreadRange(1, 16);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterRates_Tabular/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_CounterRates_Tabular/threads:%int\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckTabularRate(Results const& e) {
+ double t = e.DurationCPUTime();
+ CHECK_FLOAT_COUNTER_VALUE(e, "Foo", EQ, 1./t, 0.001);
+ CHECK_FLOAT_COUNTER_VALUE(e, "Bar", EQ, 2./t, 0.001);
+ CHECK_FLOAT_COUNTER_VALUE(e, "Baz", EQ, 4./t, 0.001);
+ CHECK_FLOAT_COUNTER_VALUE(e, "Bat", EQ, 8./t, 0.001);
+ CHECK_FLOAT_COUNTER_VALUE(e, "Frob", EQ, 16./t, 0.001);
+ CHECK_FLOAT_COUNTER_VALUE(e, "Lob", EQ, 32./t, 0.001);
+}
+CHECK_BENCHMARK_RESULTS("BM_CounterRates_Tabular/threads:%int",
+ &CheckTabularRate);
+
+// ========================================================================= //
+// ------------------------- Tabular Counters Output ----------------------- //
+// ========================================================================= //
+
+// set only some of the counters
+void BM_CounterSet0_Tabular(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters.insert({
+ {"Foo", {10, bm::Counter::kAvgThreads}},
+ {"Bar", {20, bm::Counter::kAvgThreads}},
+ {"Baz", {40, bm::Counter::kAvgThreads}},
+ });
+}
+BENCHMARK(BM_CounterSet0_Tabular)->ThreadRange(1, 16);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet0_Tabular/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet0_Tabular/threads:%int\",%csv_report,"
+ "%float,,%float,%float,,"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckSet0(Results const& e) {
+ CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10);
+ CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 20);
+ CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40);
+}
+CHECK_BENCHMARK_RESULTS("BM_CounterSet0_Tabular", &CheckSet0);
+
+// the same counters again, with different values.
+void BM_CounterSet1_Tabular(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters.insert({
+ {"Foo", {15, bm::Counter::kAvgThreads}},
+ {"Bar", {25, bm::Counter::kAvgThreads}},
+ {"Baz", {45, bm::Counter::kAvgThreads}},
+ });
+}
+BENCHMARK(BM_CounterSet1_Tabular)->ThreadRange(1, 16);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet1_Tabular/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet1_Tabular/threads:%int\",%csv_report,"
+ "%float,,%float,%float,,"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckSet1(Results const& e) {
+ CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 15);
+ CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 25);
+ CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 45);
+}
+CHECK_BENCHMARK_RESULTS("BM_CounterSet1_Tabular/threads:%int", &CheckSet1);
+
+// ========================================================================= //
+// ------------------------- Tabular Counters Output ----------------------- //
+// ========================================================================= //
+
+// set only some of the counters, a different subset this time.
+void BM_CounterSet2_Tabular(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters.insert({
+ {"Foo", {10, bm::Counter::kAvgThreads}},
+ {"Bat", {30, bm::Counter::kAvgThreads}},
+ {"Baz", {40, bm::Counter::kAvgThreads}},
+ });
+}
+BENCHMARK(BM_CounterSet2_Tabular)->ThreadRange(1, 16);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet2_Tabular/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet2_Tabular/threads:%int\",%csv_report,"
+ ",%float,%float,%float,,"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckSet2(Results const& e) {
+ CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10);
+ CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 30);
+ CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40);
+}
+CHECK_BENCHMARK_RESULTS("BM_CounterSet2_Tabular", &CheckSet2);
+
+// ========================================================================= //
+// --------------------------- TEST CASES END ------------------------------ //
+// ========================================================================= //
+
+int main(int argc, char* argv[]) { RunOutputTests(argc, argv); }
diff --git a/libcxx/utils/google-benchmark/test/user_counters_test.cc b/libcxx/utils/google-benchmark/test/user_counters_test.cc
new file mode 100644
index 00000000000..06aafb1fa14
--- /dev/null
+++ b/libcxx/utils/google-benchmark/test/user_counters_test.cc
@@ -0,0 +1,217 @@
+
+#undef NDEBUG
+
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+// ========================================================================= //
+// ---------------------- Testing Prologue Output -------------------------- //
+// ========================================================================= //
+
+ADD_CASES(TC_ConsoleOut,
+ {{"^[-]+$", MR_Next},
+ {"^Benchmark %s Time %s CPU %s Iterations UserCounters...$", MR_Next},
+ {"^[-]+$", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"%csv_header,\"bar\",\"foo\""}});
+
+// ========================================================================= //
+// ------------------------- Simple Counters Output ------------------------ //
+// ========================================================================= //
+
+void BM_Counters_Simple(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ state.counters["foo"] = 1;
+ state.counters["bar"] = 2 * (double)state.iterations();
+}
+BENCHMARK(BM_Counters_Simple);
+ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Simple %console_report bar=%hrfloat foo=%hrfloat$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Simple\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"bar\": %float,$", MR_Next},
+ {"\"foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Simple\",%csv_report,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckSimple(Results const& e) {
+ double its = e.GetAs< double >("iterations");
+ CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1);
+ // check that the value of bar is within 0.1% of the expected value
+ CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2.*its, 0.001);
+}
+CHECK_BENCHMARK_RESULTS("BM_Counters_Simple", &CheckSimple);
+
+// ========================================================================= //
+// --------------------- Counters+Items+Bytes/s Output --------------------- //
+// ========================================================================= //
+
+namespace { int num_calls1 = 0; }
+void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ state.counters["foo"] = 1;
+ state.counters["bar"] = ++num_calls1;
+ state.SetBytesProcessed(364);
+ state.SetItemsProcessed(150);
+}
+BENCHMARK(BM_Counters_WithBytesAndItemsPSec);
+ADD_CASES(TC_ConsoleOut,
+ {{"^BM_Counters_WithBytesAndItemsPSec %console_report "
+ "bar=%hrfloat foo=%hrfloat +%hrfloatB/s +%hrfloat items/s$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_WithBytesAndItemsPSec\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"bytes_per_second\": %float,$", MR_Next},
+ {"\"items_per_second\": %float,$", MR_Next},
+ {"\"bar\": %float,$", MR_Next},
+ {"\"foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_WithBytesAndItemsPSec\","
+ "%csv_bytes_items_report,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckBytesAndItemsPSec(Results const& e) {
+ double t = e.DurationCPUTime(); // this (and not real time) is the time used
+ CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1);
+ CHECK_COUNTER_VALUE(e, int, "bar", EQ, num_calls1);
+ // check that the values are within 0.1% of the expected values
+ CHECK_FLOAT_RESULT_VALUE(e, "bytes_per_second", EQ, 364./t, 0.001);
+ CHECK_FLOAT_RESULT_VALUE(e, "items_per_second", EQ, 150./t, 0.001);
+}
+CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec",
+ &CheckBytesAndItemsPSec);
+
+// ========================================================================= //
+// ------------------------- Rate Counters Output -------------------------- //
+// ========================================================================= //
+
+void BM_Counters_Rate(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate};
+ state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate};
+}
+BENCHMARK(BM_Counters_Rate);
+ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Rate %console_report bar=%hrfloat/s foo=%hrfloat/s$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Rate\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"bar\": %float,$", MR_Next},
+ {"\"foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Rate\",%csv_report,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckRate(Results const& e) {
+ double t = e.DurationCPUTime(); // this (and not real time) is the time used
+ // check that the values are within 0.1% of the expected values
+ CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1./t, 0.001);
+ CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2./t, 0.001);
+}
+CHECK_BENCHMARK_RESULTS("BM_Counters_Rate", &CheckRate);
+
+// ========================================================================= //
+// ------------------------- Thread Counters Output ------------------------ //
+// ========================================================================= //
+
+void BM_Counters_Threads(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ state.counters["foo"] = 1;
+ state.counters["bar"] = 2;
+}
+BENCHMARK(BM_Counters_Threads)->ThreadRange(1, 8);
+ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report bar=%hrfloat foo=%hrfloat$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Threads/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"bar\": %float,$", MR_Next},
+ {"\"foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Threads/threads:%int\",%csv_report,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckThreads(Results const& e) {
+ CHECK_COUNTER_VALUE(e, int, "foo", EQ, e.NumThreads());
+ CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2 * e.NumThreads());
+}
+CHECK_BENCHMARK_RESULTS("BM_Counters_Threads/threads:%int", &CheckThreads);
+
+// ========================================================================= //
+// ---------------------- ThreadAvg Counters Output ------------------------ //
+// ========================================================================= //
+
+void BM_Counters_AvgThreads(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreads};
+ state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreads};
+}
+BENCHMARK(BM_Counters_AvgThreads)->ThreadRange(1, 8);
+ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int %console_report bar=%hrfloat foo=%hrfloat$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"bar\": %float,$", MR_Next},
+ {"\"foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgThreads/threads:%int\",%csv_report,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckAvgThreads(Results const& e) {
+ CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1);
+ CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2);
+}
+CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int",
+ &CheckAvgThreads);
+
+// ========================================================================= //
+// ---------------------- ThreadAvg Counters Output ------------------------ //
+// ========================================================================= //
+
+void BM_Counters_AvgThreadsRate(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+ namespace bm = benchmark;
+ state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate};
+ state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreadsRate};
+}
+BENCHMARK(BM_Counters_AvgThreadsRate)->ThreadRange(1, 8);
+ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreadsRate/threads:%int %console_report bar=%hrfloat/s foo=%hrfloat/s$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"bar\": %float,$", MR_Next},
+ {"\"foo\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgThreadsRate/threads:%int\",%csv_report,%float,%float$"}});
+// VS2013 does not allow this function to be passed as a lambda argument
+// to CHECK_BENCHMARK_RESULTS()
+void CheckAvgThreadsRate(Results const& e) {
+ CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1./e.DurationCPUTime(), 0.001);
+ CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2./e.DurationCPUTime(), 0.001);
+}
+CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreadsRate/threads:%int",
+ &CheckAvgThreadsRate);
+
+// ========================================================================= //
+// --------------------------- TEST CASES END ------------------------------ //
+// ========================================================================= //
+
+int main(int argc, char* argv[]) { RunOutputTests(argc, argv); }
diff --git a/libcxx/utils/google-benchmark/tools/compare.py b/libcxx/utils/google-benchmark/tools/compare.py
new file mode 100755
index 00000000000..c4a47e8d500
--- /dev/null
+++ b/libcxx/utils/google-benchmark/tools/compare.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python
+
+"""
+compare.py - versatile benchmark output compare tool
+"""
+
+import argparse
+from argparse import ArgumentParser
+import sys
+import gbench
+from gbench import util, report
+from gbench.util import *
+
+
+def check_inputs(in1, in2, flags):
+ """
+ Perform checking on the user provided inputs and diagnose any abnormalities
+ """
+ in1_kind, in1_err = classify_input_file(in1)
+ in2_kind, in2_err = classify_input_file(in2)
+ output_file = find_benchmark_flag('--benchmark_out=', flags)
+ output_type = find_benchmark_flag('--benchmark_out_format=', flags)
+ if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+ print(("WARNING: '--benchmark_out=%s' will be passed to both "
+ "benchmarks causing it to be overwritten") % output_file)
+ if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
+ print("WARNING: passing optional flags has no effect since both "
+ "inputs are JSON")
+ if output_type is not None and output_type != 'json':
+ print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
+ " is not supported.") % output_type)
+ sys.exit(1)
+
+
+def create_parser():
+ parser = ArgumentParser(
+ description='versatile benchmark output compare tool')
+ subparsers = parser.add_subparsers(
+ help='This tool has multiple modes of operation:',
+ dest='mode')
+
+ parser_a = subparsers.add_parser(
+ 'benchmarks',
+ help='The simplest use case: compare all the output of the two benchmarks')
+ baseline = parser_a.add_argument_group(
+ 'baseline', 'The benchmark baseline')
+ baseline.add_argument(
+ 'test_baseline',
+ metavar='test_baseline',
+ type=argparse.FileType('r'),
+ nargs=1,
+ help='A benchmark executable or JSON output file')
+ contender = parser_a.add_argument_group(
+ 'contender', 'The benchmark that will be compared against the baseline')
+ contender.add_argument(
+ 'test_contender',
+ metavar='test_contender',
+ type=argparse.FileType('r'),
+ nargs=1,
+ help='A benchmark executable or JSON output file')
+ parser_a.add_argument(
+ 'benchmark_options',
+ metavar='benchmark_options',
+ nargs=argparse.REMAINDER,
+ help='Arguments to pass when running benchmark executables')
+
+ parser_b = subparsers.add_parser(
+ 'filters', help='Compare one filter against another filter, both run on the same benchmark')
+ baseline = parser_b.add_argument_group(
+ 'baseline', 'The benchmark baseline')
+ baseline.add_argument(
+ 'test',
+ metavar='test',
+ type=argparse.FileType('r'),
+ nargs=1,
+ help='A benchmark executable or JSON output file')
+ baseline.add_argument(
+ 'filter_baseline',
+ metavar='filter_baseline',
+ type=str,
+ nargs=1,
+ help='The first filter, which will be used as the baseline')
+ contender = parser_b.add_argument_group(
+ 'contender', 'The benchmark that will be compared against the baseline')
+ contender.add_argument(
+ 'filter_contender',
+ metavar='filter_contender',
+ type=str,
+ nargs=1,
+ help='The second filter, which will be compared against the baseline')
+ parser_b.add_argument(
+ 'benchmark_options',
+ metavar='benchmark_options',
+ nargs=argparse.REMAINDER,
+ help='Arguments to pass when running benchmark executables')
+
+ parser_c = subparsers.add_parser(
+ 'benchmarksfiltered',
+ help='Compare one filter of the first benchmark with another filter of the second benchmark')
+ baseline = parser_c.add_argument_group(
+ 'baseline', 'The benchmark baseline')
+ baseline.add_argument(
+ 'test_baseline',
+ metavar='test_baseline',
+ type=argparse.FileType('r'),
+ nargs=1,
+ help='A benchmark executable or JSON output file')
+ baseline.add_argument(
+ 'filter_baseline',
+ metavar='filter_baseline',
+ type=str,
+ nargs=1,
+ help='The first filter, which will be used as the baseline')
+ contender = parser_c.add_argument_group(
+ 'contender', 'The benchmark that will be compared against the baseline')
+ contender.add_argument(
+ 'test_contender',
+ metavar='test_contender',
+ type=argparse.FileType('r'),
+ nargs=1,
+ help='The second benchmark executable or JSON output file, which will be compared against the baseline')
+ contender.add_argument(
+ 'filter_contender',
+ metavar='filter_contender',
+ type=str,
+ nargs=1,
+ help='The second filter, which will be compared against the baseline')
+ parser_c.add_argument(
+ 'benchmark_options',
+ metavar='benchmark_options',
+ nargs=argparse.REMAINDER,
+ help='Arguments to pass when running benchmark executables')
+
+ return parser
+
+
+def main():
+ # Parse the command line flags
+ parser = create_parser()
+ args, unknown_args = parser.parse_known_args()
+ assert not unknown_args
+ benchmark_options = args.benchmark_options
+
+ if args.mode == 'benchmarks':
+ test_baseline = args.test_baseline[0].name
+ test_contender = args.test_contender[0].name
+ filter_baseline = ''
+ filter_contender = ''
+
+ # NOTE: if test_baseline == test_contender, you are analyzing the stdev
+
+ description = 'Comparing %s to %s' % (test_baseline, test_contender)
+ elif args.mode == 'filters':
+ test_baseline = args.test[0].name
+ test_contender = args.test[0].name
+ filter_baseline = args.filter_baseline[0]
+ filter_contender = args.filter_contender[0]
+
+ # NOTE: if filter_baseline == filter_contender, you are analyzing the
+ # stdev
+
+ description = 'Comparing %s to %s (from %s)' % (
+ filter_baseline, filter_contender, args.test[0].name)
+ elif args.mode == 'benchmarksfiltered':
+ test_baseline = args.test_baseline[0].name
+ test_contender = args.test_contender[0].name
+ filter_baseline = args.filter_baseline[0]
+ filter_contender = args.filter_contender[0]
+
+ # NOTE: if test_baseline == test_contender and
+ # filter_baseline == filter_contender, you are analyzing the stdev
+
+ description = 'Comparing %s (from %s) to %s (from %s)' % (
+ filter_baseline, test_baseline, filter_contender, test_contender)
+ else:
+ # should never happen
+ print("Unrecognized mode of operation: '%s'" % args.mode)
+ exit(1)
+
+ check_inputs(test_baseline, test_contender, benchmark_options)
+
+ options_baseline = []
+ options_contender = []
+
+ if filter_baseline and filter_contender:
+ options_baseline = ['--benchmark_filter=%s' % filter_baseline]
+ options_contender = ['--benchmark_filter=%s' % filter_contender]
+
+ # Run the benchmarks and report the results
+ json1 = json1_orig = gbench.util.run_or_load_benchmark(
+ test_baseline, benchmark_options + options_baseline)
+ json2 = json2_orig = gbench.util.run_or_load_benchmark(
+ test_contender, benchmark_options + options_contender)
+
+ # Now, filter the benchmarks so that the difference report can work
+ if filter_baseline and filter_contender:
+ replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
+ json1 = gbench.report.filter_benchmark(
+ json1_orig, filter_baseline, replacement)
+ json2 = gbench.report.filter_benchmark(
+ json2_orig, filter_contender, replacement)
+
+ # Diff and output
+ output_lines = gbench.report.generate_difference_report(json1, json2)
+ print(description)
+ for ln in output_lines:
+ print(ln)
+
+
+import unittest
+
+
+class TestParser(unittest.TestCase):
+ def setUp(self):
+ self.parser = create_parser()
+ testInputs = os.path.join(
+ os.path.dirname(
+ os.path.realpath(__file__)),
+ 'gbench',
+ 'Inputs')
+ self.testInput0 = os.path.join(testInputs, 'test_baseline_run1.json')
+ self.testInput1 = os.path.join(testInputs, 'test_baseline_run2.json')
+
+ def test_benchmarks_basic(self):
+ parsed = self.parser.parse_args(
+ ['benchmarks', self.testInput0, self.testInput1])
+ self.assertEqual(parsed.mode, 'benchmarks')
+ self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+ self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+ self.assertFalse(parsed.benchmark_options)
+
+ def test_benchmarks_with_remainder(self):
+ parsed = self.parser.parse_args(
+ ['benchmarks', self.testInput0, self.testInput1, 'd'])
+ self.assertEqual(parsed.mode, 'benchmarks')
+ self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+ self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+ self.assertEqual(parsed.benchmark_options, ['d'])
+
+ def test_benchmarks_with_remainder_after_doubleminus(self):
+ parsed = self.parser.parse_args(
+ ['benchmarks', self.testInput0, self.testInput1, '--', 'e'])
+ self.assertEqual(parsed.mode, 'benchmarks')
+ self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+ self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+ self.assertEqual(parsed.benchmark_options, ['e'])
+
+ def test_filters_basic(self):
+ parsed = self.parser.parse_args(
+ ['filters', self.testInput0, 'c', 'd'])
+ self.assertEqual(parsed.mode, 'filters')
+ self.assertEqual(parsed.test[0].name, self.testInput0)
+ self.assertEqual(parsed.filter_baseline[0], 'c')
+ self.assertEqual(parsed.filter_contender[0], 'd')
+ self.assertFalse(parsed.benchmark_options)
+
+ def test_filters_with_remainder(self):
+ parsed = self.parser.parse_args(
+ ['filters', self.testInput0, 'c', 'd', 'e'])
+ self.assertEqual(parsed.mode, 'filters')
+ self.assertEqual(parsed.test[0].name, self.testInput0)
+ self.assertEqual(parsed.filter_baseline[0], 'c')
+ self.assertEqual(parsed.filter_contender[0], 'd')
+ self.assertEqual(parsed.benchmark_options, ['e'])
+
+ def test_filters_with_remainder_after_doubleminus(self):
+ parsed = self.parser.parse_args(
+ ['filters', self.testInput0, 'c', 'd', '--', 'f'])
+ self.assertEqual(parsed.mode, 'filters')
+ self.assertEqual(parsed.test[0].name, self.testInput0)
+ self.assertEqual(parsed.filter_baseline[0], 'c')
+ self.assertEqual(parsed.filter_contender[0], 'd')
+ self.assertEqual(parsed.benchmark_options, ['f'])
+
+ def test_benchmarksfiltered_basic(self):
+ parsed = self.parser.parse_args(
+ ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e'])
+ self.assertEqual(parsed.mode, 'benchmarksfiltered')
+ self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+ self.assertEqual(parsed.filter_baseline[0], 'c')
+ self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+ self.assertEqual(parsed.filter_contender[0], 'e')
+ self.assertFalse(parsed.benchmark_options)
+
+ def test_benchmarksfiltered_with_remainder(self):
+ parsed = self.parser.parse_args(
+ ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f'])
+ self.assertEqual(parsed.mode, 'benchmarksfiltered')
+ self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+ self.assertEqual(parsed.filter_baseline[0], 'c')
+ self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+ self.assertEqual(parsed.filter_contender[0], 'e')
+ self.assertEqual(parsed.benchmark_options[0], 'f')
+
+ def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
+ parsed = self.parser.parse_args(
+ ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g'])
+ self.assertEqual(parsed.mode, 'benchmarksfiltered')
+ self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+ self.assertEqual(parsed.filter_baseline[0], 'c')
+ self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+ self.assertEqual(parsed.filter_contender[0], 'e')
+ self.assertEqual(parsed.benchmark_options[0], 'g')
+
+
+if __name__ == '__main__':
+ # unittest.main()
+ main()
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
diff --git a/libcxx/utils/google-benchmark/tools/compare_bench.py b/libcxx/utils/google-benchmark/tools/compare_bench.py
index d54baaa0e8f..7bbf0d01574 100644..100755
--- a/libcxx/utils/google-benchmark/tools/compare_bench.py
+++ b/libcxx/utils/google-benchmark/tools/compare_bench.py
@@ -39,21 +39,20 @@ def main():
parser.add_argument(
'test2', metavar='test2', type=str, nargs=1,
help='A benchmark executable or JSON output file')
- # FIXME this is a dummy argument which will never actually match
- # any --benchmark flags but it helps generate a better usage message
parser.add_argument(
- 'benchmark_options', metavar='benchmark_option', nargs='*',
+ 'benchmark_options', metavar='benchmark_options', nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables'
)
args, unknown_args = parser.parse_known_args()
# Parse the command line flags
test1 = args.test1[0]
test2 = args.test2[0]
- if args.benchmark_options:
+ if unknown_args:
+ # should never happen
         print("Unrecognized positional arguments: '%s'"
- % args.benchmark_options)
+ % unknown_args)
exit(1)
- benchmark_options = unknown_args
+ benchmark_options = args.benchmark_options
check_inputs(test1, test2, benchmark_options)
# Run the benchmarks and report the results
json1 = gbench.util.run_or_load_benchmark(test1, benchmark_options)
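The compare_bench.py change above swaps the old dummy positional (whose values were then recovered via parse_known_args()) for argparse.REMAINDER, so everything after the two test arguments, including --benchmark_* flags, lands in args.benchmark_options and is forwarded to the executables, while unknown_args is now genuinely expected to be empty. A standalone illustration of that behaviour (the file names and flag below are placeholders, not part of the patch):

# Minimal sketch of nargs=argparse.REMAINDER; paths and flag are invented.
import argparse

p = argparse.ArgumentParser()
p.add_argument('test1', nargs=1)
p.add_argument('test2', nargs=1)
p.add_argument('benchmark_options', nargs=argparse.REMAINDER)

args, unknown = p.parse_known_args(
    ['./bench_old', './bench_new', '--benchmark_filter=BM_memcpy'])
print(args.benchmark_options)  # ['--benchmark_filter=BM_memcpy']
print(unknown)                 # []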
diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json
index 37faed46d13..d7ec6a9c8f6 100644
--- a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json
+++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json
@@ -29,6 +29,20 @@
"time_unit": "ns"
},
{
+ "name": "BM_1PercentFaster",
+ "iterations": 1000,
+ "real_time": 100,
+ "cpu_time": 100,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_1PercentSlower",
+ "iterations": 1000,
+ "real_time": 100,
+ "cpu_time": 100,
+ "time_unit": "ns"
+ },
+ {
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 100,
@@ -55,6 +69,34 @@
"real_time": 10000,
"cpu_time": 10000,
"time_unit": "ns"
+ },
+ {
+ "name": "BM_10PercentCPUToTime",
+ "iterations": 1000,
+ "real_time": 100,
+ "cpu_time": 100,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_ThirdFaster",
+ "iterations": 1000,
+ "real_time": 100,
+ "cpu_time": 100,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_BadTimeUnit",
+ "iterations": 1000,
+ "real_time": 0.4,
+ "cpu_time": 0.5,
+ "time_unit": "s"
+ },
+ {
+ "name": "BM_DifferentTimeUnit",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "s"
}
]
-}
\ No newline at end of file
+}
diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json
index aed5151d392..59a5ffaca4d 100644
--- a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json
+++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json
@@ -29,6 +29,20 @@
"time_unit": "ns"
},
{
+ "name": "BM_1PercentFaster",
+ "iterations": 1000,
+ "real_time": 98.9999999,
+ "cpu_time": 98.9999999,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_1PercentSlower",
+ "iterations": 1000,
+ "real_time": 100.9999999,
+ "cpu_time": 100.9999999,
+ "time_unit": "ns"
+ },
+ {
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 90,
@@ -45,8 +59,8 @@
{
"name": "BM_100xSlower",
"iterations": 1000,
- "real_time": 10000,
- "cpu_time": 10000,
+ "real_time": 1.0000e+04,
+ "cpu_time": 1.0000e+04,
"time_unit": "ns"
},
{
@@ -55,6 +69,34 @@
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
+ },
+ {
+ "name": "BM_10PercentCPUToTime",
+ "iterations": 1000,
+ "real_time": 110,
+ "cpu_time": 90,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_ThirdFaster",
+ "iterations": 1000,
+ "real_time": 66.665,
+ "cpu_time": 66.664,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_BadTimeUnit",
+ "iterations": 1000,
+ "real_time": 0.04,
+ "cpu_time": 0.6,
+ "time_unit": "s"
+ },
+ {
+ "name": "BM_DifferentTimeUnit",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "ns"
}
]
-}
\ No newline at end of file
+}
diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json
new file mode 100644
index 00000000000..15bc6980304
--- /dev/null
+++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json
@@ -0,0 +1,81 @@
+{
+ "context": {
+ "date": "2016-08-02 17:44:46",
+ "num_cpus": 4,
+ "mhz_per_cpu": 4228,
+ "cpu_scaling_enabled": false,
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_Hi",
+ "iterations": 1234,
+ "real_time": 42,
+ "cpu_time": 24,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_Zero",
+ "iterations": 1000,
+ "real_time": 10,
+ "cpu_time": 10,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_Zero/4",
+ "iterations": 4000,
+ "real_time": 40,
+ "cpu_time": 40,
+ "time_unit": "ns"
+ },
+ {
+ "name": "Prefix/BM_Zero",
+ "iterations": 2000,
+ "real_time": 20,
+ "cpu_time": 20,
+ "time_unit": "ns"
+ },
+ {
+ "name": "Prefix/BM_Zero/3",
+ "iterations": 3000,
+ "real_time": 30,
+ "cpu_time": 30,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_One",
+ "iterations": 5000,
+ "real_time": 5,
+ "cpu_time": 5,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_One/4",
+ "iterations": 2000,
+ "real_time": 20,
+ "cpu_time": 20,
+ "time_unit": "ns"
+ },
+ {
+ "name": "Prefix/BM_One",
+ "iterations": 1000,
+ "real_time": 10,
+ "cpu_time": 10,
+ "time_unit": "ns"
+ },
+ {
+ "name": "Prefix/BM_One/3",
+ "iterations": 1500,
+ "real_time": 15,
+ "cpu_time": 15,
+ "time_unit": "ns"
+ },
+ {
+ "name": "BM_Bye",
+ "iterations": 5321,
+ "real_time": 11,
+ "cpu_time": 63,
+ "time_unit": "ns"
+ }
+ ]
+}
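This new input backs the TestReportDifferenceBetweenFamilies test added to report.py below: the BM_Zero* and BM_One* entries form two benchmark families whose variants share suffixes, and filter_benchmark() renames each family to a common stem so the rows can be paired by name. A quick illustration of that renaming step, using the same regex and replacement as the test:

# Same regex/replacement as the new unit test; both families collapse to ".".
import re
for name in ["BM_Zero/4", "Prefix/BM_Zero/3"]:
    print(re.sub("BM_Z.ro", ".", name))  # -> "./4", then "Prefix/./3"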
diff --git a/libcxx/utils/google-benchmark/tools/gbench/report.py b/libcxx/utils/google-benchmark/tools/gbench/report.py
index 8f1b0fa8604..8d68fe96ee6 100644
--- a/libcxx/utils/google-benchmark/tools/gbench/report.py
+++ b/libcxx/utils/google-benchmark/tools/gbench/report.py
@@ -1,6 +1,8 @@
"""report.py - Utilities for reporting statistics about benchmark results
"""
import os
+import re
+import copy
class BenchmarkColor(object):
def __init__(self, name, code):
@@ -66,25 +68,47 @@ def calculate_change(old_val, new_val):
return float(new_val - old_val) / abs(old_val)
+def filter_benchmark(json_orig, family, replacement=""):
+ """
+ Apply a filter to the json, and only leave the 'family' of benchmarks.
+ """
+ regex = re.compile(family)
+ filtered = {}
+ filtered['benchmarks'] = []
+ for be in json_orig['benchmarks']:
+ if not regex.search(be['name']):
+ continue
+ filteredbench = copy.deepcopy(be) # Do NOT modify the old name!
+ filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
+ filtered['benchmarks'].append(filteredbench)
+ return filtered
+
+
def generate_difference_report(json1, json2, use_color=True):
"""
Calculate and report the difference between each test of two benchmarks
runs specified as 'json1' and 'json2'.
"""
- first_col_width = find_longest_name(json1['benchmarks']) + 5
+ first_col_width = find_longest_name(json1['benchmarks'])
def find_test(name):
for b in json2['benchmarks']:
if b['name'] == name:
return b
return None
- first_line = "{:<{}s} Time CPU Old New".format(
- 'Benchmark', first_col_width)
+ first_col_width = max(first_col_width, len('Benchmark'))
+ first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format(
+ 'Benchmark', 12 + first_col_width)
output_strs = [first_line, '-' * len(first_line)]
- for bn in json1['benchmarks']:
+
+ gen = (bn for bn in json1['benchmarks'] if 'real_time' in bn and 'cpu_time' in bn)
+ for bn in gen:
other_bench = find_test(bn['name'])
if not other_bench:
continue
+ if bn['time_unit'] != other_bench['time_unit']:
+ continue
+
def get_color(res):
if res > 0.05:
return BC_FAIL
@@ -92,12 +116,13 @@ def generate_difference_report(json1, json2, use_color=True):
return BC_WHITE
else:
return BC_CYAN
- fmt_str = "{}{:<{}s}{endc}{}{:+9.2f}{endc}{}{:+14.2f}{endc}{:14d}{:14d}"
+ fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
tres = calculate_change(bn['real_time'], other_bench['real_time'])
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
output_strs += [color_format(use_color, fmt_str,
BC_HEADER, bn['name'], first_col_width,
get_color(tres), tres, get_color(cpures), cpures,
+ bn['real_time'], other_bench['real_time'],
bn['cpu_time'], other_bench['cpu_time'],
endc=BC_ENDC)]
return output_strs
@@ -121,24 +146,63 @@ class TestReportDifference(unittest.TestCase):
def test_basic(self):
expect_lines = [
- ['BM_SameTimes', '+0.00', '+0.00', '10', '10'],
- ['BM_2xFaster', '-0.50', '-0.50', '50', '25'],
- ['BM_2xSlower', '+1.00', '+1.00', '50', '100'],
- ['BM_10PercentFaster', '-0.10', '-0.10', '100', '90'],
- ['BM_10PercentSlower', '+0.10', '+0.10', '100', '110'],
- ['BM_100xSlower', '+99.00', '+99.00', '100', '10000'],
- ['BM_100xFaster', '-0.99', '-0.99', '10000', '100'],
+ ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
+ ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
+ ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
+ ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
+ ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
+ ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
+ ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
+ ['BM_100xSlower', '+99.0000', '+99.0000', '100', '10000', '100', '10000'],
+ ['BM_100xFaster', '-0.9900', '-0.9900', '10000', '100', '10000', '100'],
+ ['BM_10PercentCPUToTime', '+0.1000', '-0.1000', '100', '110', '100', '90'],
+ ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
+ ['BM_BadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
]
json1, json2 = self.load_results()
output_lines_with_header = generate_difference_report(json1, json2, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
- for i in xrange(0, len(output_lines)):
+ for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
- self.assertEqual(len(parts), 5)
+ self.assertEqual(len(parts), 7)
+ self.assertEqual(parts, expect_lines[i])
+
+
+class TestReportDifferenceBetweenFamilies(unittest.TestCase):
+ def load_result(self):
+ import json
+ testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
+ testOutput = os.path.join(testInputs, 'test2_run.json')
+ with open(testOutput, 'r') as f:
+ json = json.load(f)
+ return json
+
+ def test_basic(self):
+ expect_lines = [
+ ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
+ ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
+ ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
+ ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
+ ]
+ json = self.load_result()
+ json1 = filter_benchmark(json, "BM_Z.ro", ".")
+ json2 = filter_benchmark(json, "BM_O.e", ".")
+ output_lines_with_header = generate_difference_report(json1, json2, use_color=False)
+ output_lines = output_lines_with_header[2:]
+ print("\n")
+ print("\n".join(output_lines_with_header))
+ self.assertEqual(len(output_lines), len(expect_lines))
+ for i in range(0, len(output_lines)):
+ parts = [x for x in output_lines[i].split(' ') if x]
+ self.assertEqual(len(parts), 7)
self.assertEqual(parts, expect_lines[i])
if __name__ == '__main__':
unittest.main()
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
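For reference, the expected rows in the tests above follow directly from calculate_change(), which reports the relative change as (new - old) / |old|; the report now also prints the absolute old/new real and CPU times, and skips any pair whose time_unit differs, which is why BM_DifferentTimeUnit never shows up in the expected output. A worked example of the formula:

# Worked example of the relative-change formula used by the report above.
def calculate_change(old_val, new_val):
    return float(new_val - old_val) / abs(old_val)

print(calculate_change(100, 10000))  # 99.0  -> rendered "+99.0000" (BM_100xSlower)
print(calculate_change(100, 110))    # 0.1   -> rendered "+0.1000"  (BM_10PercentSlower)
print(calculate_change(100, 90))     # -0.1  -> rendered "-0.1000"  (BM_10PercentFaster)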