1 files changed, 69 insertions, 0 deletions
diff --git a/llvm/unittests/FuzzMutate/ReservoirSamplerTest.cpp b/llvm/unittests/FuzzMutate/ReservoirSamplerTest.cpp
new file mode 100644
index 00000000000..d246ba1428b
--- /dev/null
+++ b/llvm/unittests/FuzzMutate/ReservoirSamplerTest.cpp
@@ -0,0 +1,69 @@
+//===- ReservoirSampler.cpp - Tests for the ReservoirSampler --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/Random.h"
+#include "gtest/gtest.h"
+#include <random>
+
+using namespace llvm;
+
+TEST(ReservoirSamplerTest, OneItem) {
+  std::mt19937 Rand;
+  auto Sampler = makeSampler(Rand, 7, 1);
+  ASSERT_FALSE(Sampler.isEmpty());
+  ASSERT_EQ(7, Sampler.getSelection());
+}
+
+TEST(ReservoirSamplerTest, NoWeight) {
+  std::mt19937 Rand;
+  auto Sampler = makeSampler(Rand, 7, 0);
+  ASSERT_TRUE(Sampler.isEmpty());
+}
+
+TEST(ReservoirSamplerTest, Uniform) {
+  std::mt19937 Rand;
+
+  // Run three chi-squared tests to check that the distribution is reasonably
+  // uniform.
+  std::vector<int> Items = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+  int Failures = 0;
+  for (int Run = 0; Run < 3; ++Run) {
+    std::vector<int> Counts(Items.size(), 0);
+
+    // We need $np_s > 5$ at minimum, but we're better off going a couple of
+    // orders of magnitude larger.
+    int N = Items.size() * 5 * 100;
+    for (int I = 0; I < N; ++I) {
+      auto Sampler = makeSampler(Rand, Items);
+      Counts[Sampler.getSelection()] += 1;
+    }
+
+    // Knuth. TAOCP Vol. 2, 3.3.1 (8):
+    // $V = \frac{1}{n} \sum_{s=1}^{k} \left(\frac{Y_s^2}{p_s}\right) - n$
+    double Ps = 1.0 / Items.size();
+    double Sum = 0.0;
+    for (int Ys : Counts)
+      Sum += Ys * Ys / Ps;
+    double V = (Sum / N) - N;
+
+    assert(Items.size() == 10 && "Our chi-squared values assume 10 items");
+    // Since we have 10 items, there are 9 degrees of freedom and the table of
+    // chi-squared values is as follows:
+    //
+    //     | p=1%  |   5%  |  25%  |  50%  |  75%  |  95%  |  99%  |
+    // v=9 | 2.088 | 3.325 | 5.899 | 8.343 | 11.39 | 16.92 | 21.67 |
+    //
+    // Check that we're in the likely range of results.
+    //if (V < 2.088 || V > 21.67)
+    if (V < 2.088 || V > 21.67)
+      ++Failures;
+  }
+  EXPECT_LT(Failures, 3) << "Non-uniform distribution?";
+}