diff --git a/package.json b/package.json index 73df16aa..782e9d4b 100644 --- a/package.json +++ b/package.json @@ -53,7 +53,8 @@ "devDependencies": { "smash": "~0.0.12", "uglify-js": "2.4.0", - "vows": "0.7.0" + "vows": "0.7.0", + "seedrandom": "0.1.6" }, "scripts": { "test": "node_modules/.bin/vows" diff --git a/test/math/random-test.js b/test/math/random-test.js index 02738c66..63601773 100644 --- a/test/math/random-test.js +++ b/test/math/random-test.js @@ -1,37 +1,143 @@ var vows = require("vows"), load = require("../load"), - assert = require("../assert"); + assert = require("../assert"), + seedrandom = require('seedrandom'); var suite = vows.describe("d3.random"); +/** + * Testing a random number generator is a bit more complicated than testing + * deterministic code, so we use different techniques. + * + * If the RNG is correct, each test in this suite will pass with probability + * at least P. The tests have been designed so that P ≥ 98%. Specific values + * of P are given above each case. We use the seedrandom module to get around + * this non-deterministic aspect -- so it is safe to assume that if the tests + * fail, then d3's RNG is broken. + * + * More on RNG testing here: + * @see http://www.johndcook.com/Beautiful_Testing_ch10.pdf + */ + +// Overwrites Math.random to a seeded random function. +// (by default Math.random is seeded with current time) +Math.seedrandom('a random seed.'); + suite.addBatch({ "random": { topic: load("math/random").expression("d3.random"), "normal": { - "topic": function(random) { - return random.normal(); - }, - "returns a number": function(r) { - assert.typeOf(r(), "number"); - } + "topic": function(random) { return random.normal(-43289, 38.8); }, + + // P = 98% + "has normal distribution" : KSTest(normalCDF(-43289, 38.8)) }, "logNormal": { - "topic": function(random) { - return random.logNormal(); - }, - "returns a number": function(r) { - assert.typeOf(r(), "number"); - } + // Use reasonable values for mean here because random.logNormal() grows + // exponentially with the mean. + "topic": function(random) { return random.logNormal(10, 2.5); }, + + // P = 98% + "has log-normal distribution" : KSTest(logNormalCDF(10, 2.5)) }, "irwinHall": { - "topic": function(random) { - return random.irwinHall(10); - }, - "returns a number": function(r) { - assert.typeOf(r(), "number"); - } + "topic": function(random) { return random.irwinHall(10); }, + + // P = 98% + "has Irwin-Hall distribution" : KSTest(irwinHallCDF(10)) } } }); +/** + * A macro that that takes a RNG and performs a Kolmogorov-Smirnov test: + * asserts that the values generated by the RNG could be generated by the + * distribution with cumulative distribution function `cdf'. Each test runs in + * O(n log n) * O(cdf). + * + * Passes with P≈98%. + * + * @param cdf function(x) { returns CDF of the distribution evaluated at x } + * @param n number of sample points. Higher n = better evaluation, slower test. + * @return function(rng) { + * // asserts that rng produces values fitting the distribution + * } + */ +function KSTest(cdf, n) { + return function(rng) { + var n = 1000; + var values = []; + for (var i = 0; i < n; i++) { + values.push(rng()); + } + values.sort(function(a, b) { return a - b; }); + + K_positive = -Infinity; // Identity of max() function + for (var i = 0; i < n; i++) { + var edf_i = i / n; // Empirical distribution function evaluated at x=values[i] + K_positive = Math.max(K_positive, edf_i - cdf(values[i])); + } + K_positive *= Math.sqrt(n); + + // Derivation of this interval is difficult. + // @see K-S test in Knuth's AoCP vol.2 + assert.inDelta(K_positive, 0.723255, 0.794145); + } +} + +function normalCDF(mean, stddev) { + // Logistic approximation to normal CDF around N(mean, stddev). + return function(x) { + return 1 / (1 + Math.exp(-0.07056 * Math.pow((x-mean)/stddev, 3) - 1.5976 * (x-mean)/stddev)); + } +} + +function logNormalCDF(mean, stddev) { + // @see http://en.wikipedia.org/wiki/Log-normal_distribution#Similar_distributions + var exponent = Math.PI / (stddev * Math.sqrt(3)); + var numerator = Math.exp(mean); + return function(x) { + return 1 / (Math.pow(numerator / x, exponent) + 1); + } +} + +function irwinHallCDF(n) { + var normalisingFactor = factorial(n); + + // Precompute binom(n, k), k=0..n for efficiency. (this array gets stored + // inside the closure, so it is only computed once) + var binoms = []; + for (var k = 0; k <= n; k++) { + binoms.push(binom(n, k)); + } + + // @see CDF at http://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution + return function(x) { + var t = 0; + + // What d3 calls Irwin-Hill distribution is actually a Bates distribution: + // the Irwin-Hall distribution divided by n. So we multiply the Bates + // distribution's x-value by n to get the Irwin-Hall CDF at x. + x *= n; + + for (var k = 0; k < x; k++) { + t += Math.pow(-1, k % 2) * binoms[k] * Math.pow(x - k, n); + } + return t / normalisingFactor; + } +} + +function factorial(n) { + var t = 1; + for (var i = 2; i <= n; i++) { + t *= i; + } + return t; +} + +function binom(n, k) { + if (k < 0 || k > n) return undefined; // only defined for 0 <= k <= n + return factorial(n) / (factorial(k) * factorial(n - k)); +} + suite.export(module);