d3/lib/science/science.stats.js

(function(){science.stats = {};
// Bandwidth selectors for Gaussian kernels.
// Based on R's implementations in `stats.bw`.
science.stats.bandwidth = {

  // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
  nrd0: function(x) {
    var hi = Math.sqrt(science.stats.variance(x));
    if (!(lo = Math.min(hi, science.stats.iqr(x) / 1.34)))
      (lo = hi) || (lo = Math.abs(x[1])) || (lo = 1);
    return .9 * lo * Math.pow(x.length, -.2);
  },

  // Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and
  // Visualization. Wiley.
  nrd: function(x) {
    var h = science.stats.iqr(x) / 1.34;
    return 1.06 * Math.min(Math.sqrt(science.stats.variance(x)), h)
      * Math.pow(x.length, -1/5);
  }
};
science.stats.distance = {
  euclidean: function(a, b) {
    var n = a.length,
        i = -1,
        s = 0,
        x;
    while (++i < n) {
      x = a[i] - b[i];
      s += x * x;
    }
    return Math.sqrt(s);
  },
  manhattan: function(a, b) {
    var n = a.length,
        i = -1,
        s = 0;
    while (++i < n) s += Math.abs(a[i] - b[i]);
    return s;
  },
  minkowski: function(p) {
    return function(a, b) {
      var n = a.length,
          i = -1,
          s = 0;
      while (++i < n) s += Math.pow(Math.abs(a[i] - b[i]), p);
      return Math.pow(s, 1 / p);
    };
  },
  chebyshev: function(a, b) {
    var n = a.length,
        i = -1,
        max = 0,
        x;
    while (++i < n) {
      x = Math.abs(a[i] - b[i]);
      if (x > max) max = x;
    }
    return max;
  },
  hamming: function(a, b) {
    var n = a.length,
        i = -1,
        d = 0;
    while (++i < n) if (a[i] !== b[i]) d++;
    return d;
  },
  jaccard: function(a, b) {
    var n = a.length,
        i = -1,
        s = 0;
    while (++i < n) if (a[i] === b[i]) s++;
    return s / n;
  },
  braycurtis: function(a, b) {
    var n = a.length,
        i = -1,
        s0 = 0,
        s1 = 0,
        ai,
        bi;
    while (++i < n) {
      ai = a[i];
      bi = b[i];
      s0 += Math.abs(ai - bi);
      s1 += Math.abs(ai + bi);
    }
    return s0 / s1;
  }
};
// Based on implementation in http://picomath.org/.
science.stats.erf = function(x) {
  var a1 =  0.254829592,
      a2 = -0.284496736,
      a3 =  1.421413741,
      a4 = -1.453152027,
      a5 =  1.061405429,
      p  =  0.3275911;

  // Save the sign of x
  var sign = x < 0 ? -1 : 1;
  if (x < 0) {
    sign = -1;
    x = -x;
  }

  // A&S formula 7.1.26
  var t = 1 / (1 + p * x);
  return sign * (
    1 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1)
    * t * Math.exp(-x * x));
};
science.stats.phi = function(x) {
  return .5 * (1 + science.stats.erf(x / Math.SQRT2));
};
// See <http://en.wikipedia.org/wiki/Kernel_(statistics)>.
science.stats.kernel = {
  uniform: function(u) {
    if (u <= 1 && u >= -1) return .5;
    return 0;
  },
  triangular: function(u) {
    if (u <= 1 && u >= -1) return 1 - Math.abs(u);
    return 0;
  },
  epanechnikov: function(u) {
    if (u <= 1 && u >= -1) return .75 * (1 - u * u);
    return 0;
  },
  quartic: function(u) {
    if (u <= 1 && u >= -1) {
      var tmp = 1 - u * u;
      return (15 / 16) * tmp * tmp;
    }
    return 0;
  },
  triweight: function(u) {
    if (u <= 1 && u >= -1) {
      var tmp = 1 - u * u;
      return (35 / 32) * tmp * tmp * tmp;
    }
    return 0;
  },
  gaussian: function(u) {
    return 1 / Math.sqrt(2 * Math.PI) * Math.exp(-.5 * u * u);
  },
  cosine: function(u) {
    if (u <= 1 && u >= -1) return Math.PI / 4 * Math.cos(Math.PI / 2 * u);
    return 0;
  }
};
// http://exploringdata.net/den_trac.htm
science.stats.kde = function() {
  var kernel = science.stats.kernel.gaussian,
      sample = [],
      bandwidth = science.stats.bandwidth.nrd;

  function kde(points, i) {
    var bw = bandwidth.call(this, sample);
    return points.map(function(x) {
      var i = -1,
          y = 0,
          n = sample.length;
      while (++i < n) {
        y += kernel((x - sample[i]) / bw);
      }
      return [x, y / bw / n];
    });
  }

  kde.kernel = function(x) {
    if (!arguments.length) return kernel;
    kernel = x;
    return kde;
  };

  kde.sample = function(x) {
    if (!arguments.length) return sample;
    sample = x;
    return kde;
  };

  kde.bandwidth = function(x) {
    if (!arguments.length) return bandwidth;
    bandwidth = science.functor(x);
    return kde;
  };

  return kde;
};
// Based on figue implementation by Jean-Yves Delort.
// http://code.google.com/p/figue/
science.stats.kmeans = function() {
  var distance = science.stats.distance.euclidean,
      maxIterations = 1000,
      k = 1;

  function kmeans(vectors) {
    var n = vectors.length,
        assignments = [],
        clusterSizes = [],
        repeat = 1,
        iterations = 0,
        centroids = science_stats_kmeansRandom(k, vectors),
        newCentroids,
        i,
        j,
        x,
        d,
        min,
        best;

    while (repeat && iterations < maxIterations) {
      // Assignment step.
      j = -1; while (++j < k) {
        clusterSizes[j] = 0;
      }

      i = -1; while (++i < n) {
        x = vectors[i];
        min = Infinity;
        j = -1; while (++j < k) {
          d = distance.call(this, centroids[j], x);
          if (d < min) {
            min = d;
            best = j;
          }
        }
        clusterSizes[assignments[i] = best]++;
      }

      // Update centroids step.
      newCentroids = [];
      i = -1; while (++i < n) {
        x = assignments[i];
        d = newCentroids[x];
        if (d == null) newCentroids[x] = vectors[i].slice();
        else {
          j = -1; while (++j < d.length) {
            d[j] += vectors[i][j];
          }
        }
      }
      j = -1; while (++j < k) {
        x = newCentroids[j];
        d = 1 / clusterSizes[j];
        i = -1; while (++i < x.length) x[i] *= d;
      }

      // Check convergence.
      repeat = 0;
      j = -1; while (++j < k) {
        if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
          repeat = 1;
          break;
        }
      }
      centroids = newCentroids;
      iterations++;
    }
    return {assignments: assignments, centroids: centroids};
  }

  kmeans.k = function(x) {
    if (!arguments.length) return k;
    k = x;
    return kmeans;
  };

  kmeans.distance = function(x) {
    if (!arguments.length) return distance;
    distance = x;
    return kmeans;
  };

  return kmeans;
};

function science_stats_kmeansCompare(a, b) {
  if (!a || !b || a.length !== b.length) return false;
  var n = a.length,
      i = -1;
  while (++i < n) if (a[i] !== b[i]) return false;
  return true;
}

// Returns an array of k distinct vectors randomly selected from the input
// array of vectors. Returns null if k > n or if there are less than k distinct
// objects in vectors.
function science_stats_kmeansRandom(k, vectors) {
  var n = vectors.length;
  if (k > n) return null;
  
  var selected_vectors = [];
  var selected_indices = [];
  var tested_indices = {};
  var tested = 0;
  var selected = 0;
  var i,
      vector,
      select;

  while (selected < k) {
    if (tested === n) return null;
    
    var random_index = Math.floor(Math.random() * n);
    if (random_index in tested_indices) continue;
    
    tested_indices[random_index] = 1;
    tested++;
    vector = vectors[random_index];
    select = true;
    for (i = 0; i < selected; i++) {
      if (science_stats_kmeansCompare(vector, selected_vectors[i])) {
        select = false;
        break;
      }
    }
    if (select) {
      selected_vectors[selected] = vector;
      selected_indices[selected] = random_index;
      selected++;
    }
  }
  return selected_vectors;
}
science.stats.hcluster = function() {
  var distance = science.stats.distance.euclidean,
      linkage = "simple"; // simple, complete or average

  function hcluster(vectors) {
    var n = vectors.length,
        dMin = [],
        cSize = [],
        distMatrix = [],
        clusters = [],
        c1,
        c2,
        c1Cluster,
        c2Cluster,
        p,
        root,
        i,
        j;

    // Initialise distance matrix and vector of closest clusters.
    i = -1; while (++i < n) {
      dMin[i] = 0;
      distMatrix[i] = [];
      j = -1; while (++j < n) {
        distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]);
        if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j;
      }
    }

    // create leaves of the tree
    i = -1; while (++i < n) {
      clusters[i] = [];
      clusters[i][0] = {
        left: null,
        right: null,
        dist: 0,
        centroid: vectors[i],
        size: 1,
        depth: 0
      };
      cSize[i] = 1;
    }

    // Main loop
    for (p = 0; p < n-1; p++) {
      // find the closest pair of clusters
      c1 = 0;
      for (i = 0; i < n; i++) {
        if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
      }
      c2 = dMin[c1];

      // create node to store cluster info 
      c1Cluster = clusters[c1][0];
      c2Cluster = clusters[c2][0];

      newCluster = {
        left: c1Cluster,
        right: c2Cluster,
        dist: distMatrix[c1][c2],
        centroid: calculateCentroid(c1Cluster.size, c1Cluster.centroid,
          c2Cluster.size, c2Cluster.centroid),
        size: c1Cluster.size + c2Cluster.size,
        depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)
      };
      clusters[c1].splice(0, 0, newCluster);
      cSize[c1] += cSize[c2];

      // overwrite row c1 with respect to the linkage type
      for (j = 0; j < n; j++) {
        switch (linkage) {
          case "single":
            if (distMatrix[c1][j] > distMatrix[c2][j])
              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
            break;
          case "complete":
            if (distMatrix[c1][j] < distMatrix[c2][j])
              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
            break;
          case "average":
            distMatrix[j][c1] = distMatrix[c1][j] = (cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j]) / (cSize[c1] + cSize[j]);
            break;
        }
      }
      distMatrix[c1][c1] = Infinity;

      // infinity out old row c2 and column c2
      for (i = 0; i < n; i++)
        distMatrix[i][c2] = distMatrix[c2][i] = Infinity;

      // update dmin and replace ones that previous pointed to c2 to point to c1
      for (j = 0; j < n; j++) {
        if (dMin[j] == c2) dMin[j] = c1;
        if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]]) dMin[c1] = j;
      }

      // keep track of the last added cluster
      root = newCluster;
    }

    return root;
  }

  hcluster.distance = function(x) {
    if (!arguments.length) return distance;
    distance = x;
    return hcluster;
  };

  return hcluster;
};

function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
  var newCentroid = [],
      newSize = c1Size + c2Size,
      n = c1Centroid.length,
      i = -1;
  while (++i < n) {
    newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
  }
  return newCentroid;
}
science.stats.iqr = function(x) {
  var quartiles = science.stats.quantiles(x, [.25, .75]);
  return quartiles[1] - quartiles[0];
};
// Based on org.apache.commons.math.analysis.interpolation.LoessInterpolator
// from http://commons.apache.org/math/
science.stats.loess = function() {    
  var bandwidth = .3,
      robustnessIters = 2,
      accuracy = 1e-12;

  function smooth(xval, yval, weights) {
    var n = xval.length,
        i;

    if (n !== yval.length) throw {error: "Mismatched array lengths"};
    if (n == 0) throw {error: "At least one point required."};

    if (arguments.length < 3) {
      weights = [];
      i = -1; while (++i < n) weights[i] = 1;
    }

    science_stats_loessFiniteReal(xval);
    science_stats_loessFiniteReal(yval);
    science_stats_loessFiniteReal(weights);
    science_stats_loessStrictlyIncreasing(xval);

    if (n == 1) return [yval[0]];
    if (n == 2) return [yval[0], yval[1]];

    var bandwidthInPoints = Math.floor(bandwidth * n);

    if (bandwidthInPoints < 2) throw {error: "Bandwidth too small."};

    var res = [],
        residuals = [],
        robustnessWeights = [];

    // Do an initial fit and 'robustnessIters' robustness iterations.
    // This is equivalent to doing 'robustnessIters+1' robustness iterations
    // starting with all robustness weights set to 1.
    i = -1; while (++i < n) {
      res[i] = 0;
      residuals[i] = 0;
      robustnessWeights[i] = 1;
    }

    var iter = -1;
    while (++iter <= robustnessIters) {
      var bandwidthInterval = [0, bandwidthInPoints - 1];
      // At each x, compute a local weighted linear regression
      var x;
      i = -1; while (++i < n) {
        x = xval[i];

        // Find out the interval of source points on which
        // a regression is to be made.
        if (i > 0) {
          science_stats_loessUpdateBandwidthInterval(xval, weights, i, bandwidthInterval);
        }

        var ileft = bandwidthInterval[0],
            iright = bandwidthInterval[1];

        // Compute the point of the bandwidth interval that is
        // farthest from x
        var edge = (xval[i] - xval[ileft]) > (xval[iright] - xval[i]) ? ileft : iright;

        // Compute a least-squares linear fit weighted by
        // the product of robustness weights and the tricube
        // weight function.
        // See http://en.wikipedia.org/wiki/Linear_regression
        // (section "Univariate linear case")
        // and http://en.wikipedia.org/wiki/Weighted_least_squares
        // (section "Weighted least squares")
        var sumWeights = 0,
            sumX = 0,
            sumXSquared = 0,
            sumY = 0,
            sumXY = 0,
            denom = Math.abs(1 / (xval[edge] - x));

        for (var k = ileft; k <= iright; ++k) {
          var xk   = xval[k],
              yk   = yval[k],
              dist = k < i ? x - xk : xk - x,
              w    = science_stats_loessTricube(dist * denom) * robustnessWeights[k] * weights[k],
              xkw  = xk * w;
          sumWeights += w;
          sumX += xkw;
          sumXSquared += xk * xkw;
          sumY += yk * w;
          sumXY += yk * xkw;
        }

        var meanX = sumX / sumWeights,
            meanY = sumY / sumWeights,
            meanXY = sumXY / sumWeights,
            meanXSquared = sumXSquared / sumWeights;

        var beta = (Math.sqrt(Math.abs(meanXSquared - meanX * meanX)) < accuracy)
            ? 0 : ((meanXY - meanX * meanY) / (meanXSquared - meanX * meanX));

        var alpha = meanY - beta * meanX;

        res[i] = beta * x + alpha;
        residuals[i] = Math.abs(yval[i] - res[i]);
      }

      // No need to recompute the robustness weights at the last
      // iteration, they won't be needed anymore
      if (iter === robustnessIters) {
        break;
      }

      // Recompute the robustness weights.

      // Find the median residual.
      var sortedResiduals = residuals.slice();
      sortedResiduals.sort();
      var medianResidual = sortedResiduals[Math.floor(n / 2)];

      if (Math.abs(medianResidual) < accuracy)
        break;

      var arg,
          w;
      i = -1; while (++i < n) {
        arg = residuals[i] / (6 * medianResidual);
        robustnessWeights[i] = (arg >= 1) ? 0 : ((w = 1 - arg * arg) * w);
      }
    }

    return res;
  }

  smooth.bandwidth = function(x) {
    if (!arguments.length) return x;
    bandwidth = x;
    return smooth;
  };

  smooth.robustnessIterations = function(x) {
    if (!arguments.length) return x;
    robustnessIters = x;
    return smooth;
  };

  smooth.accuracy = function(x) {
    if (!arguments.length) return x;
    accuracy = x;
    return smooth;
  };

  return smooth;
};

function science_stats_loessFiniteReal(values) {
  var n = values.length,
      i = -1;

  while (++i < n) if (!isFinite(values[i])) return false;

  return true;
}

function science_stats_loessStrictlyIncreasing(xval) {
  var n = xval.length,
      i = 0;

  while (++i < n) if (xval[i - 1] >= xval[i]) return false;

  return true;
}

// Compute the tricube weight function.
// http://en.wikipedia.org/wiki/Local_regression#Weight_function
function science_stats_loessTricube(x) {
  return (x = 1 - x * x * x) * x * x;
}

// Given an index interval into xval that embraces a certain number of
// points closest to xval[i-1], update the interval so that it embraces
// the same number of points closest to xval[i], ignoring zero weights.
function science_stats_loessUpdateBandwidthInterval(
  xval, weights, i, bandwidthInterval) {

  var left = bandwidthInterval[0],
      right = bandwidthInterval[1];

  // The right edge should be adjusted if the next point to the right
  // is closer to xval[i] than the leftmost point of the current interval
  var nextRight = science_stats_loessNextNonzero(weights, right);
  if ((nextRight < xval.length) && (xval[nextRight] - xval[i]) < (xval[i] - xval[left])) {
    var nextLeft = science_stats_loessNextNonzero(weights, left);
    bandwidthInterval[0] = nextLeft;
    bandwidthInterval[1] = nextRight;
  }
}

function science_stats_loessNextNonzero(weights, i) {
  var j = i + 1;
  while (j < weights.length && weights[j] === 0) j++;
  return j;
}
// Welford's algorithm.
science.stats.mean = function(x) {
  var n = x.length;
  if (n === 0) return NaN;
  var m = 0,
      i = -1;
  while (++i < n) m += (x[i] - m) / (i + 1);
  return m;
};
science.stats.median = function(x) {
  return science.stats.quantiles(x, [.5])[0];
};
science.stats.mode = function(x) {
  x = x.slice().sort(science.ascending);
  var mode,
      n = x.length,
      i = -1,
      l = i,
      last = null,
      max = 0,
      tmp,
      v;
  while (++i < n) {
    if ((v = x[i]) !== last) {
      if ((tmp = i - l) > max) {
        max = tmp;
        mode = last;
      }
      last = v;
      l = i;
    }
  }
  return mode;
};
// Uses R's quantile algorithm type=7.
science.stats.quantiles = function(d, quantiles) {
  d = d.slice().sort(science.ascending);
  var n_1 = d.length - 1;
  return quantiles.map(function(q) {
    if (q === 0) return d[0];
    else if (q === 1) return d[n_1];

    var index = 1 + q * n_1,
        lo = Math.floor(index),
        h = index - lo,
        a = d[lo - 1];

    return h === 0 ? a : a + h * (d[lo] - a);
  });
};
// Unbiased estimate of a sample's variance.
// Also known as the sample variance, where the denominator is n - 1.
science.stats.variance = function(x) {
  var n = x.length;
  if (n < 1) return NaN;
  if (n === 1) return 0;
  var mean = science.stats.mean(x),
      i = -1,
      s = 0;
  while (++i < n) {
    var v = x[i] - mean;
    s += v * v;
  }
  return s / (n - 1);
};
})()
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								(function(){science.stats = {};
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								// Bandwidth selectors for Gaussian kernels.
 								// Based on R's implementations in `stats.bw`.
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.bandwidth = {
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
 								  // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
 								  nrd0: function(x) {
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								    var hi = Math.sqrt(science.stats.variance(x));
 								    if (!(lo = Math.min(hi, science.stats.iqr(x) / 1.34)))
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								      (lo = hi) || (lo = Math.abs(x[1])) || (lo = 1);
 								    return .9 * lo * Math.pow(x.length, -.2);
 								  },
 								  // Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and
 								  // Visualization. Wiley.
 								  nrd: function(x) {
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								    var h = science.stats.iqr(x) / 1.34;
 								    return 1.06 * Math.min(Math.sqrt(science.stats.variance(x)), h)
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								      * Math.pow(x.length, -1/5);
 								  }
 								};
-												Update science.js to 1.7.0: fix science.stats.kde.

											
										
										
											2011-09-10 02:37:59 +04:00
+								science.stats.distance = {
 								  euclidean: function(a, b) {
 								    var n = a.length,
 								        i = -1,
 								        s = 0,
 								        x;
 								    while (++i < n) {
 								      x = a[i] - b[i];
 								      s += x * x;
 								    }
 								    return Math.sqrt(s);
 								  },
 								  manhattan: function(a, b) {
 								    var n = a.length,
 								        i = -1,
 								        s = 0;
 								    while (++i < n) s += Math.abs(a[i] - b[i]);
 								    return s;
 								  },
 								  minkowski: function(p) {
 								    return function(a, b) {
 								      var n = a.length,
 								          i = -1,
 								          s = 0;
 								      while (++i < n) s += Math.pow(Math.abs(a[i] - b[i]), p);
 								      return Math.pow(s, 1 / p);
 								    };
 								  },
 								  chebyshev: function(a, b) {
 								    var n = a.length,
 								        i = -1,
 								        max = 0,
 								        x;
 								    while (++i < n) {
 								      x = Math.abs(a[i] - b[i]);
 								      if (x > max) max = x;
 								    }
 								    return max;
 								  },
 								  hamming: function(a, b) {
 								    var n = a.length,
 								        i = -1,
 								        d = 0;
 								    while (++i < n) if (a[i] !== b[i]) d++;
 								    return d;
 								  },
 								  jaccard: function(a, b) {
 								    var n = a.length,
 								        i = -1,
 								        s = 0;
 								    while (++i < n) if (a[i] === b[i]) s++;
 								    return s / n;
 								  },
 								  braycurtis: function(a, b) {
 								    var n = a.length,
 								        i = -1,
 								        s0 = 0,
 								        s1 = 0,
 								        ai,
 								        bi;
 								    while (++i < n) {
 								      ai = a[i];
 								      bi = b[i];
 								      s0 += Math.abs(ai - bi);
 								      s1 += Math.abs(ai + bi);
 								    }
 								    return s0 / s1;
 								  }
 								};
 								// Based on implementation in http://picomath.org/.
 								science.stats.erf = function(x) {
 								  var a1 =  0.254829592,
 								      a2 = -0.284496736,
 								      a3 =  1.421413741,
 								      a4 = -1.453152027,
 								      a5 =  1.061405429,
 								      p  =  0.3275911;
 								  // Save the sign of x
 								  var sign = x < 0 ? -1 : 1;
 								  if (x < 0) {
 								    sign = -1;
 								    x = -x;
 								  }
 								  // A&S formula 7.1.26
 								  var t = 1 / (1 + p * x);
 								  return sign * (
 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1)
 								    * t * Math.exp(-x * x));
 								};
 								science.stats.phi = function(x) {
 								  return .5 * (1 + science.stats.erf(x / Math.SQRT2));
 								};
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								// See <http://en.wikipedia.org/wiki/Kernel_(statistics)>.
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.kernel = {
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								  uniform: function(u) {
 								    if (u <= 1 && u >= -1) return .5;
 								    return 0;
 								  },
 								  triangular: function(u) {
 								    if (u <= 1 && u >= -1) return 1 - Math.abs(u);
 								    return 0;
 								  },
 								  epanechnikov: function(u) {
 								    if (u <= 1 && u >= -1) return .75 * (1 - u * u);
 								    return 0;
 								  },
 								  quartic: function(u) {
 								    if (u <= 1 && u >= -1) {
 								      var tmp = 1 - u * u;
 								      return (15 / 16) * tmp * tmp;
 								    }
 								    return 0;
 								  },
 								  triweight: function(u) {
 								    if (u <= 1 && u >= -1) {
 								      var tmp = 1 - u * u;
 								      return (35 / 32) * tmp * tmp * tmp;
 								    }
 								    return 0;
 								  },
 								  gaussian: function(u) {
 								    return 1 / Math.sqrt(2 * Math.PI) * Math.exp(-.5 * u * u);
 								  },
 								  cosine: function(u) {
 								    if (u <= 1 && u >= -1) return Math.PI / 4 * Math.cos(Math.PI / 2 * u);
 								    return 0;
 								  }
 								};
 								// http://exploringdata.net/den_trac.htm
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.kde = function() {
 								  var kernel = science.stats.kernel.gaussian,
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								      sample = [],
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								      bandwidth = science.stats.bandwidth.nrd;
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								  function kde(points, i) {
 								    var bw = bandwidth.call(this, sample);
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								    return points.map(function(x) {
 								      var i = -1,
 								          y = 0,
 								          n = sample.length;
 								      while (++i < n) {
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								        y += kernel((x - sample[i]) / bw);
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								      }
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								      return [x, y / bw / n];
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								    });
 								  }
 								  kde.kernel = function(x) {
 								    if (!arguments.length) return kernel;
 								    kernel = x;
 								    return kde;
 								  };
 								  kde.sample = function(x) {
 								    if (!arguments.length) return sample;
 								    sample = x;
 								    return kde;
 								  };
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								  kde.bandwidth = function(x) {
 								    if (!arguments.length) return bandwidth;
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								    bandwidth = science.functor(x);
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								    return kde;
 								  };
 								  return kde;
 								};
-												Update science.js to 1.7.0: fix science.stats.kde.

											
										
										
											2011-09-10 02:37:59 +04:00
+								// Based on figue implementation by Jean-Yves Delort.
 								// http://code.google.com/p/figue/
 								science.stats.kmeans = function() {
 								  var distance = science.stats.distance.euclidean,
 								      maxIterations = 1000,
 								      k = 1;
 								  function kmeans(vectors) {
 								    var n = vectors.length,
 								        assignments = [],
 								        clusterSizes = [],
 								        repeat = 1,
 								        iterations = 0,
 								        centroids = science_stats_kmeansRandom(k, vectors),
 								        newCentroids,
 								        i,
 								        j,
 								        x,
 								        d,
 								        min,
 								        best;
 								    while (repeat && iterations < maxIterations) {
 								      // Assignment step.
 								      j = -1; while (++j < k) {
 								        clusterSizes[j] = 0;
 								      }
 								      i = -1; while (++i < n) {
 								        x = vectors[i];
 								        min = Infinity;
 								        j = -1; while (++j < k) {
 								          d = distance.call(this, centroids[j], x);
 								          if (d < min) {
 								            min = d;
 								            best = j;
 								          }
 								        }
 								        clusterSizes[assignments[i] = best]++;
 								      }
 								      // Update centroids step.
 								      newCentroids = [];
 								      i = -1; while (++i < n) {
 								        x = assignments[i];
 								        d = newCentroids[x];
 								        if (d == null) newCentroids[x] = vectors[i].slice();
 								        else {
 								          j = -1; while (++j < d.length) {
 								            d[j] += vectors[i][j];
 								          }
 								        }
 								      }
 								      j = -1; while (++j < k) {
 								        x = newCentroids[j];
 								        d = 1 / clusterSizes[j];
 								        i = -1; while (++i < x.length) x[i] *= d;
 								      }
 								      // Check convergence.
 								      repeat = 0;
 								      j = -1; while (++j < k) {
 								        if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
 								          repeat = 1;
 								          break;
 								        }
 								      }
 								      centroids = newCentroids;
 								      iterations++;
 								    }
 								    return {assignments: assignments, centroids: centroids};
 								  }
 								  kmeans.k = function(x) {
 								    if (!arguments.length) return k;
 								    k = x;
 								    return kmeans;
 								  };
 								  kmeans.distance = function(x) {
 								    if (!arguments.length) return distance;
 								    distance = x;
 								    return kmeans;
 								  };
 								  return kmeans;
 								};
 								function science_stats_kmeansCompare(a, b) {
 								  if (!a || !b || a.length !== b.length) return false;
 								  var n = a.length,
 								      i = -1;
 								  while (++i < n) if (a[i] !== b[i]) return false;
 								  return true;
 								}
 								// Returns an array of k distinct vectors randomly selected from the input
 								// array of vectors. Returns null if k > n or if there are less than k distinct
 								// objects in vectors.
 								function science_stats_kmeansRandom(k, vectors) {
 								  var n = vectors.length;
 								  if (k > n) return null;
 								  var selected_vectors = [];
 								  var selected_indices = [];
 								  var tested_indices = {};
 								  var tested = 0;
 								  var selected = 0;
 								  var i,
 								      vector,
 								      select;
 								  while (selected < k) {
 								    if (tested === n) return null;
 								    var random_index = Math.floor(Math.random() * n);
 								    if (random_index in tested_indices) continue;
 								    tested_indices[random_index] = 1;
 								    tested++;
 								    vector = vectors[random_index];
 								    select = true;
 								    for (i = 0; i < selected; i++) {
 								      if (science_stats_kmeansCompare(vector, selected_vectors[i])) {
 								        select = false;
 								        break;
 								      }
 								    }
 								    if (select) {
 								      selected_vectors[selected] = vector;
 								      selected_indices[selected] = random_index;
 								      selected++;
 								    }
 								  }
 								  return selected_vectors;
 								}
 								science.stats.hcluster = function() {
 								  var distance = science.stats.distance.euclidean,
 								      linkage = "simple"; // simple, complete or average
 								  function hcluster(vectors) {
 								    var n = vectors.length,
 								        dMin = [],
 								        cSize = [],
 								        distMatrix = [],
 								        clusters = [],
 								        c1,
 								        c2,
 								        c1Cluster,
 								        c2Cluster,
 								        p,
 								        root,
 								        i,
 								        j;
 								    // Initialise distance matrix and vector of closest clusters.
 								    i = -1; while (++i < n) {
 								      dMin[i] = 0;
 								      distMatrix[i] = [];
 								      j = -1; while (++j < n) {
 								        distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]);
 								        if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j;
 								      }
 								    }
 								    // create leaves of the tree
 								    i = -1; while (++i < n) {
 								      clusters[i] = [];
 								      clusters[i][0] = {
 								        left: null,
 								        right: null,
 								        dist: 0,
 								        centroid: vectors[i],
 								        size: 1,
 								        depth: 0
 								      };
 								      cSize[i] = 1;
 								    }
 								    // Main loop
 								    for (p = 0; p < n-1; p++) {
 								      // find the closest pair of clusters
 								      c1 = 0;
 								      for (i = 0; i < n; i++) {
 								        if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
 								      }
 								      c2 = dMin[c1];
 								      // create node to store cluster info
 								      c1Cluster = clusters[c1][0];
 								      c2Cluster = clusters[c2][0];
 								      newCluster = {
 								        left: c1Cluster,
 								        right: c2Cluster,
 								        dist: distMatrix[c1][c2],
 								        centroid: calculateCentroid(c1Cluster.size, c1Cluster.centroid,
 								          c2Cluster.size, c2Cluster.centroid),
 								        size: c1Cluster.size + c2Cluster.size,
 								        depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)
 								      };
 								      clusters[c1].splice(0, 0, newCluster);
 								      cSize[c1] += cSize[c2];
 								      // overwrite row c1 with respect to the linkage type
 								      for (j = 0; j < n; j++) {
 								        switch (linkage) {
 								          case "single":
 								            if (distMatrix[c1][j] > distMatrix[c2][j])
 								              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
 								            break;
 								          case "complete":
 								            if (distMatrix[c1][j] < distMatrix[c2][j])
 								              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
 								            break;
 								          case "average":
 								            distMatrix[j][c1] = distMatrix[c1][j] = (cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j]) / (cSize[c1] + cSize[j]);
 								            break;
 								        }
 								      }
 								      distMatrix[c1][c1] = Infinity;
 								      // infinity out old row c2 and column c2
 								      for (i = 0; i < n; i++)
 								        distMatrix[i][c2] = distMatrix[c2][i] = Infinity;
 								      // update dmin and replace ones that previous pointed to c2 to point to c1
 								      for (j = 0; j < n; j++) {
 								        if (dMin[j] == c2) dMin[j] = c1;
 								        if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]]) dMin[c1] = j;
 								      }
 								      // keep track of the last added cluster
 								      root = newCluster;
 								    }
 								    return root;
 								  }
 								  hcluster.distance = function(x) {
 								    if (!arguments.length) return distance;
 								    distance = x;
 								    return hcluster;
 								  };
 								  return hcluster;
 								};
 								function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
 								  var newCentroid = [],
 								      newSize = c1Size + c2Size,
 								      n = c1Centroid.length,
 								      i = -1;
 								  while (++i < n) {
 								    newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
 								  }
 								  return newCentroid;
 								}
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.iqr = function(x) {
 								  var quartiles = science.stats.quantiles(x, [.25, .75]);
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								  return quartiles[1] - quartiles[0];
 								};
-												Update science.js to 1.7.0: fix science.stats.kde.

											
										
										
											2011-09-10 02:37:59 +04:00
+								// Based on org.apache.commons.math.analysis.interpolation.LoessInterpolator
 								// from http://commons.apache.org/math/
 								science.stats.loess = function() {
 								  var bandwidth = .3,
 								      robustnessIters = 2,
 								      accuracy = 1e-12;
 								  function smooth(xval, yval, weights) {
 								    var n = xval.length,
 								        i;
 								    if (n !== yval.length) throw {error: "Mismatched array lengths"};
 								    if (n == 0) throw {error: "At least one point required."};
 								    if (arguments.length < 3) {
 								      weights = [];
 								      i = -1; while (++i < n) weights[i] = 1;
 								    }
 								    science_stats_loessFiniteReal(xval);
 								    science_stats_loessFiniteReal(yval);
 								    science_stats_loessFiniteReal(weights);
 								    science_stats_loessStrictlyIncreasing(xval);
 								    if (n == 1) return [yval[0]];
 								    if (n == 2) return [yval[0], yval[1]];
 								    var bandwidthInPoints = Math.floor(bandwidth * n);
 								    if (bandwidthInPoints < 2) throw {error: "Bandwidth too small."};
 								    var res = [],
 								        residuals = [],
 								        robustnessWeights = [];
 								    // Do an initial fit and 'robustnessIters' robustness iterations.
 								    // This is equivalent to doing 'robustnessIters+1' robustness iterations
 								    // starting with all robustness weights set to 1.
 								    i = -1; while (++i < n) {
 								      res[i] = 0;
 								      residuals[i] = 0;
 								      robustnessWeights[i] = 1;
 								    }
 								    var iter = -1;
 								    while (++iter <= robustnessIters) {
 								      var bandwidthInterval = [0, bandwidthInPoints - 1];
 								      // At each x, compute a local weighted linear regression
 								      var x;
 								      i = -1; while (++i < n) {
 								        x = xval[i];
 								        // Find out the interval of source points on which
 								        // a regression is to be made.
 								        if (i > 0) {
 								          science_stats_loessUpdateBandwidthInterval(xval, weights, i, bandwidthInterval);
 								        }
 								        var ileft = bandwidthInterval[0],
 								            iright = bandwidthInterval[1];
 								        // Compute the point of the bandwidth interval that is
 								        // farthest from x
 								        var edge = (xval[i] - xval[ileft]) > (xval[iright] - xval[i]) ? ileft : iright;
 								        // Compute a least-squares linear fit weighted by
 								        // the product of robustness weights and the tricube
 								        // weight function.
 								        // See http://en.wikipedia.org/wiki/Linear_regression
 								        // (section "Univariate linear case")
 								        // and http://en.wikipedia.org/wiki/Weighted_least_squares
 								        // (section "Weighted least squares")
 								        var sumWeights = 0,
 								            sumX = 0,
 								            sumXSquared = 0,
 								            sumY = 0,
 								            sumXY = 0,
 								            denom = Math.abs(1 / (xval[edge] - x));
 								        for (var k = ileft; k <= iright; ++k) {
 								          var xk   = xval[k],
 								              yk   = yval[k],
 								              dist = k < i ? x - xk : xk - x,
 								              w    = science_stats_loessTricube(dist * denom) * robustnessWeights[k] * weights[k],
 								              xkw  = xk * w;
 								          sumWeights += w;
 								          sumX += xkw;
 								          sumXSquared += xk * xkw;
 								          sumY += yk * w;
 								          sumXY += yk * xkw;
 								        }
 								        var meanX = sumX / sumWeights,
 								            meanY = sumY / sumWeights,
 								            meanXY = sumXY / sumWeights,
 								            meanXSquared = sumXSquared / sumWeights;
 								        var beta = (Math.sqrt(Math.abs(meanXSquared - meanX * meanX)) < accuracy)
 								            ? 0 : ((meanXY - meanX * meanY) / (meanXSquared - meanX * meanX));
 								        var alpha = meanY - beta * meanX;
 								        res[i] = beta * x + alpha;
 								        residuals[i] = Math.abs(yval[i] - res[i]);
 								      }
 								      // No need to recompute the robustness weights at the last
 								      // iteration, they won't be needed anymore
 								      if (iter === robustnessIters) {
 								        break;
 								      }
 								      // Recompute the robustness weights.
 								      // Find the median residual.
 								      var sortedResiduals = residuals.slice();
 								      sortedResiduals.sort();
 								      var medianResidual = sortedResiduals[Math.floor(n / 2)];
 								      if (Math.abs(medianResidual) < accuracy)
 								        break;
 								      var arg,
 								          w;
 								      i = -1; while (++i < n) {
 								        arg = residuals[i] / (6 * medianResidual);
 								        robustnessWeights[i] = (arg >= 1) ? 0 : ((w = 1 - arg * arg) * w);
 								      }
 								    }
 								    return res;
 								  }
 								  smooth.bandwidth = function(x) {
 								    if (!arguments.length) return x;
 								    bandwidth = x;
 								    return smooth;
 								  };
 								  smooth.robustnessIterations = function(x) {
 								    if (!arguments.length) return x;
 								    robustnessIters = x;
 								    return smooth;
 								  };
 								  smooth.accuracy = function(x) {
 								    if (!arguments.length) return x;
 								    accuracy = x;
 								    return smooth;
 								  };
 								  return smooth;
 								};
 								function science_stats_loessFiniteReal(values) {
 								  var n = values.length,
 								      i = -1;
 								  while (++i < n) if (!isFinite(values[i])) return false;
 								  return true;
 								}
 								function science_stats_loessStrictlyIncreasing(xval) {
 								  var n = xval.length,
 								      i = 0;
 								  while (++i < n) if (xval[i - 1] >= xval[i]) return false;
 								  return true;
 								}
 								// Compute the tricube weight function.
 								// http://en.wikipedia.org/wiki/Local_regression#Weight_function
 								function science_stats_loessTricube(x) {
 								  return (x = 1 - x * x * x) * x * x;
 								}
 								// Given an index interval into xval that embraces a certain number of
 								// points closest to xval[i-1], update the interval so that it embraces
 								// the same number of points closest to xval[i], ignoring zero weights.
 								function science_stats_loessUpdateBandwidthInterval(
 								  xval, weights, i, bandwidthInterval) {
 								  var left = bandwidthInterval[0],
 								      right = bandwidthInterval[1];
 								  // The right edge should be adjusted if the next point to the right
 								  // is closer to xval[i] than the leftmost point of the current interval
 								  var nextRight = science_stats_loessNextNonzero(weights, right);
 								  if ((nextRight < xval.length) && (xval[nextRight] - xval[i]) < (xval[i] - xval[left])) {
 								    var nextLeft = science_stats_loessNextNonzero(weights, left);
 								    bandwidthInterval[0] = nextLeft;
 								    bandwidthInterval[1] = nextRight;
 								  }
 								}
 								function science_stats_loessNextNonzero(weights, i) {
 								  var j = i + 1;
 								  while (j < weights.length && weights[j] === 0) j++;
 								  return j;
 								}
-												Use Welford's algorithm for d3.stats.mean.

Thanks, @jfirebaugh!

											
										
										
											2011-05-11 19:42:24 +04:00
+								// Welford's algorithm.
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.mean = function(x) {
-												Return NaN for d3.stats.mean([]).

Also added some unit tests for d3.stats.mean.

											
										
										
											2011-06-07 02:51:05 +04:00
+								  var n = x.length;
 								  if (n === 0) return NaN;
-												Use Welford's algorithm for d3.stats.mean.

Thanks, @jfirebaugh!

											
										
										
											2011-05-11 19:42:24 +04:00
+								  var m = 0,
 								      i = -1;
 								  while (++i < n) m += (x[i] - m) / (i + 1);
 								  return m;
 								};
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.median = function(x) {
 								  return science.stats.quantiles(x, [.5])[0];
-												Add d3.sum and d3.stats.median.

											
										
										
											2011-05-11 11:35:12 +04:00
+								};
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.mode = function(x) {
 								  x = x.slice().sort(science.ascending);
-												Add d3.stats.mode.

											
										
										
											2011-05-11 13:03:39 +04:00
+								  var mode,
 								      n = x.length,
 								      i = -1,
 								      l = i,
 								      last = null,
 								      max = 0,
 								      tmp,
 								      v;
 								  while (++i < n) {
 								    if ((v = x[i]) !== last) {
 								      if ((tmp = i - l) > max) {
 								        max = tmp;
 								        mode = last;
 								      }
 								      last = v;
 								      l = i;
 								    }
 								  }
 								  return mode;
 								};
-												Fix d3.stats.quantiles and add more tests.

											
										
										
											2011-06-06 21:48:00 +04:00
+								// Uses R's quantile algorithm type=7.
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.quantiles = function(d, quantiles) {
 								  d = d.slice().sort(science.ascending);
-												Fix d3.stats.quantiles and add more tests.

											
										
										
											2011-06-06 21:48:00 +04:00
+								  var n_1 = d.length - 1;
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								  return quantiles.map(function(q) {
-												Fix d3.stats.quantiles and add more tests.

											
										
										
											2011-06-06 21:48:00 +04:00
+								    if (q === 0) return d[0];
 								    else if (q === 1) return d[n_1];
 								    var index = 1 + q * n_1,
 								        lo = Math.floor(index),
 								        h = index - lo,
 								        a = d[lo - 1];
 								    return h === 0 ? a : a + h * (d[lo] - a);
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								  });
 								};
-												Clarify d3.stats.variance.

Remove TODO comment as this seems numerically robust enough.  The same two-pass
algorithm is used in Haskell's "statistics" package.

											
										
										
											2011-05-11 19:52:37 +04:00
+								// Unbiased estimate of a sample's variance.
 								// Also known as the sample variance, where the denominator is n - 1.
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								science.stats.variance = function(x) {
-												Extract stats code from Q-Q example.

											
										
										
											2011-06-06 11:57:35 +04:00
+								  var n = x.length;
 								  if (n < 1) return NaN;
 								  if (n === 1) return 0;
-												Move d3.stats to external lib, science.stats.js!

											
										
										
											2011-07-06 21:57:14 +04:00
+								  var mean = science.stats.mean(x),
-												Add bandwidth selectors for Gaussian kernels.

Thanks @jfirebaugh for the suggestion!  Implementation is based on that of R's
stats module:
<http://stat.ethz.ch/R-manual/R-patched/library/stats/html/bandwidth.html>

This brings a few additional operators to `d3.stats`, including a fairly
rudimentary implementation of variance, discussed at
<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm>.

											
										
										
											2011-05-10 22:06:24 +04:00
+								      i = -1,
 								      s = 0;
 								  while (++i < n) {
 								    var v = x[i] - mean;
 								    s += v * v;
 								  }
 								  return s / (n - 1);
 								};
-												Add kernel density estimation.

Kernel density estimation is provided via `d3.stats.kde` and the kernel is
configurable; several common kernels are provided in `d3.stats.kernel`.

											
										
										
											2011-05-10 02:58:54 +04:00
+								})()