2011-07-06 21:57:14 +04:00
|
|
|
|
// Module wrapper: everything below attaches to the `science` namespace,
// assumed to be defined elsewhere in the library — TODO confirm load order.
(function(){science.stats = {};
|
2011-05-10 22:06:24 +04:00
|
|
|
|
// Bandwidth selectors for Gaussian kernels.
// Based on R's implementations in `stats.bw`.
science.stats.bandwidth = {

  // Silverman's rule of thumb (R's bw.nrd0).
  // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
  nrd0: function(x) {
    var hi = Math.sqrt(science.stats.variance(x)),
        lo; // BUG FIX: `lo` was an implicit global; declared locally.
    if (!(lo = Math.min(hi, science.stats.iqr(x) / 1.34)))
      // Fallbacks mirror R's bw.nrd0: the standard deviation, then the
      // absolute value of the first element (R's x[1] is 1-indexed, i.e.
      // x[0] in JavaScript — the original mistakenly used x[1]), then 1.
      (lo = hi) || (lo = Math.abs(x[0])) || (lo = 1);
    return .9 * lo * Math.pow(x.length, -.2);
  },

  // Scott's rule of thumb (R's bw.nrd).
  // Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and
  // Visualization. Wiley.
  nrd: function(x) {
    var h = science.stats.iqr(x) / 1.34;
    return 1.06 * Math.min(Math.sqrt(science.stats.variance(x)), h)
        * Math.pow(x.length, -1/5);
  }
};
|
2011-09-10 02:37:59 +04:00
|
|
|
|
// Vector distance metrics. Each metric takes two equal-length numeric
// arrays and returns a non-negative scalar.
science.stats.distance = {

  // Straight-line (L2) distance.
  euclidean: function(a, b) {
    var sum = 0;
    for (var i = 0; i < a.length; i++) {
      var delta = a[i] - b[i];
      sum += delta * delta;
    }
    return Math.sqrt(sum);
  },

  // City-block (L1) distance.
  manhattan: function(a, b) {
    var sum = 0;
    for (var i = 0; i < a.length; i++) sum += Math.abs(a[i] - b[i]);
    return sum;
  },

  // Returns an Lp distance function for the given order p.
  minkowski: function(p) {
    return function(a, b) {
      var sum = 0;
      for (var i = 0; i < a.length; i++) sum += Math.pow(Math.abs(a[i] - b[i]), p);
      return Math.pow(sum, 1 / p);
    };
  },

  // Maximum-coordinate (L-infinity) distance.
  chebyshev: function(a, b) {
    var largest = 0;
    for (var i = 0; i < a.length; i++) {
      var delta = Math.abs(a[i] - b[i]);
      if (delta > largest) largest = delta;
    }
    return largest;
  },

  // Number of positions at which the two vectors differ.
  hamming: function(a, b) {
    var differing = 0;
    for (var i = 0; i < a.length; i++) if (a[i] !== b[i]) differing++;
    return differing;
  },

  // Fraction of positions at which the two vectors agree.
  jaccard: function(a, b) {
    var matching = 0;
    for (var i = 0; i < a.length; i++) if (a[i] === b[i]) matching++;
    return matching / a.length;
  },

  // Bray-Curtis dissimilarity: sum of |a-b| over sum of |a+b|.
  braycurtis: function(a, b) {
    var numerator = 0,
        denominator = 0;
    for (var i = 0; i < a.length; i++) {
      numerator += Math.abs(a[i] - b[i]);
      denominator += Math.abs(a[i] + b[i]);
    }
    return numerator / denominator;
  }
};
|
|
|
|
|
// Error function, approximated via Abramowitz & Stegun formula 7.1.26
// (maximum absolute error about 1.5e-7).
// Based on implementation in http://picomath.org/.
science.stats.erf = function(x) {
  var a1 = 0.254829592,
      a2 = -0.284496736,
      a3 = 1.421413741,
      a4 = -1.453152027,
      a5 = 1.061405429,
      p = 0.3275911;

  // erf is odd: evaluate on |x| and restore the sign at the end.
  // (The original assigned `sign` twice — once via a ternary and again in
  // the branch below; the redundant ternary has been removed.)
  var sign = 1;
  if (x < 0) {
    sign = -1;
    x = -x;
  }

  // A&S formula 7.1.26
  var t = 1 / (1 + p * x);
  return sign * (
    1 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1)
    * t * Math.exp(-x * x));
};
|
|
|
|
|
// Standard normal cumulative distribution function, expressed via erf.
science.stats.phi = function(x) {
  var scaled = x / Math.SQRT2;
  return (1 + science.stats.erf(scaled)) * .5;
};
|
2011-05-10 02:58:54 +04:00
|
|
|
|
// Smoothing kernels for kernel density estimation. All kernels except
// gaussian have support [-1, 1] and return 0 outside it.
// See <http://en.wikipedia.org/wiki/Kernel_(statistics)>.
science.stats.kernel = {
  // Constant weight on the support.
  uniform: function(u) {
    return u >= -1 && u <= 1 ? .5 : 0;
  },
  // Weight falls off linearly with |u|.
  triangular: function(u) {
    return u >= -1 && u <= 1 ? 1 - Math.abs(u) : 0;
  },
  // Parabolic kernel.
  epanechnikov: function(u) {
    return u >= -1 && u <= 1 ? .75 * (1 - u * u) : 0;
  },
  // Biweight kernel: (15/16)(1 - u^2)^2.
  quartic: function(u) {
    if (u >= -1 && u <= 1) {
      var t = 1 - u * u;
      return (15 / 16) * t * t;
    }
    return 0;
  },
  // (35/32)(1 - u^2)^3.
  triweight: function(u) {
    if (u >= -1 && u <= 1) {
      var t = 1 - u * u;
      return (35 / 32) * t * t * t;
    }
    return 0;
  },
  // Standard normal density; support is unbounded.
  gaussian: function(u) {
    return 1 / Math.sqrt(2 * Math.PI) * Math.exp(-.5 * u * u);
  },
  // (pi/4) cos(pi u / 2).
  cosine: function(u) {
    return u >= -1 && u <= 1 ? Math.PI / 4 * Math.cos(Math.PI / 2 * u) : 0;
  }
};
|
|
|
|
|
// Kernel density estimation.
// http://exploringdata.net/den_trac.htm
science.stats.kde = function() {
  var kernel = science.stats.kernel.gaussian,
      sample = [],
      bandwidth = science.stats.bandwidth.nrd;

  // Evaluates the density estimate at each of the given points, returning
  // an array of [x, density] pairs.
  function kde(points, i) {
    var bw = bandwidth.call(this, sample);
    return points.map(function(x) {
      var sum = 0;
      for (var j = 0; j < sample.length; j++) {
        sum += kernel((x - sample[j]) / bw);
      }
      return [x, sum / bw / sample.length];
    });
  }

  // Gets or sets the smoothing kernel; defaults to the Gaussian kernel.
  kde.kernel = function(x) {
    if (!arguments.length) return kernel;
    kernel = x;
    return kde;
  };

  // Gets or sets the sample from which the density is estimated.
  kde.sample = function(x) {
    if (!arguments.length) return sample;
    sample = x;
    return kde;
  };

  // Gets or sets the bandwidth selector; defaults to Scott's rule (nrd).
  kde.bandwidth = function(x) {
    if (!arguments.length) return bandwidth;
    bandwidth = science.functor(x);
    return kde;
  };

  return kde;
};
|
2011-09-10 02:37:59 +04:00
|
|
|
|
// Based on figue implementation by Jean-Yves Delort.
// http://code.google.com/p/figue/
// Lloyd's algorithm for k-means clustering. Returns a configurable function;
// call it with an array of equal-length numeric vectors.
science.stats.kmeans = function() {
  var distance = science.stats.distance.euclidean,
      maxIterations = 1000,
      k = 1;

  // Clusters `vectors` into k groups. Returns {assignments, centroids},
  // where assignments[i] is the centroid index assigned to vectors[i].
  function kmeans(vectors) {
    var n = vectors.length,
        assignments = [],
        clusterSizes = [],
        repeat = 1,
        iterations = 0,
        centroids = science_stats_kmeansRandom(k, vectors),
        newCentroids,
        i,
        j,
        x,
        d,
        min,
        best;

    while (repeat && iterations < maxIterations) {
      // Assignment step.
      j = -1; while (++j < k) {
        clusterSizes[j] = 0;
      }

      // Assign each vector to its nearest centroid.
      i = -1; while (++i < n) {
        x = vectors[i];
        min = Infinity;
        j = -1; while (++j < k) {
          d = distance.call(this, centroids[j], x);
          if (d < min) {
            min = d;
            best = j;
          }
        }
        clusterSizes[assignments[i] = best]++;
      }

      // Update centroids step.
      // First accumulate the component-wise sum of each cluster's members…
      newCentroids = [];
      i = -1; while (++i < n) {
        x = assignments[i];
        d = newCentroids[x];
        if (d == null) newCentroids[x] = vectors[i].slice();
        else {
          j = -1; while (++j < d.length) {
            d[j] += vectors[i][j];
          }
        }
      }
      // …then divide each sum by the cluster size to get the mean.
      // NOTE(review): if a cluster ends up with zero members, clusterSizes[j]
      // is 0 and newCentroids[j] is undefined, so x.length throws here —
      // this assumes every cluster keeps at least one member each iteration;
      // confirm for small or degenerate inputs.
      j = -1; while (++j < k) {
        x = newCentroids[j];
        d = 1 / clusterSizes[j];
        i = -1; while (++i < x.length) x[i] *= d;
      }

      // Check convergence: stop when no centroid moved.
      repeat = 0;
      j = -1; while (++j < k) {
        if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
          repeat = 1;
          break;
        }
      }
      centroids = newCentroids;
      iterations++;
    }
    return {assignments: assignments, centroids: centroids};
  }

  // Gets or sets the number of clusters; defaults to 1.
  kmeans.k = function(x) {
    if (!arguments.length) return k;
    k = x;
    return kmeans;
  };

  // Gets or sets the distance metric; defaults to euclidean.
  kmeans.distance = function(x) {
    if (!arguments.length) return distance;
    distance = x;
    return kmeans;
  };

  return kmeans;
};
|
|
|
|
|
|
|
|
|
|
// Returns true if a and b are element-wise identical vectors;
// false for null/undefined inputs or mismatched lengths.
function science_stats_kmeansCompare(a, b) {
  if (!a || !b || a.length !== b.length) return false;
  for (var i = 0, n = a.length; i < n; i++) {
    if (a[i] !== b[i]) return false;
  }
  return true;
}
|
|
|
|
|
|
|
|
|
|
// Returns an array of k distinct vectors randomly selected from the input
// array of vectors. Returns null if k > n or if there are less than k distinct
// objects in vectors.
function science_stats_kmeansRandom(k, vectors) {
  var n = vectors.length;
  if (k > n) return null;

  var selectedVectors = [],
      selectedIndices = [],
      triedIndices = {},
      tried = 0,
      selected = 0,
      i,
      candidate,
      isDistinct;

  while (selected < k) {
    // Every index has been tried without finding k distinct vectors.
    if (tried === n) return null;

    var randomIndex = Math.floor(Math.random() * n);
    if (randomIndex in triedIndices) continue;

    triedIndices[randomIndex] = 1;
    tried++;
    candidate = vectors[randomIndex];

    // Accept the candidate only if it differs from everything chosen so far.
    isDistinct = true;
    for (i = 0; i < selected; i++) {
      if (science_stats_kmeansCompare(candidate, selectedVectors[i])) {
        isDistinct = false;
        break;
      }
    }
    if (isDistinct) {
      selectedVectors[selected] = candidate;
      selectedIndices[selected] = randomIndex;
      selected++;
    }
  }
  return selectedVectors;
}
|
|
|
|
|
// Agglomerative hierarchical clustering. Returns a configurable function;
// call it with an array of equal-length numeric vectors to obtain the root
// node of the cluster tree: {left, right, dist, centroid, size, depth}.
science.stats.hcluster = function() {
  var distance = science.stats.distance.euclidean,
      // BUG FIX: this defaulted to "simple", which matches no case in the
      // linkage switch below, so the distance matrix was never updated and
      // the resulting tree was meaningless. Valid values are "single",
      // "complete" or "average".
      linkage = "single";

  function hcluster(vectors) {
    var n = vectors.length,
        dMin = [],          // dMin[i]: index of the cluster closest to i
        cSize = [],         // cSize[i]: number of vectors merged into i
        distMatrix = [],
        clusters = [],
        c1,
        c2,
        c1Cluster,
        c2Cluster,
        newCluster,         // BUG FIX: was an implicit global variable
        p,
        root,
        i,
        j;

    // Initialise distance matrix and vector of closest clusters.
    i = -1; while (++i < n) {
      dMin[i] = 0;
      distMatrix[i] = [];
      j = -1; while (++j < n) {
        distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]);
        if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j;
      }
    }

    // create leaves of the tree
    i = -1; while (++i < n) {
      clusters[i] = [];
      clusters[i][0] = {
        left: null,
        right: null,
        dist: 0,
        centroid: vectors[i],
        size: 1,
        depth: 0
      };
      cSize[i] = 1;
    }

    // Main loop: merge the two closest clusters n-1 times.
    for (p = 0; p < n-1; p++) {
      // find the closest pair of clusters
      c1 = 0;
      for (i = 0; i < n; i++) {
        if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
      }
      c2 = dMin[c1];

      // create node to store cluster info
      c1Cluster = clusters[c1][0];
      c2Cluster = clusters[c2][0];

      newCluster = {
        left: c1Cluster,
        right: c2Cluster,
        dist: distMatrix[c1][c2],
        centroid: calculateCentroid(c1Cluster.size, c1Cluster.centroid,
          c2Cluster.size, c2Cluster.centroid),
        size: c1Cluster.size + c2Cluster.size,
        depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)
      };
      clusters[c1].splice(0, 0, newCluster);
      cSize[c1] += cSize[c2];

      // overwrite row c1 with respect to the linkage type
      for (j = 0; j < n; j++) {
        switch (linkage) {
          case "single":
            if (distMatrix[c1][j] > distMatrix[c2][j])
              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
            break;
          case "complete":
            if (distMatrix[c1][j] < distMatrix[c2][j])
              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
            break;
          case "average":
            // NOTE(review): Lance-Williams average linkage divides by the
            // two merged cluster sizes (cSize[c1] + cSize[c2]), but this
            // divides by cSize[c1] + cSize[j], and cSize[c1] has already
            // absorbed cSize[c2] above. Preserved as-is to avoid changing
            // existing results; verify against a reference implementation
            // before relying on "average" linkage.
            distMatrix[j][c1] = distMatrix[c1][j] = (cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j]) / (cSize[c1] + cSize[j]);
            break;
        }
      }
      distMatrix[c1][c1] = Infinity;

      // infinity out old row c2 and column c2
      for (i = 0; i < n; i++)
        distMatrix[i][c2] = distMatrix[c2][i] = Infinity;

      // update dmin and replace ones that previous pointed to c2 to point to c1
      for (j = 0; j < n; j++) {
        if (dMin[j] == c2) dMin[j] = c1;
        if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]]) dMin[c1] = j;
      }

      // keep track of the last added cluster
      root = newCluster;
    }

    return root;
  }

  // Gets or sets the distance metric; defaults to euclidean.
  hcluster.distance = function(x) {
    if (!arguments.length) return distance;
    distance = x;
    return hcluster;
  };

  // Gets or sets the linkage criterion ("single", "complete" or "average");
  // defaults to "single". (New, backward-compatible accessor: linkage was
  // previously not configurable at all.)
  hcluster.linkage = function(x) {
    if (!arguments.length) return linkage;
    linkage = x;
    return hcluster;
  };

  return hcluster;
};
|
|
|
|
|
|
|
|
|
|
// Returns the centroid of the union of two clusters: the size-weighted
// mean of the two input centroids, component by component.
function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
  var total = c1Size + c2Size,
      merged = [];
  for (var i = 0; i < c1Centroid.length; i++) {
    merged[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / total;
  }
  return merged;
}
|
2011-07-06 21:57:14 +04:00
|
|
|
|
// Interquartile range: the difference between the 75th and 25th percentiles.
science.stats.iqr = function(x) {
  var q = science.stats.quantiles(x, [.25, .75]);
  return q[1] - q[0];
};
|
2011-09-10 02:37:59 +04:00
|
|
|
|
// Based on org.apache.commons.math.analysis.interpolation.LoessInterpolator
// from http://commons.apache.org/math/
// Local regression (loess) smoother. Returns a configurable function;
// call it as smooth(xval, yval[, weights]) with xval strictly increasing.
science.stats.loess = function() {
  var bandwidth = .3,       // fraction of points used in each local fit
      robustnessIters = 2,  // number of robustifying reweighting passes
      accuracy = 1e-12;     // tolerance for numerically degenerate fits

  // Returns the smoothed y-values for the given x/y arrays. Optional
  // per-point weights default to 1. Throws on mismatched or empty input,
  // or when the bandwidth covers fewer than 2 points.
  function smooth(xval, yval, weights) {
    var n = xval.length,
        i;

    if (n !== yval.length) throw {error: "Mismatched array lengths"};
    if (n == 0) throw {error: "At least one point required."};

    if (arguments.length < 3) {
      weights = [];
      i = -1; while (++i < n) weights[i] = 1;
    }

    science_stats_loessFiniteReal(xval);
    science_stats_loessFiniteReal(yval);
    science_stats_loessFiniteReal(weights);
    science_stats_loessStrictlyIncreasing(xval);

    // With one or two points the fit is the data itself.
    if (n == 1) return [yval[0]];
    if (n == 2) return [yval[0], yval[1]];

    var bandwidthInPoints = Math.floor(bandwidth * n);

    if (bandwidthInPoints < 2) throw {error: "Bandwidth too small."};

    var res = [],
        residuals = [],
        robustnessWeights = [];

    // Do an initial fit and 'robustnessIters' robustness iterations.
    // This is equivalent to doing 'robustnessIters+1' robustness iterations
    // starting with all robustness weights set to 1.
    i = -1; while (++i < n) {
      res[i] = 0;
      residuals[i] = 0;
      robustnessWeights[i] = 1;
    }

    var iter = -1;
    while (++iter <= robustnessIters) {
      var bandwidthInterval = [0, bandwidthInPoints - 1];
      // At each x, compute a local weighted linear regression
      var x;
      i = -1; while (++i < n) {
        x = xval[i];

        // Find out the interval of source points on which
        // a regression is to be made.
        if (i > 0) {
          science_stats_loessUpdateBandwidthInterval(xval, weights, i, bandwidthInterval);
        }

        var ileft = bandwidthInterval[0],
            iright = bandwidthInterval[1];

        // Compute the point of the bandwidth interval that is
        // farthest from x
        var edge = (xval[i] - xval[ileft]) > (xval[iright] - xval[i]) ? ileft : iright;

        // Compute a least-squares linear fit weighted by
        // the product of robustness weights and the tricube
        // weight function.
        // See http://en.wikipedia.org/wiki/Linear_regression
        // (section "Univariate linear case")
        // and http://en.wikipedia.org/wiki/Weighted_least_squares
        // (section "Weighted least squares")
        var sumWeights = 0,
            sumX = 0,
            sumXSquared = 0,
            sumY = 0,
            sumXY = 0,
            denom = Math.abs(1 / (xval[edge] - x));

        for (var k = ileft; k <= iright; ++k) {
          var xk = xval[k],
              yk = yval[k],
              dist = k < i ? x - xk : xk - x,
              w = science_stats_loessTricube(dist * denom) * robustnessWeights[k] * weights[k],
              xkw = xk * w;
          sumWeights += w;
          sumX += xkw;
          sumXSquared += xk * xkw;
          sumY += yk * w;
          sumXY += yk * xkw;
        }

        var meanX = sumX / sumWeights,
            meanY = sumY / sumWeights,
            meanXY = sumXY / sumWeights,
            meanXSquared = sumXSquared / sumWeights;

        // Slope is forced to zero when the x-spread is numerically degenerate.
        var beta = (Math.sqrt(Math.abs(meanXSquared - meanX * meanX)) < accuracy)
          ? 0 : ((meanXY - meanX * meanY) / (meanXSquared - meanX * meanX));

        var alpha = meanY - beta * meanX;

        res[i] = beta * x + alpha;
        residuals[i] = Math.abs(yval[i] - res[i]);
      }

      // No need to recompute the robustness weights at the last
      // iteration, they won't be needed anymore
      if (iter === robustnessIters) {
        break;
      }

      // Recompute the robustness weights.

      // Find the median residual.
      // BUG FIX: Array#sort without a comparator sorts numbers
      // lexicographically (e.g. [2, 10] -> [10, 2]); sort numerically.
      var sortedResiduals = residuals.slice();
      sortedResiduals.sort(function(a, b) { return a - b; });
      var medianResidual = sortedResiduals[Math.floor(n / 2)];

      if (Math.abs(medianResidual) < accuracy)
        break;

      var arg,
          w;
      i = -1; while (++i < n) {
        arg = residuals[i] / (6 * medianResidual);
        robustnessWeights[i] = (arg >= 1) ? 0 : ((w = 1 - arg * arg) * w);
      }
    }

    return res;
  }

  // Gets or sets the bandwidth (fraction of points per local fit).
  smooth.bandwidth = function(x) {
    // BUG FIX: the getter returned the (undefined) argument `x`
    // instead of the current bandwidth.
    if (!arguments.length) return bandwidth;
    bandwidth = x;
    return smooth;
  };

  // Gets or sets the number of robustness iterations.
  smooth.robustnessIterations = function(x) {
    // BUG FIX: the getter returned `x` instead of robustnessIters.
    if (!arguments.length) return robustnessIters;
    robustnessIters = x;
    return smooth;
  };

  // Gets or sets the numerical accuracy threshold.
  smooth.accuracy = function(x) {
    // BUG FIX: the getter returned `x` instead of accuracy.
    if (!arguments.length) return accuracy;
    accuracy = x;
    return smooth;
  };

  return smooth;
};
|
|
|
|
|
|
|
|
|
|
// Returns true if every value in the array is a finite number
// (no NaN, Infinity or -Infinity).
function science_stats_loessFiniteReal(values) {
  for (var i = 0; i < values.length; i++) {
    if (!isFinite(values[i])) return false;
  }
  return true;
}
|
|
|
|
|
|
|
|
|
|
// Returns true if the array is sorted in strictly increasing order
// (empty and single-element arrays count as increasing).
function science_stats_loessStrictlyIncreasing(xval) {
  for (var i = 1; i < xval.length; i++) {
    if (xval[i - 1] >= xval[i]) return false;
  }
  return true;
}
|
|
|
|
|
|
|
|
|
|
// Compute the tricube weight function, (1 - x^3)^3.
// http://en.wikipedia.org/wiki/Local_regression#Weight_function
function science_stats_loessTricube(x) {
  var t = 1 - x * x * x;
  return t * t * t;
}
|
|
|
|
|
|
|
|
|
|
// Given an index interval into xval that embraces a certain number of
// points closest to xval[i-1], update the interval so that it embraces
// the same number of points closest to xval[i], ignoring zero weights.
function science_stats_loessUpdateBandwidthInterval(
    xval, weights, i, bandwidthInterval) {

  var left = bandwidthInterval[0],
      right = bandwidthInterval[1],
      nextRight = science_stats_loessNextNonzero(weights, right);

  // Slide the window right when the next non-zero-weight point beyond the
  // right edge is closer to xval[i] than the current leftmost point.
  if (nextRight < xval.length && xval[nextRight] - xval[i] < xval[i] - xval[left]) {
    bandwidthInterval[0] = science_stats_loessNextNonzero(weights, left);
    bandwidthInterval[1] = nextRight;
  }
}
|
|
|
|
|
|
|
|
|
|
// Returns the index of the first non-zero weight strictly after i;
// the scan stops at (and may return) an index past the end of the array.
function science_stats_loessNextNonzero(weights, i) {
  var j;
  for (j = i + 1; j < weights.length; j++) {
    if (weights[j] !== 0) break;
  }
  return j;
}
|
2011-05-11 19:42:24 +04:00
|
|
|
|
// Welford's algorithm.
// Incremental mean: updates the running mean one element at a time,
// which is numerically stabler than summing then dividing.
science.stats.mean = function(x) {
  var n = x.length;
  if (n === 0) return NaN;
  var m = 0;
  for (var i = 0; i < n; i++) {
    m += (x[i] - m) / (i + 1);
  }
  return m;
};
|
2011-07-06 21:57:14 +04:00
|
|
|
|
// The 50th percentile, computed with R's type-7 quantile algorithm.
science.stats.median = function(x) {
  var result = science.stats.quantiles(x, [.5]);
  return result[0];
};
|
2011-07-06 21:57:14 +04:00
|
|
|
|
// Returns the most frequent value in x (ties broken in favor of the run
// that sorts first). Returns undefined for an empty array.
// BUG FIX: the previous implementation never scored the final run of equal
// values, so e.g. mode([1, 2, 2]) ignored the trailing 2s and was wrong
// whenever the most frequent value sorted last.
science.stats.mode = function(x) {
  x = x.slice().sort(science.ascending);
  var mode,
      n = x.length,
      i = 0,
      l = 0,    // start index of the current run of equal values
      max = 0;  // length of the longest run seen so far
  while (++i <= n) {
    // A run ends when we step past the last element or hit a new value.
    if (i === n || x[i] !== x[l]) {
      if (i - l > max) {
        max = i - l;
        mode = x[l];
      }
      l = i;
    }
  }
  return mode;
};
|
2011-06-06 21:48:00 +04:00
|
|
|
|
// Uses R's quantile algorithm type=7.
// Returns the requested quantiles (each q in [0, 1]) of the sample d.
science.stats.quantiles = function(d, quantiles) {
  d = d.slice().sort(science.ascending);
  var n_1 = d.length - 1;
  return quantiles.map(function(q) {
    // Exact endpoints need no interpolation.
    if (q === 0) return d[0];
    if (q === 1) return d[n_1];

    // Fractional (1-based) index into the sorted sample.
    var index = 1 + q * n_1,
        lo = Math.floor(index),
        h = index - lo,
        a = d[lo - 1];

    // Linearly interpolate between the two straddling order statistics.
    return h === 0 ? a : a + h * (d[lo] - a);
  });
};
|
2011-05-11 19:52:37 +04:00
|
|
|
|
// Unbiased estimate of a sample's variance.
// Also known as the sample variance, where the denominator is n - 1.
science.stats.variance = function(x) {
  var n = x.length;
  if (n < 1) return NaN;
  if (n === 1) return 0;
  var mean = science.stats.mean(x),
      sumSq = 0;
  for (var i = 0; i < n; i++) {
    var deviation = x[i] - mean;
    sumSq += deviation * deviation;
  }
  return sumSq / (n - 1);
};
|
2011-05-10 02:58:54 +04:00
|
|
|
|
})()
|