(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.ss = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({3:[function(require,module,exports){ 'use strict'; var binomialDistribution = require('./binomial_distribution'); /** * The [Bernoulli Distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution) is the discrete probability * distribution of a random variable which takes value 1 with success probability `p` and value 0 with failure * probability `q` = 1 - `p`; it is the Binomial Distribution with trials = 1. * * @param {number} p probability of success, between 0 and 1 inclusive * @returns {Object} probability of each outcome (0 and 1) */ function bernoulliDistribution(p) { // Check that `p` is a valid probability (0 ≤ p ≤ 1) if (p < 0 || p > 1 ) { return null; } return binomialDistribution(1, p); } module.exports = bernoulliDistribution; },{"./binomial_distribution":4}],4:[function(require,module,exports){ 'use strict'; var epsilon = require('./epsilon'); var factorial = require('./factorial'); /** * The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability * distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields * success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or * Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution. * * @param {number} trials number of trials to simulate * @param {number} probability probability of success in each trial * @returns {Object} probability of each number of successes */ function binomialDistribution(trials, probability) { // Check that `p` is a valid probability (0 ≤ p ≤ 1), // and that `n` is a strictly positive integer. if (probability < 0 || probability > 1 || trials <= 0 || trials % 1 !== 0) { return null; } // We initialize `x`, the random variable, and `cumulativeProbability`, an // accumulator for the cumulative distribution function, to 0. `cells` is the // object we'll return, mapping each value of `x` to its probability. // We iterate until the `cumulativeProbability` is within `epsilon` of 1.0. var x = 0, cumulativeProbability = 0, cells = {}; // This algorithm iterates through each potential outcome, // until the `cumulativeProbability` is very close to 1, at // which point we've defined the vast majority of outcomes do { // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function) cells[x] = factorial(trials) / (factorial(x) * factorial(trials - x)) * (Math.pow(probability, x) * Math.pow(1 - probability, trials - x)); cumulativeProbability += cells[x]; x++; // when the cumulativeProbability is nearly 1, we've calculated // the useful range of this distribution } while (cumulativeProbability < 1 - epsilon); return cells; } module.exports = binomialDistribution; },{"./epsilon":10,"./factorial":12}],5:[function(require,module,exports){ 'use strict'; /** * **Percentage Points of the χ2 (Chi-Squared) Distribution** * * The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common * chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two * criteria of classification of qualitative data, and in confidence interval estimation for a population standard * deviation of a normal distribution from a sample standard deviation. * * Values from Appendix 1, Table III of William W.
Hines & Douglas C. Montgomery, "Probability and Statistics in * Engineering and Management Science", Wiley (1980). */ var chiSquaredDistributionTable = { 1: { 0.995: 0.00, 0.99: 0.00, 0.975: 0.00, 0.95: 0.00, 0.9: 0.02, 0.5: 0.45, 0.1: 2.71, 0.05: 3.84, 0.025: 5.02, 0.01: 6.63, 0.005: 7.88 }, 2: { 0.995: 0.01, 0.99: 0.02, 0.975: 0.05, 0.95: 0.10, 0.9: 0.21, 0.5: 1.39, 0.1: 4.61, 0.05: 5.99, 0.025: 7.38, 0.01: 9.21, 0.005: 10.60 }, 3: { 0.995: 0.07, 0.99: 0.11, 0.975: 0.22, 0.95: 0.35, 0.9: 0.58, 0.5: 2.37, 0.1: 6.25, 0.05: 7.81, 0.025: 9.35, 0.01: 11.34, 0.005: 12.84 }, 4: { 0.995: 0.21, 0.99: 0.30, 0.975: 0.48, 0.95: 0.71, 0.9: 1.06, 0.5: 3.36, 0.1: 7.78, 0.05: 9.49, 0.025: 11.14, 0.01: 13.28, 0.005: 14.86 }, 5: { 0.995: 0.41, 0.99: 0.55, 0.975: 0.83, 0.95: 1.15, 0.9: 1.61, 0.5: 4.35, 0.1: 9.24, 0.05: 11.07, 0.025: 12.83, 0.01: 15.09, 0.005: 16.75 }, 6: { 0.995: 0.68, 0.99: 0.87, 0.975: 1.24, 0.95: 1.64, 0.9: 2.20, 0.5: 5.35, 0.1: 10.65, 0.05: 12.59, 0.025: 14.45, 0.01: 16.81, 0.005: 18.55 }, 7: { 0.995: 0.99, 0.99: 1.25, 0.975: 1.69, 0.95: 2.17, 0.9: 2.83, 0.5: 6.35, 0.1: 12.02, 0.05: 14.07, 0.025: 16.01, 0.01: 18.48, 0.005: 20.28 }, 8: { 0.995: 1.34, 0.99: 1.65, 0.975: 2.18, 0.95: 2.73, 0.9: 3.49, 0.5: 7.34, 0.1: 13.36, 0.05: 15.51, 0.025: 17.53, 0.01: 20.09, 0.005: 21.96 }, 9: { 0.995: 1.73, 0.99: 2.09, 0.975: 2.70, 0.95: 3.33, 0.9: 4.17, 0.5: 8.34, 0.1: 14.68, 0.05: 16.92, 0.025: 19.02, 0.01: 21.67, 0.005: 23.59 }, 10: { 0.995: 2.16, 0.99: 2.56, 0.975: 3.25, 0.95: 3.94, 0.9: 4.87, 0.5: 9.34, 0.1: 15.99, 0.05: 18.31, 0.025: 20.48, 0.01: 23.21, 0.005: 25.19 }, 11: { 0.995: 2.60, 0.99: 3.05, 0.975: 3.82, 0.95: 4.57, 0.9: 5.58, 0.5: 10.34, 0.1: 17.28, 0.05: 19.68, 0.025: 21.92, 0.01: 24.72, 0.005: 26.76 }, 12: { 0.995: 3.07, 0.99: 3.57, 0.975: 4.40, 0.95: 5.23, 0.9: 6.30, 0.5: 11.34, 0.1: 18.55, 0.05: 21.03, 0.025: 23.34, 0.01: 26.22, 0.005: 28.30 }, 13: { 0.995: 3.57, 0.99: 4.11, 0.975: 5.01, 0.95: 5.89, 0.9: 7.04, 0.5: 12.34, 0.1: 19.81, 0.05: 22.36, 0.025: 24.74, 0.01: 27.69, 0.005: 29.82 }, 14: { 0.995: 4.07, 0.99: 4.66, 0.975: 5.63, 0.95: 6.57, 0.9: 7.79, 0.5: 13.34, 0.1: 21.06, 0.05: 23.68, 0.025: 26.12, 0.01: 29.14, 0.005: 31.32 }, 15: { 0.995: 4.60, 0.99: 5.23, 0.975: 6.27, 0.95: 7.26, 0.9: 8.55, 0.5: 14.34, 0.1: 22.31, 0.05: 25.00, 0.025: 27.49, 0.01: 30.58, 0.005: 32.80 }, 16: { 0.995: 5.14, 0.99: 5.81, 0.975: 6.91, 0.95: 7.96, 0.9: 9.31, 0.5: 15.34, 0.1: 23.54, 0.05: 26.30, 0.025: 28.85, 0.01: 32.00, 0.005: 34.27 }, 17: { 0.995: 5.70, 0.99: 6.41, 0.975: 7.56, 0.95: 8.67, 0.9: 10.09, 0.5: 16.34, 0.1: 24.77, 0.05: 27.59, 0.025: 30.19, 0.01: 33.41, 0.005: 35.72 }, 18: { 0.995: 6.26, 0.99: 7.01, 0.975: 8.23, 0.95: 9.39, 0.9: 10.87, 0.5: 17.34, 0.1: 25.99, 0.05: 28.87, 0.025: 31.53, 0.01: 34.81, 0.005: 37.16 }, 19: { 0.995: 6.84, 0.99: 7.63, 0.975: 8.91, 0.95: 10.12, 0.9: 11.65, 0.5: 18.34, 0.1: 27.20, 0.05: 30.14, 0.025: 32.85, 0.01: 36.19, 0.005: 38.58 }, 20: { 0.995: 7.43, 0.99: 8.26, 0.975: 9.59, 0.95: 10.85, 0.9: 12.44, 0.5: 19.34, 0.1: 28.41, 0.05: 31.41, 0.025: 34.17, 0.01: 37.57, 0.005: 40.00 }, 21: { 0.995: 8.03, 0.99: 8.90, 0.975: 10.28, 0.95: 11.59, 0.9: 13.24, 0.5: 20.34, 0.1: 29.62, 0.05: 32.67, 0.025: 35.48, 0.01: 38.93, 0.005: 41.40 }, 22: { 0.995: 8.64, 0.99: 9.54, 0.975: 10.98, 0.95: 12.34, 0.9: 14.04, 0.5: 21.34, 0.1: 30.81, 0.05: 33.92, 0.025: 36.78, 0.01: 40.29, 0.005: 42.80 }, 23: { 0.995: 9.26, 0.99: 10.20, 0.975: 11.69, 0.95: 13.09, 0.9: 14.85, 0.5: 22.34, 0.1: 32.01, 0.05: 35.17, 0.025: 38.08, 0.01: 41.64, 0.005: 44.18 }, 24: { 0.995: 9.89, 0.99: 10.86, 
0.975: 12.40, 0.95: 13.85, 0.9: 15.66, 0.5: 23.34, 0.1: 33.20, 0.05: 36.42, 0.025: 39.36, 0.01: 42.98, 0.005: 45.56 }, 25: { 0.995: 10.52, 0.99: 11.52, 0.975: 13.12, 0.95: 14.61, 0.9: 16.47, 0.5: 24.34, 0.1: 34.28, 0.05: 37.65, 0.025: 40.65, 0.01: 44.31, 0.005: 46.93 }, 26: { 0.995: 11.16, 0.99: 12.20, 0.975: 13.84, 0.95: 15.38, 0.9: 17.29, 0.5: 25.34, 0.1: 35.56, 0.05: 38.89, 0.025: 41.92, 0.01: 45.64, 0.005: 48.29 }, 27: { 0.995: 11.81, 0.99: 12.88, 0.975: 14.57, 0.95: 16.15, 0.9: 18.11, 0.5: 26.34, 0.1: 36.74, 0.05: 40.11, 0.025: 43.19, 0.01: 46.96, 0.005: 49.65 }, 28: { 0.995: 12.46, 0.99: 13.57, 0.975: 15.31, 0.95: 16.93, 0.9: 18.94, 0.5: 27.34, 0.1: 37.92, 0.05: 41.34, 0.025: 44.46, 0.01: 48.28, 0.005: 50.99 }, 29: { 0.995: 13.12, 0.99: 14.26, 0.975: 16.05, 0.95: 17.71, 0.9: 19.77, 0.5: 28.34, 0.1: 39.09, 0.05: 42.56, 0.025: 45.72, 0.01: 49.59, 0.005: 52.34 }, 30: { 0.995: 13.79, 0.99: 14.95, 0.975: 16.79, 0.95: 18.49, 0.9: 20.60, 0.5: 29.34, 0.1: 40.26, 0.05: 43.77, 0.025: 46.98, 0.01: 50.89, 0.005: 53.67 }, 40: { 0.995: 20.71, 0.99: 22.16, 0.975: 24.43, 0.95: 26.51, 0.9: 29.05, 0.5: 39.34, 0.1: 51.81, 0.05: 55.76, 0.025: 59.34, 0.01: 63.69, 0.005: 66.77 }, 50: { 0.995: 27.99, 0.99: 29.71, 0.975: 32.36, 0.95: 34.76, 0.9: 37.69, 0.5: 49.33, 0.1: 63.17, 0.05: 67.50, 0.025: 71.42, 0.01: 76.15, 0.005: 79.49 }, 60: { 0.995: 35.53, 0.99: 37.48, 0.975: 40.48, 0.95: 43.19, 0.9: 46.46, 0.5: 59.33, 0.1: 74.40, 0.05: 79.08, 0.025: 83.30, 0.01: 88.38, 0.005: 91.95 }, 70: { 0.995: 43.28, 0.99: 45.44, 0.975: 48.76, 0.95: 51.74, 0.9: 55.33, 0.5: 69.33, 0.1: 85.53, 0.05: 90.53, 0.025: 95.02, 0.01: 100.42, 0.005: 104.22 }, 80: { 0.995: 51.17, 0.99: 53.54, 0.975: 57.15, 0.95: 60.39, 0.9: 64.28, 0.5: 79.33, 0.1: 96.58, 0.05: 101.88, 0.025: 106.63, 0.01: 112.33, 0.005: 116.32 }, 90: { 0.995: 59.20, 0.99: 61.75, 0.975: 65.65, 0.95: 69.13, 0.9: 73.29, 0.5: 89.33, 0.1: 107.57, 0.05: 113.14, 0.025: 118.14, 0.01: 124.12, 0.005: 128.30 }, 100: { 0.995: 67.33, 0.99: 70.06, 0.975: 74.22, 0.95: 77.93, 0.9: 82.36, 0.5: 99.33, 0.1: 118.50, 0.05: 124.34, 0.025: 129.56, 0.01: 135.81, 0.005: 140.17 } }; module.exports = chiSquaredDistributionTable; },{}],6:[function(require,module,exports){ 'use strict'; var mean = require('./mean'); var chiSquaredDistributionTable = require('./chi_squared_distribution_table'); /** * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test) * uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies * (that is, counts of observations), each squared and divided by the number of observations expected given the * hypothesized distribution. The resulting χ2 statistic, `chiSquared`, can be compared to the chi-squared distribution * to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one * takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic * follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty * cells and `c` is the number of estimated parameters for the distribution. * * @param {Array} data * @param {Function} distributionType a function that returns a point in a distribution: * for instance, binomial, bernoulli, or poisson * @param {number} significance * @returns {number} chi squared goodness of fit * @example * // Data from Poisson goodness-of-fit example 10-19 in William W. 
Hines & Douglas C. Montgomery, * // "Probability and Statistics in Engineering and Management Science", Wiley (1980). * var data1019 = [ * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, * 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, * 2, 2, 2, 2, 2, 2, 2, 2, 2, * 3, 3, 3, 3 * ]; * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05)); //= false */ function chiSquaredGoodnessOfFit(data, distributionType, significance) { // Estimate from the sample data, a weighted mean. var inputMean = mean(data), // Calculated value of the χ2 statistic. chiSquared = 0, // Degrees of freedom, calculated as (number of class intervals - // number of hypothesized distribution parameters estimated - 1) degreesOfFreedom, // Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test. // Lose one degree of freedom for estimating `lambda` from the sample data. c = 1, // The hypothesized distribution. // Generate the hypothesized distribution. hypothesizedDistribution = distributionType(inputMean), observedFrequencies = [], expectedFrequencies = [], k; // Create an array holding a histogram from the sample data, of // the form `{ value: numberOfOcurrences }` for (var i = 0; i < data.length; i++) { if (observedFrequencies[data[i]] === undefined) { observedFrequencies[data[i]] = 0; } observedFrequencies[data[i]]++; } // The histogram we created might be sparse - there might be gaps // between values. So we iterate through the histogram, making // sure that instead of undefined, gaps have 0 values. for (i = 0; i < observedFrequencies.length; i++) { if (observedFrequencies[i] === undefined) { observedFrequencies[i] = 0; } } // Create an array holding a histogram of expected data given the // sample size and hypothesized distribution. for (k in hypothesizedDistribution) { if (k in observedFrequencies) { expectedFrequencies[k] = hypothesizedDistribution[k] * data.length; } } // Working backward through the expected frequencies, collapse classes // if less than three observations are expected for a class. // This transformation is applied to the observed frequencies as well. for (k = expectedFrequencies.length - 1; k >= 0; k--) { if (expectedFrequencies[k] < 3) { expectedFrequencies[k - 1] += expectedFrequencies[k]; expectedFrequencies.pop(); observedFrequencies[k - 1] += observedFrequencies[k]; observedFrequencies.pop(); } } // Iterate through the squared differences between observed & expected // frequencies, accumulating the `chiSquared` statistic. for (k = 0; k < observedFrequencies.length; k++) { chiSquared += Math.pow( observedFrequencies[k] - expectedFrequencies[k], 2) / expectedFrequencies[k]; } // Calculate degrees of freedom for this test and look it up in the // `chiSquaredDistributionTable` in order to // accept or reject the goodness-of-fit of the hypothesized distribution. degreesOfFreedom = observedFrequencies.length - c - 1; return chiSquaredDistributionTable[degreesOfFreedom][significance] < chiSquared; } module.exports = chiSquaredGoodnessOfFit; },{"./chi_squared_distribution_table":5,"./mean":21}],7:[function(require,module,exports){ 'use strict'; /** * Split an array into chunks of a specified size. This function * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php) * function, and thus will insert smaller-sized chunks at the end if * the input size is not divisible by the chunk size. 
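 * For instance, chunking five elements by two leaves a shorter final chunk:
 * chunk([1, 2, 3, 4, 5], 2); //= [[1, 2], [3, 4], [5]]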
* * `sample` is expected to be an array, and `chunkSize` a number. * The `sample` array can contain any kind of data. * * @param {Array} sample any array of values * @param {number} chunkSize size of each output array * @returns {Array} a chunked array * @example * console.log(chunk([1, 2, 3, 4], 2)); // [[1, 2], [3, 4]] */ function chunk(sample, chunkSize) { // a list of result chunks, as arrays in an array var output = []; // `chunkSize` must be zero or higher - otherwise the loop below, // in which we call `start += chunkSize`, will loop infinitely. // So, we'll detect and return null in that case to indicate // invalid input. if (chunkSize <= 0) { return null; } // `start` is the index at which `.slice` will start selecting // new array elements for (var start = 0; start < sample.length; start += chunkSize) { // for each chunk, slice that part of the array and add it // to the output. The `.slice` function does not change // the original array. output.push(sample.slice(start, start + chunkSize)); } return output; } module.exports = chunk; },{}],8:[function(require,module,exports){ 'use strict'; var sortedUniqueCount = require('./sorted_unique_count'), numericSort = require('./numeric_sort'); /** * Create a new column x row matrix. * * @private * @param {number} columns * @param {number} rows * @return {Array>} matrix * @example * makeMatrix(10, 10); */ function makeMatrix(columns, rows) { var matrix = []; for (var i = 0; i < columns; i++) { var column = []; for (var j = 0; j < rows; j++) { column.push(0); } matrix.push(column); } return matrix; } /** * Ckmeans clustering is an improvement on heuristic-based clustering * approaches like Jenks. The algorithm was developed in * [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf) * as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach * to the problem of clustering numeric data into groups with the least * within-group sum-of-squared-deviations. * * Minimizing the difference within groups - what Wang & Song refer to as * `withinss`, or within sum-of-squares, means that groups are optimally * homogenous within and the data is split into representative groups. * This is very useful for visualization, where you may want to represent * a continuous variable in discrete color or style groups. This function * can provide groups that emphasize differences between data. * * Being a dynamic approach, this algorithm is based on two matrices that * store incrementally-computed values for squared deviations and backtracking * indexes. * * Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html), * this implementation does not include any code to automatically determine * the optimal number of clusters: this information needs to be explicitly * provided. * * ### References * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859 * * from The R Journal Vol. 3/2, December 2011 * @param {Array} data input data, as an array of number values * @param {number} nClusters number of desired classes. This cannot be * greater than the number of values in the data array. * @returns {Array>} clustered input * @example * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3); * // The input, clustered into groups of similar numbers. 
* //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]); */ function ckmeans(data, nClusters) { if (nClusters > data.length) { throw new Error('Cannot generate more classes than there are data values'); } var sorted = numericSort(data), // we'll use this as the maximum number of clusters uniqueCount = sortedUniqueCount(sorted); // if all of the input values are identical, there's one cluster // with all of the input in it. if (uniqueCount === 1) { return [sorted]; } // named 'D' originally var matrix = makeMatrix(nClusters, sorted.length), // named 'B' originally backtrackMatrix = makeMatrix(nClusters, sorted.length); // This is a dynamic programming way to solve the problem of minimizing // within-cluster sum of squares. It's similar to linear regression // in this way, and this calculation incrementally computes the // sum of squares that are later read. // The outer loop iterates through clusters, from 0 to nClusters. for (var cluster = 0; cluster < nClusters; cluster++) { // At the start of each loop, the mean starts as the first element var firstClusterMean = sorted[0]; for (var sortedIdx = Math.max(cluster, 1); sortedIdx < sorted.length; sortedIdx++) { if (cluster === 0) { // Increase the running sum of squares calculation by this // new value var squaredDifference = Math.pow( sorted[sortedIdx] - firstClusterMean, 2); matrix[cluster][sortedIdx] = matrix[cluster][sortedIdx - 1] + ((sortedIdx - 1) / sortedIdx) * squaredDifference; // We're computing a running mean by taking the previous // mean value, multiplying it by the number of elements // seen so far, and then dividing it by the number of // elements total. var newSum = sortedIdx * firstClusterMean + sorted[sortedIdx]; firstClusterMean = newSum / sortedIdx; } else { var sumSquaredDistances = 0, meanXJ = 0; for (var j = sortedIdx; j >= cluster; j--) { sumSquaredDistances += (sortedIdx - j) / (sortedIdx - j + 1) * Math.pow(sorted[j] - meanXJ, 2); meanXJ = (sorted[j] + ((sortedIdx - j) * meanXJ)) / (sortedIdx - j + 1); if (j === sortedIdx) { matrix[cluster][sortedIdx] = sumSquaredDistances; backtrackMatrix[cluster][sortedIdx] = j; if (j > 0) { matrix[cluster][sortedIdx] += matrix[cluster - 1][j - 1]; } } else { if (j === 0) { if (sumSquaredDistances <= matrix[cluster][sortedIdx]) { matrix[cluster][sortedIdx] = sumSquaredDistances; backtrackMatrix[cluster][sortedIdx] = j; } } else if (sumSquaredDistances + matrix[cluster - 1][j - 1] < matrix[cluster][sortedIdx]) { matrix[cluster][sortedIdx] = sumSquaredDistances + matrix[cluster - 1][j - 1]; backtrackMatrix[cluster][sortedIdx] = j; } } } } } } // The real work of Ckmeans clustering happens in the matrix generation: // the generated matrices encode all possible clustering combinations, and // once they're generated we can solve for the best clustering groups // very quickly. var clusters = [], clusterRight = backtrackMatrix[0].length - 1; // Backtrack the clusters from the dynamic programming matrix. This // starts at the bottom-right corner of the matrix (if the top-left is 0, 0), // and moves the cluster target with the loop. for (cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) { var clusterLeft = backtrackMatrix[cluster][clusterRight]; // fill the cluster from the sorted input by taking a slice of the // array. the backtrack matrix makes this easy - it stores the // indexes where the cluster should start and end. 
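// Illustrative sketch with hypothetical backtrack values: for 10 sorted values
// and 3 clusters, backtrack entries of 7 and then 4 would yield the slices
// sorted.slice(7, 10), sorted.slice(4, 7) and finally sorted.slice(0, 4).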
clusters[cluster] = sorted.slice(clusterLeft, clusterRight + 1); if (cluster > 0) { clusterRight = clusterLeft - 1; } } return clusters; } module.exports = ckmeans; },{"./numeric_sort":26,"./sorted_unique_count":42}],9:[function(require,module,exports){ 'use strict'; var standardNormalTable = require('./standard_normal_table'); /** * **[Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)** * * Since probability tables cannot be * printed for every normal distribution, as there are an infinite variety * of normal distributions, it is common practice to convert a normal to a * standard normal and then use the standard normal table to find probabilities. * * You can use `.5 + .5 * errorFunction(x / Math.sqrt(2))` to calculate the probability * instead of looking it up in a table. * * @param {number} z * @returns {number} cumulative standard normal probability */ function cumulativeStdNormalProbability(z) { // Calculate the position of this value. var absZ = Math.abs(z), // Each row begins with a different // significant digit: 0.5, 0.6, 0.7, and so on. Each value in the table // corresponds to a range of 0.01 in the input values, so the value is // multiplied by 100. index = Math.min(Math.round(absZ * 100), standardNormalTable.length - 1); // The index we calculate must be in the table as a positive value, // but we still pay attention to whether the input is positive // or negative, and flip the output value as a last step. if (z >= 0) { return standardNormalTable[index]; } else { // due to floating-point arithmetic, values in the table with // 4 significant figures can nevertheless end up as repeating // fractions when they're computed here. return +(1 - standardNormalTable[index]).toFixed(4); } } module.exports = cumulativeStdNormalProbability; },{"./standard_normal_table":44}],10:[function(require,module,exports){ 'use strict'; /** * We use `ε`, epsilon, as a stopping criterion when we want to iterate * until we're "close enough". * * This is used in calculations like the binomialDistribution, in which * the process of finding a value is [iterative](https://en.wikipedia.org/wiki/Iterative_method): * it progresses until it is close enough. */ var epsilon = 0.0001; module.exports = epsilon; },{}],11:[function(require,module,exports){ 'use strict'; /** * **[Gaussian error function](http://en.wikipedia.org/wiki/Error_function)** * * The `errorFunction(x/(sd * Math.sqrt(2)))` is the probability that a value in a * normal distribution with standard deviation sd is within x of the mean. * * This function returns a numerical approximation to the exact value. * * @param {number} x input * @return {number} error estimation * @example * errorFunction(1); //= 0.8427 */ function errorFunction(x) { var t = 1 / (1 + 0.5 * Math.abs(x)); var tau = t * Math.exp(-Math.pow(x, 2) - 1.26551223 + 1.00002368 * t + 0.37409196 * Math.pow(t, 2) + 0.09678418 * Math.pow(t, 3) - 0.18628806 * Math.pow(t, 4) + 0.27886807 * Math.pow(t, 5) - 1.13520398 * Math.pow(t, 6) + 1.48851587 * Math.pow(t, 7) - 0.82215223 * Math.pow(t, 8) + 0.17087277 * Math.pow(t, 9)); if (x >= 0) { return 1 - tau; } else { return tau - 1; } } module.exports = errorFunction; },{}],12:[function(require,module,exports){ 'use strict'; /** * A [Factorial](https://en.wikipedia.org/wiki/Factorial), usually written n!, is the product of all positive * integers less than or equal to n. Often factorial is implemented * recursively, but this iterative approach is significantly faster * and simpler. 
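 * As a quick check, 5! accumulates 2 · 3 · 4 · 5 = 120, the same value the
 * recursive definition 5 · 4! would produce.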
* * @param {number} n input * @returns {number} factorial: n! * @example * console.log(factorial(5)); // 120 */ function factorial(n) { // factorial is mathematically undefined for negative numbers if (n < 0 ) { return null; } // typically you'll expand the factorial function going down, like // 5! = 5 * 4 * 3 * 2 * 1. This is going in the opposite direction, // counting from 2 up to the number in question, and since anything // multiplied by 1 is itself, the loop only needs to start at 2. var accumulator = 1; for (var i = 2; i <= n; i++) { // for each number up to and including the number `n`, multiply // the accumulator my that number. accumulator *= i; } return accumulator; } module.exports = factorial; },{}],13:[function(require,module,exports){ 'use strict'; /** * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean) is * a mean function that is more useful for numbers in different * ranges. * * This is the nth root of the input numbers multiplied by each other. * * The geometric mean is often useful for * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**: given * growth rates for multiple years, like _80%, 16.66% and 42.85%_, a simple * mean will incorrectly estimate an average growth rate, whereas a geometric * mean will correctly estimate a growth rate that, over those years, * will yield the same end value. * * This runs on `O(n)`, linear time in respect to the array * * @param {Array} x input array * @returns {number} geometric mean * @example * var growthRates = [1.80, 1.166666, 1.428571]; * var averageGrowth = geometricMean(growthRates); * var averageGrowthRates = [averageGrowth, averageGrowth, averageGrowth]; * var startingValue = 10; * var startingValueMean = 10; * growthRates.forEach(function(rate) { * startingValue *= rate; * }); * averageGrowthRates.forEach(function(rate) { * startingValueMean *= rate; * }); * startingValueMean === startingValue; */ function geometricMean(x) { // The mean of no numbers is null if (x.length === 0) { return null; } // the starting value. var value = 1; for (var i = 0; i < x.length; i++) { // the geometric mean is only valid for positive numbers if (x[i] <= 0) { return null; } // repeatedly multiply the value by each number value *= x[i]; } return Math.pow(value, 1 / x.length); } module.exports = geometricMean; },{}],14:[function(require,module,exports){ 'use strict'; /** * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is * a mean function typically used to find the average of rates. * This mean is calculated by taking the reciprocal of the arithmetic mean * of the reciprocals of the input numbers. * * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): * a method of finding a typical or central value of a set of numbers. * * This runs on `O(n)`, linear time in respect to the array. 
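 * As a worked example, for [2, 3] the reciprocals sum to 1/2 + 1/3 = 5/6,
 * and 2 / (5/6) = 2.4, matching the example below.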
* * @param {Array} x input * @returns {number} harmonic mean * @example * ss.harmonicMean([2, 3]) //= 2.4 */ function harmonicMean(x) { // The mean of no numbers is null if (x.length === 0) { return null; } var reciprocalSum = 0; for (var i = 0; i < x.length; i++) { // the harmonic mean is only valid for positive numbers if (x[i] <= 0) { return null; } reciprocalSum += 1 / x[i]; } // divide n by the the reciprocal sum return x.length / reciprocalSum; } module.exports = harmonicMean; },{}],15:[function(require,module,exports){ 'use strict'; var quantile = require('./quantile'); /** * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is * a measure of statistical dispersion, or how scattered, spread, or * concentrated a distribution is. It's computed as the difference between * the third quartile and first quartile. * * @param {Array} sample * @returns {number} interquartile range: the span between lower and upper quartile, * 0.25 and 0.75 * @example * interquartileRange([0, 1, 2, 3]); //= 2 */ function interquartileRange(sample) { // We can't derive quantiles from an empty list if (sample.length === 0) { return null; } // Interquartile range is the span between the upper quartile, // at `0.75`, and lower quartile, `0.25` return quantile(sample, 0.75) - quantile(sample, 0.25); } module.exports = interquartileRange; },{"./quantile":30}],16:[function(require,module,exports){ 'use strict'; /** * The Inverse [Gaussian error function](http://en.wikipedia.org/wiki/Error_function) * returns a numerical approximation to the value that would have caused * `errorFunction()` to return x. * * @param {number} x value of error function * @returns {number} estimated inverted value */ function inverseErrorFunction(x) { var a = (8 * (Math.PI - 3)) / (3 * Math.PI * (4 - Math.PI)); var inv = Math.sqrt(Math.sqrt( Math.pow(2 / (Math.PI * a) + Math.log(1 - x * x) / 2, 2) - Math.log(1 - x * x) / a) - (2 / (Math.PI * a) + Math.log(1 - x * x) / 2)); if (x >= 0) { return inv; } else { return -inv; } } module.exports = inverseErrorFunction; },{}],17:[function(require,module,exports){ 'use strict'; /** * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression) * is a simple way to find a fitted line * between a set of coordinates. This algorithm finds the slope and y-intercept of a regression line * using the least sum of squares. * * @param {Array>} data an array of two-element of arrays, * like `[[0, 1], [2, 3]]` * @returns {Object} object containing slope and intersect of regression line * @example * linearRegression([[0, 0], [1, 1]]); // { m: 1, b: 0 } */ function linearRegression(data) { var m, b; // Store data length in a local variable to reduce // repeated object property lookups var dataLength = data.length; //if there's only one point, arbitrarily choose a slope of 0 //and a y-intercept of whatever the y of the initial point is if (dataLength === 1) { m = 0; b = data[0][1]; } else { // Initialize our sums and scope the `m` and `b` // variables that define the line. var sumX = 0, sumY = 0, sumXX = 0, sumXY = 0; // Use local variables to grab point values // with minimal object property lookups var point, x, y; // Gather the sum of all x values, the sum of all // y values, and the sum of x^2 and (x*y) for each // value. 
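// As a concrete illustration, for the two-point dataset [[0, 0], [1, 1]] from the
// @example above these sums come to sumX = 1, sumY = 1, sumXX = 1 and sumXY = 1,
// which gives m = 1 and b = 0.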
// // In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy for (var i = 0; i < dataLength; i++) { point = data[i]; x = point[0]; y = point[1]; sumX += x; sumY += y; sumXX += x * x; sumXY += x * y; } // `m` is the slope of the regression line m = ((dataLength * sumXY) - (sumX * sumY)) / ((dataLength * sumXX) - (sumX * sumX)); // `b` is the y-intercept of the line. b = (sumY / dataLength) - ((m * sumX) / dataLength); } // Return both values as an object. return { m: m, b: b }; } module.exports = linearRegression; },{}],18:[function(require,module,exports){ 'use strict'; /** * Given the output of `linearRegression`: an object * with `m` and `b` values indicating slope and intercept, * respectively, generate a line function that translates * x values into y values. * * @param {Object} mb object with `m` and `b` members, representing * slope and intersect of desired line * @returns {Function} method that computes y-value at any given * x-value on the line. * @example * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]])); * l(0) //= 0 * l(2) //= 2 */ function linearRegressionLine(mb) { // Return a function that computes a `y` value for each // x value it is given, based on the values of `b` and `a` // that we just computed. return function(x) { return mb.b + (mb.m * x); }; } module.exports = linearRegressionLine; },{}],19:[function(require,module,exports){ 'use strict'; var median = require('./median'); /** * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is * a robust measure of statistical * dispersion. It is more resilient to outliers than the standard deviation. * * @param {Array} x input array * @returns {number} median absolute deviation * @example * mad([1, 1, 2, 2, 4, 6, 9]); //= 1 */ function mad(x) { // The mad of nothing is null if (!x || x.length === 0) { return null; } var medianValue = median(x), medianAbsoluteDeviations = []; // Make a list of absolute deviations from the median for (var i = 0; i < x.length; i++) { medianAbsoluteDeviations.push(Math.abs(x[i] - medianValue)); } // Find the median value of that list return median(medianAbsoluteDeviations); } module.exports = mad; },{"./median":22}],20:[function(require,module,exports){ 'use strict'; /** * This computes the maximum number in an array. * * This runs on `O(n)`, linear time in respect to the array * * @param {Array} x input * @returns {number} maximum value * @example * console.log(max([1, 2, 3, 4])); // 4 */ function max(x) { var value; for (var i = 0; i < x.length; i++) { // On the first iteration of this loop, max is // undefined and is thus made the maximum element in the array if (x[i] > value || value === undefined) { value = x[i]; } } return value; } module.exports = max; },{}],21:[function(require,module,exports){ 'use strict'; var sum = require('./sum'); /** * The mean, _also known as average_, * is the sum of all values over the number of values. * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): * a method of finding a typical or central value of a set of numbers. 
* * This runs on `O(n)`, linear time in respect to the array * * @param {Array} x input values * @returns {number} mean * @example * console.log(mean([0, 10])); // 5 */ function mean(x) { // The mean of no numbers is null if (x.length === 0) { return null; } return sum(x) / x.length; } module.exports = mean; },{"./sum":45}],22:[function(require,module,exports){ 'use strict'; var numericSort = require('./numeric_sort'); /** * The [median](http://en.wikipedia.org/wiki/Median) is * the middle number of a list. This is often a good indicator of 'the middle' * when there are outliers that skew the `mean()` value. * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): * a method of finding a typical or central value of a set of numbers. * * The median isn't necessarily one of the elements in the list: the value * can be the average of two elements if the list has an even length * and the two central values are different. * * @param {Array} x input * @returns {number} median value * @example * var incomes = [10, 2, 5, 100, 2, 1]; * median(incomes); //= 3.5 */ function median(x) { // The median of an empty list is null if (x.length === 0) { return null; } // Sorting the array makes it easy to find the center, but // use `.slice()` to ensure the original array `x` is not modified var sorted = numericSort(x); // If the length of the list is odd, it's the central number if (sorted.length % 2 === 1) { return sorted[(sorted.length - 1) / 2]; // Otherwise, the median is the average of the two numbers // at the center of the list } else { var a = sorted[(sorted.length / 2) - 1]; var b = sorted[(sorted.length / 2)]; return (a + b) / 2; } } module.exports = median; },{"./numeric_sort":26}],23:[function(require,module,exports){ 'use strict'; /** * The min is the lowest number in the array. This runs on `O(n)`, linear time in respect to the array * * @param {Array} x input * @returns {number} minimum value * @example * min([1, 5, -10, 100, 2]); // -10 */ function min(x) { var value; for (var i = 0; i < x.length; i++) { // On the first iteration of this loop, min is // undefined and is thus made the minimum element in the array if (x[i] < value || value === undefined) { value = x[i]; } } return value; } module.exports = min; },{}],24:[function(require,module,exports){ 'use strict'; /** * **Mixin** simple_statistics to a single Array instance if provided * or the Array native object if not. This is an optional * feature that lets you treat simple_statistics as a native feature * of JavaScript. * * @param {Object} ss simple statistics * @param {Array} [array=] a single array instance which will be augmented * with the extra methods. If omitted, mixin will apply to all arrays * by changing the global `Array.prototype`. * @returns {*} the extended Array, or Array.prototype if no object * is given. * * @example * var myNumbers = [1, 2, 3]; * mixin(ss, myNumbers); * console.log(myNumbers.sum()); // 6 */ function mixin(ss, array) { var support = !!(Object.defineProperty && Object.defineProperties); // Coverage testing will never test this error.
/* istanbul ignore next */ if (!support) { throw new Error('without defineProperty, simple-statistics cannot be mixed in'); } // only methods which work on basic arrays in a single step // are supported var arrayMethods = ['median', 'standardDeviation', 'sum', 'sampleSkewness', 'mean', 'min', 'max', 'quantile', 'geometricMean', 'harmonicMean', 'root_mean_square']; // create a closure with a method name so that a reference // like `arrayMethods[i]` doesn't follow the loop increment function wrap(method) { return function() { // cast any arguments into an array, since they're // natively objects var args = Array.prototype.slice.apply(arguments); // make the first argument the array itself args.unshift(this); // return the result of the ss method return ss[method].apply(ss, args); }; } // select object to extend var extending; if (array) { // create a shallow copy of the array so that our internal // operations do not change it by reference extending = array.slice(); } else { extending = Array.prototype; } // for each array function, define a function that gets // the array as the first argument. // We use [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty) // because it allows these properties to be non-enumerable: // `for (var in x)` loops will not run into problems with this // implementation. for (var i = 0; i < arrayMethods.length; i++) { Object.defineProperty(extending, arrayMethods[i], { value: wrap(arrayMethods[i]), configurable: true, enumerable: false, writable: true }); } return extending; } module.exports = mixin; },{}],25:[function(require,module,exports){ 'use strict'; var numericSort = require('./numeric_sort'); /** * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times. * There can be multiple modes in a list: in the event of a tie, this * algorithm will return the most recently seen mode. * * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): * a method of finding a typical or central value of a set of numbers. * * This runs on `O(n)`, linear time in respect to the array. * * @param {Array} x input * @returns {number} mode * @example * mode([0, 0, 1]); //= 0 */ function mode(x) { // Handle edge cases: // The median of an empty list is null if (x.length === 0) { return null; } else if (x.length === 1) { return x[0]; } // Sorting the array lets us iterate through it below and be sure // that every time we see a new number it's new and we'll never // see the same number twice var sorted = numericSort(x); // This assumes it is dealing with an array of size > 1, since size // 0 and 1 are handled immediately. Hence it starts at index 1 in the // array. var last = sorted[0], // store the mode as we find new modes value, // store how many times we've seen the mode maxSeen = 0, // how many times the current candidate for the mode // has been seen seenThis = 1; // end at sorted.length + 1 to fix the case in which the mode is // the highest number that occurs in the sequence. 
the last iteration // compares sorted[i], which is undefined, to the highest number // in the series for (var i = 1; i < sorted.length + 1; i++) { // we're seeing a new number pass by if (sorted[i] !== last) { // the last number is the new mode since we saw it more // often than the old one if (seenThis > maxSeen) { maxSeen = seenThis; value = last; } seenThis = 1; last = sorted[i]; // if this isn't a new number, it's one more occurrence of // the potential mode } else { seenThis++; } } return value; } module.exports = mode; },{"./numeric_sort":26}],26:[function(require,module,exports){ 'use strict'; /** * Sort an array of numbers by their numeric value, ensuring that the * array is not changed in place. * * This is necessary because the default behavior of .sort * in JavaScript is to sort arrays as string values * * [1, 10, 12, 102, 20].sort() * // output * [1, 10, 102, 12, 20] * * @param {Array} array input array * @return {Array} sorted array * @example * numericSort([3, 2, 1]) // [1, 2, 3] */ function numericSort(array) { return array // ensure the array is changed in-place .slice() // comparator function that treats input as numeric .sort(function(a, b) { return a - b; }); } module.exports = numericSort; },{}],27:[function(require,module,exports){ 'use strict'; /** * This is a single-layer [Perceptron Classifier](http://en.wikipedia.org/wiki/Perceptron) that takes * arrays of numbers and predicts whether they should be classified * as either 0 or 1 (negative or positive examples). * @class * @example * // Create the model * var p = new PerceptronModel(); * // Train the model with input with a diagonal boundary. * for (var i = 0; i < 5; i++) { * p.train([1, 1], 1); * p.train([0, 1], 0); * p.train([1, 0], 0); * p.train([0, 0], 0); * } * p.predict([0, 0]); // 0 * p.predict([0, 1]); // 0 * p.predict([1, 0]); // 0 * p.predict([1, 1]); // 1 */ function PerceptronModel() { // The weights, or coefficients of the model; // weights are only populated when training with data. this.weights = []; // The bias term, or intercept; it is also a weight but // it's stored separately for convenience as it is always // multiplied by one. this.bias = 0; } /** * **Predict**: Use an array of features with the weight array and bias * to predict whether an example is labeled 0 or 1. * * @param {Array} features an array of features as numbers * @returns {number} 1 if the score is over 0, otherwise 0 */ PerceptronModel.prototype.predict = function(features) { // Only predict if previously trained // on the same size feature array(s). if (features.length !== this.weights.length) { return null; } // Calculate the sum of features times weights, // with the bias added (implicitly times one). var score = 0; for (var i = 0; i < this.weights.length; i++) { score += this.weights[i] * features[i]; } score += this.bias; // Classify as 1 if the score is over 0, otherwise 0. if (score > 0) { return 1; } else { return 0; } }; /** * **Train** the classifier with a new example, which is * a numeric array of features and a 0 or 1 label. * * @param {Array} features an array of features as numbers * @param {number} label either 0 or 1 * @returns {PerceptronModel} this */ PerceptronModel.prototype.train = function(features, label) { // Require that only labels of 0 or 1 are considered. if (label !== 0 && label !== 1) { return null; } // The length of the feature array determines // the length of the weight array. // The perceptron will continue learning as long as // it keeps seeing feature arrays of the same length. 
// When it sees a new data shape, it initializes. if (features.length !== this.weights.length) { this.weights = features; this.bias = 1; } // Make a prediction based on current weights. var prediction = this.predict(features); // Update the weights if the prediction is wrong. if (prediction !== label) { var gradient = label - prediction; for (var i = 0; i < this.weights.length; i++) { this.weights[i] += gradient * features[i]; } this.bias += gradient; } return this; }; module.exports = PerceptronModel; },{}],28:[function(require,module,exports){ 'use strict'; var epsilon = require('./epsilon'); var factorial = require('./factorial'); /** * The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution) * is a discrete probability distribution that expresses the probability * of a given number of events occurring in a fixed interval of time * and/or space if these events occur with a known average rate and * independently of the time since the last event. * * The Poisson Distribution is characterized by the strictly positive * mean arrival or occurrence rate, `λ`. * * @param {number} lambda location poisson distribution * @returns {number} value of poisson distribution at that point */ function poissonDistribution(lambda) { // Check that lambda is strictly positive if (lambda <= 0) { return null; } // our current place in the distribution var x = 0, // and we keep track of the current cumulative probability, in // order to know when to stop calculating chances. cumulativeProbability = 0, // the calculated cells to be returned cells = {}; // This algorithm iterates through each potential outcome, // until the `cumulativeProbability` is very close to 1, at // which point we've defined the vast majority of outcomes do { // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function) cells[x] = (Math.pow(Math.E, -lambda) * Math.pow(lambda, x)) / factorial(x); cumulativeProbability += cells[x]; x++; // when the cumulativeProbability is nearly 1, we've calculated // the useful range of this distribution } while (cumulativeProbability < 1 - epsilon); return cells; } module.exports = poissonDistribution; },{"./epsilon":10,"./factorial":12}],29:[function(require,module,exports){ 'use strict'; var epsilon = require('./epsilon'); var inverseErrorFunction = require('./inverse_error_function'); /** * The [Probit](http://en.wikipedia.org/wiki/Probit) * is the inverse of cumulativeStdNormalProbability(), * and is also known as the normal quantile function. * * It returns the number of standard deviations from the mean * where the p'th quantile of values can be found in a normal distribution. * So, for example, probit(0.5 + 0.6827/2) ≈ 1 because 68.27% of values are * normally found within 1 standard deviation above or below the mean. * * @param {number} p * @returns {number} probit */ function probit(p) { if (p === 0) { p = epsilon; } else if (p >= 1) { p = 1 - epsilon; } return Math.sqrt(2) * inverseErrorFunction(2 * p - 1); } module.exports = probit; },{"./epsilon":10,"./inverse_error_function":16}],30:[function(require,module,exports){ 'use strict'; var quantileSorted = require('./quantile_sorted'); var numericSort = require('./numeric_sort'); /** * The [quantile](https://en.wikipedia.org/wiki/Quantile): * this is a population quantile, since we assume to know the entire * dataset in this library. This is an implementation of the * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population) * algorithm from wikipedia. 
* * Sample is a one-dimensional array of numbers, * and p is either a decimal number from 0 to 1 or an array of decimal * numbers from 0 to 1. * In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing * with decimal values. * When p is an array, the result of the function is also an array containing the appropriate * quantiles in input order * * @param {Array} sample a sample from the population * @param {number} p the desired quantile, as a number between 0 and 1 * @returns {number} quantile * @example * var data = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20]; * quantile(data, 1); //= max(data); * quantile(data, 0); //= min(data); * quantile(data, 0.5); //= 9 */ function quantile(sample, p) { // We can't derive quantiles from an empty list if (sample.length === 0) { return null; } // Sort a copy of the array. We'll need a sorted array to index // the values in sorted order. var sorted = numericSort(sample); if (p.length) { // Initialize the result array var results = []; // For each requested quantile for (var i = 0; i < p.length; i++) { results[i] = quantileSorted(sorted, p[i]); } return results; } else { return quantileSorted(sorted, p); } } module.exports = quantile; },{"./numeric_sort":26,"./quantile_sorted":31}],31:[function(require,module,exports){ 'use strict'; /** * This is the internal implementation of quantiles: when you know * that the order is sorted, you don't need to re-sort it, and the computations * are faster. * * @param {Array} sample input data * @param {number} p desired quantile: a number between 0 to 1, inclusive * @returns {number} quantile value * @example * var data = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20]; * quantileSorted(data, 1); //= max(data); * quantileSorted(data, 0); //= min(data); * quantileSorted(data, 0.5); //= 9 */ function quantileSorted(sample, p) { var idx = (sample.length) * p; if (p < 0 || p > 1) { return null; } else if (p === 1) { // If p is 1, directly return the last element return sample[sample.length - 1]; } else if (p === 0) { // If p is 0, directly return the first element return sample[0]; } else if (idx % 1 !== 0) { // If p is not integer, return the next element in array return sample[Math.ceil(idx) - 1]; } else if (sample.length % 2 === 0) { // If the list has even-length, we'll take the average of this number // and the next value, if there is one return (sample[idx - 1] + sample[idx]) / 2; } else { // Finally, in the simple case of an integer value // with an odd-length list, return the sample value at the index. return sample[idx]; } } module.exports = quantileSorted; },{}],32:[function(require,module,exports){ 'use strict'; /** * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination) * value of data compared with a function `f` * is the sum of the squared differences between the prediction * and the actual value. 
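 * More precisely, the value computed below is 1 minus the ratio of the squared
 * prediction errors to the total sum of squares, so a perfect fit scores 1 and
 * larger errors push the score lower.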
* * @param {Array>} data input data: this should be doubly-nested * @param {Function} func function called on `[i][0]` values within the dataset * @returns {number} r-squared value * @example * var samples = [[0, 0], [1, 1]]; * var regressionLine = linearRegressionLine(linearRegression(samples)); * rSquared(samples, regressionLine); //= 1 this line is a perfect fit */ function rSquared(data, func) { if (data.length < 2) { return 1; } // Compute the average y value for the actual // data set in order to compute the // _total sum of squares_ var sum = 0, average; for (var i = 0; i < data.length; i++) { sum += data[i][1]; } average = sum / data.length; // Compute the total sum of squares - the // squared difference between each point // and the average of all points. var sumOfSquares = 0; for (var j = 0; j < data.length; j++) { sumOfSquares += Math.pow(average - data[j][1], 2); } // Finally estimate the error: the squared // difference between the estimate and the actual data // value at each point. var err = 0; for (var k = 0; k < data.length; k++) { err += Math.pow(data[k][1] - func(data[k][0]), 2); } // As the error grows larger, its ratio to the // sum of squares increases and the r squared // value grows lower. return 1 - (err / sumOfSquares); } module.exports = rSquared; },{}],33:[function(require,module,exports){ 'use strict'; /** * The Root Mean Square (RMS) is * a mean function used as a measure of the magnitude of a set * of numbers, regardless of their sign. * This is the square root of the mean of the squares of the * input numbers. * This runs on `O(n)`, linear time in respect to the array * * @param {Array} x input * @returns {number} root mean square * @example * rootMeanSquare([-1, 1, -1, 1]); //= 1 */ function rootMeanSquare(x) { if (x.length === 0) { return null; } var sumOfSquares = 0; for (var i = 0; i < x.length; i++) { sumOfSquares += Math.pow(x[i], 2); } return Math.sqrt(sumOfSquares / x.length); } module.exports = rootMeanSquare; },{}],34:[function(require,module,exports){ 'use strict'; var shuffle = require('./shuffle'); /** * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample) * from a given array of `n` elements. * * The sampled values will be in any order, not necessarily the order * they appear in the input. * * @param {Array} array input array. can contain any type * @param {number} n count of how many elements to take * @param {Function} [randomSource=Math.random] an optional source of entropy * instead of Math.random * @return {Array} subset of n elements in original array * @example * var values = [1, 2, 4, 5, 6, 7, 8, 9]; * sample(values, 3); // returns 3 random values, like [2, 5, 8]; */ function sample(array, n, randomSource) { // shuffle the original array using a fisher-yates shuffle var shuffled = shuffle(array, randomSource); // and then return a subset of it - the first `n` elements. 
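// For instance, sample([1, 2, 3, 4], 2) could yield [3, 1]; passing a fixed
// randomSource such as function() { return 0; } makes the result repeatable.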
return shuffled.slice(0, n); } module.exports = sample; },{"./shuffle":40}],35:[function(require,module,exports){ 'use strict'; var sampleCovariance = require('./sample_covariance'); var sampleStandardDeviation = require('./sample_standard_deviation'); /** * The [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) is * a measure of how correlated two datasets are, between -1 and 1 * * @param {Array} x first input * @param {Array} y second input * @returns {number} sample correlation * @example * var a = [1, 2, 3, 4, 5, 6]; * var b = [2, 2, 3, 4, 5, 60]; * sampleCorrelation(a, b); //= 0.691 */ function sampleCorrelation(x, y) { var cov = sampleCovariance(x, y), xstd = sampleStandardDeviation(x), ystd = sampleStandardDeviation(y); if (cov === null || xstd === null || ystd === null) { return null; } return cov / xstd / ystd; } module.exports = sampleCorrelation; },{"./sample_covariance":36,"./sample_standard_deviation":38}],36:[function(require,module,exports){ 'use strict'; var mean = require('./mean'); /** * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_sampleCovariance) of two datasets: * how much do the two datasets move together? * x and y are two datasets, represented as arrays of numbers. * * @param {Array} x first input * @param {Array} y second input * @returns {number} sample covariance * @example * var x = [1, 2, 3, 4, 5, 6]; * var y = [6, 5, 4, 3, 2, 1]; * sampleCovariance(x, y); //= -3.5 */ function sampleCovariance(x, y) { // The two datasets must have the same length which must be more than 1 if (x.length <= 1 || x.length !== y.length) { return null; } // determine the mean of each dataset so that we can judge each // value of the dataset fairly as the difference from the mean. this // way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance // does not suffer because of the difference in absolute values var xmean = mean(x), ymean = mean(y), sum = 0; // for each pair of values, the covariance increases when their // difference from the mean is associated - if both are well above // or if both are well below // the mean, the covariance increases significantly. for (var i = 0; i < x.length; i++) { sum += (x[i] - xmean) * (y[i] - ymean); } // this is Bessels' Correction: an adjustment made to sample statistics // that allows for the reduced degree of freedom entailed in calculating // values from samples rather than complete populations. var besselsCorrection = x.length - 1; // the covariance is weighted by the length of the datasets. return sum / besselsCorrection; } module.exports = sampleCovariance; },{"./mean":21}],37:[function(require,module,exports){ 'use strict'; var sumNthPowerDeviations = require('./sum_nth_power_deviations'); var sampleStandardDeviation = require('./sample_standard_deviation'); /** * [Skewness](http://en.wikipedia.org/wiki/Skewness) is * a measure of the extent to which a probability distribution of a * real-valued random variable "leans" to one side of the mean. * The skewness value can be positive or negative, or even undefined. * * Implementation is based on the adjusted Fisher-Pearson standardized * moment coefficient, which is the version found in Excel and several * statistical packages including Minitab, SAS and SPSS. 
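 * In terms of the code below, the statistic is
 * n * sum((x - mean)^3) / ((n - 1) * (n - 2) * s^3),
 * where s is the sample standard deviation and n is the sample size.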
* * @param {Array} x input * @returns {number} sample skewness * @example * var data = [2, 4, 6, 3, 1]; * sampleSkewness(data); //= 0.5901286564 */ function sampleSkewness(x) { // The skewness of less than three arguments is null if (x.length < 3) { return null; } var n = x.length, cubedS = Math.pow(sampleStandardDeviation(x), 3), sumCubedDeviations = sumNthPowerDeviations(x, 3); return n * sumCubedDeviations / ((n - 1) * (n - 2) * cubedS); } module.exports = sampleSkewness; },{"./sample_standard_deviation":38,"./sum_nth_power_deviations":46}],38:[function(require,module,exports){ 'use strict'; var sampleVariance = require('./sample_variance'); /** * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation) * is the square root of the variance. * * @param {Array} x input array * @returns {number} sample standard deviation * @example * ss.sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); * //= 2.138 */ function sampleStandardDeviation(x) { // The standard deviation of no numbers is null if (x.length <= 1) { return null; } return Math.sqrt(sampleVariance(x)); } module.exports = sampleStandardDeviation; },{"./sample_variance":39}],39:[function(require,module,exports){ 'use strict'; var sumNthPowerDeviations = require('./sum_nth_power_deviations'); /* * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance) * is the sum of squared deviations from the mean. The sample variance * is distinguished from the variance by the usage of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction): * instead of dividing the sum of squared deviations by the length of the input, * it is divided by the length minus one. This corrects the bias in estimating * a value from a set that you don't know if full. * * References: * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html) * * @param {Array} x input array * @return {number} sample variance * @example * sampleVariance([1, 2, 3, 4, 5]); //= 2.5 */ function sampleVariance(x) { // The variance of no numbers is null if (x.length <= 1) { return null; } var sumSquaredDeviationsValue = sumNthPowerDeviations(x, 2); // this is Bessels' Correction: an adjustment made to sample statistics // that allows for the reduced degree of freedom entailed in calculating // values from samples rather than complete populations. var besselsCorrection = x.length - 1; // Find the mean value of that list return sumSquaredDeviationsValue / besselsCorrection; } module.exports = sampleVariance; },{"./sum_nth_power_deviations":46}],40:[function(require,module,exports){ 'use strict'; var shuffleInPlace = require('./shuffle_in_place'); /* * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) * is a fast way to create a random permutation of a finite set. This is * a function around `shuffle_in_place` that adds the guarantee that * it will not modify its input. 
 *
 * @param {Array} sample an array of any kind of element
 * @param {Function} [randomSource=Math.random] an optional entropy source
 * @return {Array} shuffled version of input
 * @example
 * var shuffled = shuffle([1, 2, 3, 4]);
 * shuffled; // = [2, 3, 1, 4] or any other random permutation
 */
function shuffle(sample, randomSource) {
    // slice the original array so that it is not modified
    sample = sample.slice();
    // and then shuffle that shallow-copied array, in place
    return shuffleInPlace(sample, randomSource);
}

module.exports = shuffle;

},{"./shuffle_in_place":41}],41:[function(require,module,exports){
'use strict';

/*
 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
 * in-place - which means that it **will change the order of the original
 * array by reference**.
 *
 * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation)
 * of a set.
 *
 * @param {Array} sample input array
 * @param {Function} [randomSource=Math.random] an optional source of entropy
 * @returns {Array} sample
 * @example
 * var sample = [1, 2, 3, 4];
 * shuffleInPlace(sample);
 * // sample is shuffled to a value like [2, 1, 4, 3]
 */
function shuffleInPlace(sample, randomSource) {

    // a custom random number source can be provided if you want to use
    // a fixed seed or another random number generator, like
    // [random-js](https://www.npmjs.org/package/random-js)
    randomSource = randomSource || Math.random;

    // store the current length of the sample to determine
    // when no elements remain to shuffle.
    var length = sample.length;

    // temporary is used to hold an item when it is being
    // swapped between indices.
    var temporary;

    // The index to swap at each stage.
    var index;

    // While there are still items to shuffle
    while (length > 0) {
        // choose a random index within the subset of the array
        // that is not yet shuffled
        index = Math.floor(randomSource() * length--);

        // store the value that we'll move temporarily
        temporary = sample[length];

        // swap the value at `sample[length]` with `sample[index]`
        sample[length] = sample[index];
        sample[index] = temporary;
    }

    return sample;
}

module.exports = shuffleInPlace;

},{}],42:[function(require,module,exports){
'use strict';

/**
 * For a sorted input, counting the number of unique values
 * is possible in linear time and constant memory. This is
 * a simple implementation of the algorithm.
 *
 * Values are compared with `===`, so objects and other non-primitive
 * values are not handled in any special way.
 *
 * @param {Array} input an array of primitive values.
 * @returns {number} count of unique values
 * @example
 * sortedUniqueCount([1, 2, 3]); // 3
 * sortedUniqueCount([1, 1, 1]); // 1
 */
function sortedUniqueCount(input) {
    var uniqueValueCount = 0,
        lastSeenValue;
    for (var i = 0; i < input.length; i++) {
        if (i === 0 || input[i] !== lastSeenValue) {
            lastSeenValue = input[i];
            uniqueValueCount++;
        }
    }
    return uniqueValueCount;
}

module.exports = sortedUniqueCount;

},{}],43:[function(require,module,exports){
'use strict';

var variance = require('./variance');

/**
 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 * is the square root of the variance. It's useful for measuring the amount
 * of variation or dispersion in a set of values.
 *
 * Standard deviation is only appropriate for full-population knowledge: for
 * samples of a population, {@link sampleStandardDeviation} is
 * more appropriate.
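 *
 * A quick sketch of the difference, using the `ss.` namespace as other
 * examples in this bundle do (the input is the same as in the example below):
 *
 *     ss.standardDeviation([2, 4, 4, 4, 5, 5, 7, 9]);       // divides by n      //= 2
 *     ss.sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); // divides by n - 1  //= 2.138...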
 *
 * @param {Array} x input
 * @returns {number} standard deviation
 * @example
 * var scores = [2, 4, 4, 4, 5, 5, 7, 9];
 * variance(scores); //= 4
 * standardDeviation(scores); //= 2
 */
function standardDeviation(x) {
    // The standard deviation of no numbers is null
    if (x.length === 0) {
        return null;
    }

    return Math.sqrt(variance(x));
}

module.exports = standardDeviation;

},{"./variance":49}],44:[function(require,module,exports){
'use strict';

var SQRT_2PI = Math.sqrt(2 * Math.PI);

// Approximate the cumulative distribution function of the standard normal
// distribution at `z` with a truncated series expansion.
function cumulativeDistribution(z) {
    var sum = z,
        tmp = z;

    // 15 iterations are enough for 4-digit precision
    for (var i = 1; i < 15; i++) {
        tmp *= z * z / (2 * i + 1);
        sum += tmp;
    }

    return Math.round((0.5 + (sum / SQRT_2PI) * Math.exp(-z * z / 2)) * 1e4) / 1e4;
}

/**
 * A standard normal table, also called the unit normal table or Z table,
 * is a mathematical table for the values of Φ (phi), which are the values of
 * the cumulative distribution function of the normal distribution.
 * It is used to find the probability that a statistic is observed below,
 * above, or between values on the standard normal distribution, and by
 * extension, any normal distribution.
 *
 * The probabilities are calculated using the
 * [Cumulative distribution function](https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function).
 * The table used is the cumulative one, not the table cumulative from 0 to
 * the mean (even though the latter has 5 digits of precision, instead of 4).
 */
var standardNormalTable = [];

for (var z = 0; z <= 3.09; z += 0.01) {
    standardNormalTable.push(cumulativeDistribution(z));
}

module.exports = standardNormalTable;

},{}],45:[function(require,module,exports){
'use strict';

/**
 * The [sum](https://en.wikipedia.org/wiki/Summation) of an array
 * is the result of adding all numbers together, starting from zero.
 *
 * This runs in `O(n)`, linear time with respect to the length of the array.
 *
 * @param {Array} x input
 * @return {number} sum of all input numbers
 * @example
 * console.log(sum([1, 2, 3])); // 6
 */
function sum(x) {
    var value = 0;
    for (var i = 0; i < x.length; i++) {
        value += x[i];
    }
    return value;
}

module.exports = sum;

},{}],46:[function(require,module,exports){
'use strict';

var mean = require('./mean');

/**
 * The sum of deviations to the Nth power.
 * When n=2 it's the sum of squared deviations.
 * When n=3 it's the sum of cubed deviations.
 *
 * @param {Array} x input array
 * @param {number} n power
 * @returns {number} sum of nth power deviations
 * @example
 * var input = [1, 2, 3];
 * // since the variance of a set is the mean of the squared
 * // deviations, we can calculate that with sumNthPowerDeviations:
 * var variance = sumNthPowerDeviations(input, 2) / input.length;
 */
function sumNthPowerDeviations(x, n) {
    var meanValue = mean(x),
        sum = 0;

    for (var i = 0; i < x.length; i++) {
        sum += Math.pow(x[i] - meanValue, n);
    }

    return sum;
}

module.exports = sumNthPowerDeviations;

},{"./mean":21}],47:[function(require,module,exports){
'use strict';

var standardDeviation = require('./standard_deviation');
var mean = require('./mean');

/**
 * This computes a [one-sample t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#One-sample_t-test), comparing the mean
 * of a sample to a known value, x.
 *
 * In this case, we're trying to determine whether the
 * population mean is equal to the value that we know, which is `x`
 * here. Usually the results are used to look up a
 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 * a certain level of significance, will let you determine that the
 * null hypothesis can or cannot be rejected.
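 *
 * A sketch of the statistic written out with the helpers required above
 * (illustration only; it mirrors the implementation below):
 *
 *     // t = (sample mean - x) / (standard deviation / sqrt(n))
 *     var sample = [1, 2, 3, 4, 5, 6];
 *     (mean(sample) - 3.385) / (standardDeviation(sample) / Math.sqrt(sample.length));
 *     //= 0.16494154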
 *
 * @param {Array} sample an array of numbers as input
 * @param {number} x expected value of the population mean
 * @returns {number} value
 * @example
 * tTest([1, 2, 3, 4, 5, 6], 3.385); //= 0.16494154
 */
function tTest(sample, x) {
    // The mean of the sample
    var sampleMean = mean(sample);

    // The standard deviation of the sample
    var sd = standardDeviation(sample);

    // The square root of the sample length
    var rootN = Math.sqrt(sample.length);

    // Compare the known value against the sample mean,
    // returning the t value
    return (sampleMean - x) / (sd / rootN);
}

module.exports = tTest;

},{"./mean":21,"./standard_deviation":43}],48:[function(require,module,exports){
'use strict';

var mean = require('./mean');
var sampleVariance = require('./sample_variance');

/**
 * This computes a [two-sample t-test](http://en.wikipedia.org/wiki/Student's_t-test).
 * It tests whether `mean(X) - mean(Y) = difference` (in the most common case
 * `difference == 0`, to test whether the two samples are likely to be drawn
 * from populations with the same mean value), with no prior knowledge of the
 * standard deviations of either sample other than the assumption that they
 * are equal.
 *
 * Usually the results here are used to look up a
 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 * a certain level of significance, will let you determine that the
 * null hypothesis can or cannot be rejected.
 *
 * `difference` can be omitted if it equals 0.
 *
 * [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx)
 * a null hypothesis that the two populations sampled into
 * `sampleX` and `sampleY` have equal means.
 *
 * @param {Array} sampleX a sample as an array of numbers
 * @param {Array} sampleY a sample as an array of numbers
 * @param {number} [difference=0] expected difference between the population means
 * @returns {number} test result
 * @example
 * ss.tTestTwoSample([1, 2, 3, 4], [3, 4, 5, 6], 0); //= -2.1908902300206643
 */
function tTestTwoSample(sampleX, sampleY, difference) {
    var n = sampleX.length,
        m = sampleY.length;

    // If either sample doesn't actually have any values, we can't
    // compute this at all, so we return `null`.
    if (!n || !m) {
        return null;
    }

    // default difference (mu) is zero
    if (!difference) {
        difference = 0;
    }

    var meanX = mean(sampleX),
        meanY = mean(sampleY);

    var weightedVariance = ((n - 1) * sampleVariance(sampleX) +
        (m - 1) * sampleVariance(sampleY)) / (n + m - 2);

    return (meanX - meanY - difference) /
        Math.sqrt(weightedVariance * (1 / n + 1 / m));
}

module.exports = tTestTwoSample;
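
// In symbols, a sketch of what tTestTwoSample computes above (matching the
// implementation, not an additional definition): the two sample variances are
// pooled, weighted by their degrees of freedom, before the difference in
// means is standardized.
//
//     pooledVariance = ((n - 1) * var(X) + (m - 1) * var(Y)) / (n + m - 2)
//     t = (mean(X) - mean(Y) - difference) / sqrt(pooledVariance * (1/n + 1/m))
//
// Worked through for the example above (values rounded): both sample
// variances are 1.6667, so pooledVariance = 1.6667 and
// t = (2.5 - 4.5 - 0) / sqrt(1.6667 * (1/4 + 1/4)) = -2.1909.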

},{"./mean":21,"./sample_variance":39}],49:[function(require,module,exports){
'use strict';

var sumNthPowerDeviations = require('./sum_nth_power_deviations');

/**
 * The [variance](http://en.wikipedia.org/wiki/Variance)
 * is the mean of the squared deviations from the mean.
 *
 * This is an implementation of variance, not sample variance:
 * see the `sampleVariance` method if you want a sample measure.
 *
 * @param {Array} x a population
 * @returns {number} variance: a value greater than or equal to zero.
 * zero indicates that all values are identical.
 * @example
 * ss.variance([1, 2, 3, 4, 5, 6]); //= 2.917
 */
function variance(x) {
    // The variance of no numbers is null
    if (x.length === 0) {
        return null;
    }

    // Find the mean of the squared deviations between each value
    // and the mean value, dividing by the full population size.
    return sumNthPowerDeviations(x, 2) / x.length;
}

module.exports = variance;

},{"./sum_nth_power_deviations":46}],50:[function(require,module,exports){
'use strict';

/**
 * The [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score).
 *
 * The standard score is the number of standard deviations an observation
 * or datum is above or below the mean. Thus, a positive standard score
 * represents a datum above the mean, while a negative standard score
 * represents a datum below the mean. It is a dimensionless quantity
 * obtained by subtracting the population mean from an individual raw
 * score and then dividing the difference by the population standard
 * deviation.
 *
 * The z-score is only defined if one knows the population parameters;
 * if one only has a sample set, then the analogous computation with
 * sample mean and sample standard deviation yields the
 * Student's t-statistic.
 *
 * @param {number} x the observed value
 * @param {number} mean the population mean
 * @param {number} standardDeviation the population standard deviation
 * @return {number} z score
 * @example
 * ss.zScore(78, 80, 5); //= -0.4
 */
function zScore(x, mean, standardDeviation) {
    return (x - mean) / standardDeviation;
}

module.exports = zScore;

},{}]},{},[1])(1)
});
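
// A hedged usage sketch combining a few of the functions bundled above,
// assuming the build is loaded so that its exports are available as `ss`
// (as in the inline @example blocks) and that the `mean` module required
// throughout this file is exposed on that namespace:
//
//     var scores = [2, 4, 4, 4, 5, 5, 7, 9];
//     var m = ss.mean(scores);               //= 5
//     var sd = ss.standardDeviation(scores); //= 2
//     ss.zScore(9, m, sd);                   //= 2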