// # [Jenks natural breaks optimization](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization) // // Implementations: [1](http://danieljlewis.org/files/2010/06/Jenks.pdf) (python), // [2](https://github.com/vvoovv/djeo-jenks/blob/master/main.js) (buggy), // [3](https://github.com/simogeo/geostats/blob/master/lib/geostats.js#L407) (works) var jenks = function jenks(data, n_classes) { // Compute the matrices required for Jenks breaks. These matrices // can be used for any classing of data with `classes <= n_classes` function getMatrices(data, n_classes) { // in the original implementation, these matrices are referred to // as `LC` and `OP` // // * lower_class_limits (LC): optimal lower class limits // * variance_combinations (OP): optimal variance combinations for all classes var lower_class_limits = [], variance_combinations = [], // loop counters i, j, // the variance, as computed at each step in the calculation variance = 0; // Initialize and fill each matrix with zeroes for (i = 0; i < data.length + 1; i++) { var tmp1 = [], tmp2 = []; for (j = 0; j < n_classes + 1; j++) { tmp1.push(0); tmp2.push(0); } lower_class_limits.push(tmp1); variance_combinations.push(tmp2); } for (i = 1; i < n_classes + 1; i++) { lower_class_limits[1][i] = 1; variance_combinations[1][i] = 0; // in the original implementation, 9999999 is used but // since Javascript has `Infinity`, we use that. for (j = 2; j < data.length + 1; j++) { variance_combinations[j][i] = Infinity; } } for (var l = 2; l < data.length + 1; l++) { // `SZ` originally. this is the sum of the values seen thus // far when calculating variance. var sum = 0, // `ZSQ` originally. the sum of squares of values seen // thus far sum_squares = 0, // `WT` originally. This is the number of w = 0, // `IV` originally i4 = 0; // in several instances, you could say `Math.pow(x, 2)` // instead of `x * x`, but this is slower in some browsers // introduces an unnecessary concept. for (var m = 1; m < l + 1; m++) { // `III` originally var lower_class_limit = l - m + 1, val = data[lower_class_limit - 1]; // here we're estimating variance for each potential classing // of the data, for each potential number of classes. `w` // is the number of data points considered so far. w++; // increase the current sum and sum-of-squares sum += val; sum_squares += val * val; // the variance at this point in the sequence is the difference // between the sum of squares and the total x 2, over the number // of samples. variance = sum_squares - (sum * sum) / w; i4 = lower_class_limit - 1; if (i4 !== 0) { for (j = 2; j < n_classes + 1; j++) { // if adding this element to an existing class // will increase its variance beyond the limit, break // the class at this point, setting the lower_class_limit // at this point. if (variance_combinations[l][j] >= (variance + variance_combinations[i4][j - 1])) { lower_class_limits[l][j] = lower_class_limit; variance_combinations[l][j] = variance + variance_combinations[i4][j - 1]; } } } } lower_class_limits[l][1] = 1; variance_combinations[l][1] = variance; } // return the two matrices. for just providing breaks, only // `lower_class_limits` is needed, but variances can be useful to // evaluage goodness of fit. return { lower_class_limits: lower_class_limits, variance_combinations: variance_combinations }; } // the second part of the jenks recipe: take the calculated matrices // and derive an array of n breaks. function breaks(data, lower_class_limits, n_classes) { var k = data.length - 1, kclass = [], countNum = n_classes; // the calculation of classes will never include the upper and // lower bounds, so we need to explicitly set them kclass[n_classes] = data[data.length - 1]; kclass[0] = data[0]; // the lower_class_limits matrix is used as indexes into itself // here: the `k` variable is reused in each iteration. while (countNum > 1) { kclass[countNum - 1] = data[lower_class_limits[k][countNum] - 2]; k = lower_class_limits[k][countNum] - 1; countNum--; } return kclass; } if (n_classes > data.length) return null; // sort data in numerical order, since this is expected // by the matrices function data = data.slice().sort(function(a, b) { return a - b; }); // get our basic matrices var matrices = getMatrices(data, n_classes), // we only need lower class limits here lower_class_limits = matrices.lower_class_limits; // extract n_classes out of the computed matrices return breaks(data, lower_class_limits, n_classes); }