This is an example of trimming outliers from a dataset so that a histogram of it can be plotted. The synthetic dataset in random.csv has had some obvious outliers added to it, making it difficult to plot using d3's native histogram layout. Chauvenet's criterion has been used to trim the outliers, allowing a 'better' representation of the data. This gist/bl.ock was created in response to this Stack Overflow question.
Implementing Chauvenet's criterion required a variance function borrowed from Jason Davies' science.js library, and the histogram is based on Mike Bostock's example here.
xxxxxxxxxx
<html>
<meta charset="utf-8">
<head>
<script src="https://d3js.org/d3.v3.min.js"></script>
<style type="text/css">
/* Base font for the page. */
body {
font: 10px sans-serif;
}
/* Histogram bars: the script appends one <g class="bar"> per bin, each holding a <rect>. */
.bar rect {
fill: steelblue;
shape-rendering: crispEdges;
}
/* Count label appended inside each bar group; white so it reads against the bar fill. */
.bar text {
fill: #fff;
}
/* Axis domain path and tick lines (the script appends a <g class="x axis">): unfilled black strokes. */
.axis path, .axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
</style>
</head>
<body>
<div style="width: 500px; height; 500px; position: absolute; top: 150px; background-color: red"></div>
<script type="text/javascript">
// Load random.csv, trim outliers from its "x" column and draw a histogram of
// the remaining values with a bottom x axis.
d3.csv("random.csv", function(error, values) {
  // Report a failed load instead of crashing later on an undefined `values`.
  if (error) {
    console.error("Could not load random.csv:", error);
    return;
  }

  // Coerce the "x" column of every row to a number.
  var dataArray = values.map(function(d) { return +d.x; });

  // trim outliers
  var trimmed = chauvenet(dataArray);

  // Without any values there is no extent to build a scale from.
  if (trimmed.length === 0) {
    console.warn("No data left to plot after trimming outliers.");
    return;
  }

  var margin = {top: 10, right: 30, bottom: 30, left: 30},
      width = 960 - margin.left - margin.right,
      height = 500 - margin.top - margin.bottom;

  var x = d3.scale.linear()
      .domain(d3.extent(trimmed))
      .range([0, width]);

  // Generate a histogram using uniformly-spaced bins taken from the scale's
  // ticks (20 is the requested tick count).
  var data = d3.layout.histogram()
      .bins(x.ticks(20))(trimmed);

  // Fewer than two ticks yields no bins; the label code below reads data[0].
  if (data.length === 0) {
    console.warn("Histogram produced no bins; nothing to draw.");
    return;
  }

  // Formatter for the per-bar count labels. The original code called
  // formatCount without ever defining it, which threw a ReferenceError and
  // stopped rendering before the axis was appended.
  var formatCount = d3.format(",.0f");

  var y = d3.scale.linear()
      .domain([0, d3.max(data, function(d) { return d.y; })])
      .range([height, 0]);

  var xAxis = d3.svg.axis()
      .scale(x)
      .orient("bottom");

  var svg = d3.select("body").append("svg")
      .attr("width", width + margin.left + margin.right)
      .attr("height", height + margin.top + margin.bottom)
    .append("g")
      .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  // The x domain starts at the smallest trimmed value, so x(xMin + dx) is the
  // pixel width of a bin that is dx wide.
  var xMin = x.domain()[0];

  var bar = svg.selectAll(".bar")
      .data(data)
    .enter().append("g")
      .attr("class", "bar")
      .attr("transform", function(d) { return "translate(" + x(d.x) + "," + y(d.y) + ")"; });

  bar.append("rect")
      .attr("x", 1)
      .attr("width", function(d) { return x(xMin + d.dx) - 1; })
      .attr("height", function(d) { return height - y(d.y); });

  // Count label, centred horizontally using the first bin's width.
  bar.append("text")
      .attr("dy", ".75em")
      .attr("y", 6)
      .attr("x", x(xMin + data[0].dx) / 2)
      .attr("text-anchor", "middle")
      .text(function(d) { return formatCount(d.y); });

  svg.append("g")
      .attr("class", "x axis")
      .attr("transform", "translate(0," + height + ")")
      .call(xAxis);
});
// Borrowed from Jason Davies science library https://github.com/jasondavies/science.js/blob/master/science.v1.js
/**
 * Sample variance of an array of numbers (n - 1 denominator).
 *
 * Written as a function declaration rather than an assignment to an
 * undeclared name, so it no longer creates an implicit global and does not
 * throw in strict mode.
 *
 * @param {number[]} x - Sample values.
 * @returns {number} NaN for an empty array, 0 for a single value, otherwise
 *   the sum of squared deviations from d3.mean(x) divided by (n - 1).
 */
function variance(x) {
  var n = x.length;
  if (n < 1) return NaN;
  if (n === 1) return 0;
  var mean = d3.mean(x),
      i = -1,
      s = 0;
  while (++i < n) {
    var v = x[i] - mean;
    s += v * v;
  }
  return s / (n - 1);
}
//A test for outliers https://en.wikipedia.org/wiki/Chauvenet%27s_criterion
/**
 * Return a new array holding the values of x that lie strictly within dMax
 * standard deviations of the mean; the input array is not modified.
 *
 * Note: the cut-off is the fixed constant dMax = 3; it is not derived from
 * the sample size.
 *
 * @param {number[]} x - Sample values.
 * @returns {number[]} The values kept, in their original order. When the
 *   standard deviation is zero or not a number (empty input, a single value,
 *   or all values equal) a copy of x is returned unchanged.
 */
function chauvenet(x) {
  var dMax = 3; // largest allowed distance from the mean, in standard deviations
  var mean = d3.mean(x);
  var stdv = Math.sqrt(variance(x));

  // With no spread the ratio below is 0/0 = NaN, which fails every
  // comparison and used to discard all of the data. No value can be called
  // an outlier in that case, so keep everything.
  if (!(stdv > 0)) {
    return x.slice();
  }

  var kept = [];
  for (var i = 0; i < x.length; i++) {
    if (Math.abs(x[i] - mean) / stdv < dMax) {
      kept.push(x[i]);
    }
  }
  return kept;
}
</script>
</body>
</html>
Changed the d3 script URL from http://d3js.org/d3.v3.min.js to the secure (HTTPS) URL:
https://d3js.org/d3.v3.min.js