This gist contains a visualization of results from analysing DonorsChoose.org project data with Spark. The Spark code and notebooks can be found in the following repo: https://github.com/Jay-Oh-eN/data-scientists-guide-apache-spark
For the workshop (and after) we will use a Gitter chatroom to keep the conversation going: https://gitter.im/Jay-Oh-eN/data-scientists-guide-apache-spark.
Please also feel free to reach out to me directly via email at jondinu@gmail.com or on Twitter at @clearspandex.
The presentation can be found on Slideshare here.
Data: http://data.donorschoose.org/open-data/project-data/
forked from Jay-Oh-eN's block: The Data Scientist's Guide to Apache Spark: Interactive Visualization of Results
xxxxxxxxxx
<html>
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v3.min.js"></script>
<style>
/* Page-wide default font. */
body {
font-family: futura;
}
/* Base font for elements carrying the "axis" class (the x/y axis groups). */
.axis {
font-family: arial;
font-size: 0.7em;
}
/* Text inside an axis: smaller, un-stroked, with its own font stack. */
.axis text {
font-size: 0.75em;
stroke: none;
font-family: 'Gill Sans', 'Gill Sans MT', Calibri, sans-serif;
}
/* Every SVG text element is filled black (title, axis text, labels). */
text {
fill: black;
}
/* Paths are drawn as 1px black outlines with no fill. */
path {
fill: none;
stroke: black;
stroke-width: 1px;
}
/* Elements with the "tick" class: outline only. */
.tick {
fill: none;
stroke: black;
}
/* Fill colour applied to every rect, i.e. the histogram bars. */
rect {
fill: #4eb0bb;
}
/* Floating tooltip box; its left/top are set from script on mousemove. */
#tooltip {
position: absolute;
width: 90px;
height: auto;
padding: 5px;
margin-bottom: 10px;
background-color: white;
-webkit-box-shadow: 4px 4px 10px rgba(0, 0, 0, 0.4);
-moz-box-shadow: 4px 4px 10px rgba(0, 0, 0, 0.4);
box-shadow: 4px 4px 10px rgba(0, 0, 0, 0.4);
/* The tooltip itself never receives mouse events. */
pointer-events: none;
}
/* Toggled via the "hidden" class to show/hide the tooltip. */
#tooltip.hidden {
display: none;
}
/* Text lines inside the tooltip. */
#tooltip p {
margin: 0;
font-family: sans-serif;
font-size: 16px;
line-height: 20px;
}
</style>
<script type="text/javascript">
// https://github.com/mbostock/d3/wiki/Time-Formatting
// Year-month ("%Y-%m") time formatter. Declared explicitly with `var`
// instead of being assigned as an implicit global (which throws in strict
// mode); at the top level of a classic script it is still a global binding.
var format = d3.time.format("%Y-%m");
/**
 * Render the "DonorsChoose.org Project Price" histogram into a new <svg>
 * appended to <body>, and wire the bars to the #tooltip element.
 *
 * NOTE(review): keep exactly one declared parameter. d3 v3 appears to
 * special-case callbacks whose `length` is 1 (passing the parsed data, or
 * null when loading failed) -- verify against the d3 v3 `d3.xhr` source
 * before changing the signature.
 *
 * @param {Array<Object>} data - Histogram bins. Each bin is read through its
 *   `index` property (x position) and `count` property (bar height). The
 *   array is not modified. Anything that is not an array is ignored.
 */
function draw(data) {
  "use strict";

  // Nothing to plot (e.g. a null handed over after a failed load).
  if (!Array.isArray(data)) {
    return;
  }

  // Work on a copy with a zero-count sentinel bin appended so the x-axis
  // extends past the last bar without mutating the caller's array.
  var bins = data.concat([{ count: 0, index: 5000 }]);

  // set margins according to Mike Bostock's margin conventions
  // https://bl.ocks.org/mbostock/3019563
  var margin = { top: 25, right: 40, bottom: 50, left: 75 };

  // inner chart size (outer size is 960 x 500)
  var width = 960 - margin.left - margin.right;
  var height = 500 - margin.top - margin.bottom;

  // column plotted on the y-axis and shown in the tooltip
  var field = 'count';

  // append the SVG tag, sized to include the margins, and a translated
  // group that holds the chart itself
  var svg = d3.select("body")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append('g')
    .attr('class', 'chart')
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  // chart title, centred above the plot area
  svg.append('text')
    .attr('x', width / 2)
    .attr('y', 0)
    .style('text-anchor', 'middle')
    .text("DonorsChoose.org Project Price");

  // one rect per bin; keep the selection so later steps touch only this
  // chart's bars instead of every rect in the document
  var bars = svg.selectAll("rect")
    .data(bins)
    .enter()
    .append("rect");

  // maximum count
  var max_y = d3.max(bins, function(d) {
    return +d[field];
  });

  // min/max of the x values (includes the sentinel bin)
  var price_extent = d3.extent(bins, function(d) {
    return +d['index'];
  });

  // x scale: price -> pixels
  var price_scale = d3.scale.linear()
    .range([0, width])
    .domain(price_extent);

  // y scale: count -> pixels (inverted so larger counts sit higher)
  var measure_scale = d3.scale.linear()
    .range([height, 0])
    .domain([0, max_y]);

  // x-axis built from price_scale, ticks formatted as currency
  var price_axis = d3.svg.axis()
    .scale(price_scale)
    .tickFormat(d3.format('$'));

  // y-axis built from measure_scale
  var measure_axis = d3.svg.axis()
    .scale(measure_scale)
    .orient("left");

  // x-axis sits at the bottom of the plot area
  svg.append('g')
    .attr('class', 'x axis')
    .attr('transform', "translate(0," + height + ")")
    .call(price_axis);

  // y-axis; keep the group so the label is attached to this chart's axis
  var y_axis_group = svg.append('g')
    .attr('class', 'y axis')
    .call(measure_axis);

  // rotated "Count" label for the y-axis
  y_axis_group.append("text")
    .attr('class', 'label')
    .text("Count")
    .attr("transform", "rotate(-90)")
    .attr("x", -(height / 2)).attr('y', -40)
    .style("text-anchor", "middle")
    .style("font-size", "1.5em");

  // bar width with a 2px gap; clamped because a negative SVG width is invalid
  var bin_width = Math.max(0, (width / bins.length) - 2);

  // position and size each bar from its bound bin
  bars
    .attr('x', function(d) {
      return price_scale(d['index']) + 1;
    })
    .attr('width', bin_width)
    .attr('y', function(d) {
      return measure_scale(+d[field]);
    })
    .attr('height', function(d) {
      return height - measure_scale(+d[field]);
    });

  bars.on("mousemove", function(d) {
      // mouse position relative to the hovered bar's coordinate system
      var pos = d3.mouse(this);
      var tool = d3.select("#tooltip");

      // set count value display
      tool.select("#value")
        .text(d[field]);

      // Show the tooltip BEFORE measuring it: while it is display:none its
      // computed height is not a pixel length, so parseFloat() would yield
      // NaN and the first "top" assignment would be invalid.
      tool.classed("hidden", false);

      // centre the tooltip horizontally on the cursor, 15px above it
      var xpos = pos[0] + margin.left - parseFloat(tool.style('width')) / 2;
      var ypos = pos[1] + margin.top - parseFloat(tool.style('height')) - 15;

      tool.style("left", xpos + "px")
        .style("top", ypos + "px");
    })
    .on("mouseout", function() {
      // hide tooltip
      d3.select("#tooltip").classed("hidden", true);
    });
}
</script>
</head>
<body>
<script type="text/javascript">
// Load the data file asynchronously. On success, pass the parsed data to
// draw(); on failure, report the error instead of handing draw() bad input.
d3.json("data.json", function(error, data) {
  if (error) {
    console.error("Failed to load data.json", error);
    return;
  }
  draw(data);
});
</script>
<div id="tooltip" class='hidden'>
<!-- <p><strong>Price: </strong><span id="key"></span></p> -->
<p style="margin-top: 5px"><strong>Count: </strong><span id="value"></span></p>
</div>
</body>
</html>
Modified http://d3js.org/d3.v3.min.js to the secure URL https://d3js.org/d3.v3.min.js