This example shows how to make a horizontal bar chart with D3.js and Google Fonts. It is derived from example 105 of the screencast Introduction to D3.js.
The data shown in this example is from the "List of cities proper by population" Wikipedia page. The original example used city populations from GeoNames, which seemed to be inaccurate for the top 10 cities.
xxxxxxxxxx
<html lang="en">
<head>
<title>CS590DV Final Project Visualization Template</title>
<meta charset="UTF-8">
<link rel="stylesheet" type="text/css" href="https://dc-js.github.io/dc.js/css/bootstrap.min.css">
<link rel="stylesheet" type="text/css" href="https://dc-js.github.io/dc.js/css/dc.css" />
<link rel="stylesheet" type="text/css" href="pca.css" />
<link href='https://fonts.googleapis.com/css?family=Poiret+One' rel='stylesheet' type='text/css'>
<script src="https://dc-js.github.io/dc.js/js/d3.js"></script>
<script src="https://dc-js.github.io/dc.js/js/crossfilter.js"></script>
<script src="https://dc-js.github.io/dc.js/js/dc.js"></script>
<script src="https://cdn.jsdelivr.net/gh/crossfilter/reductio/reductio.js"></script>
<script src="https://npmcdn.com/universe@latest/universe.js"></script>
<script src="https://www.lactame.com/lib/ml/2.0.0/ml.min.js"></script>
<script src="https://code.jquery.com/jquery-3.2.1.min.js"></script>
<script>
// Smooth-scroll to in-page anchors: once the DOM is ready, every <a> whose href
// carries a hash animates the page to the matching element over 800 ms and then
// records the hash in the URL.
$(document).ready(function() {
    $("a").on('click', function(event) {
        var hash = this.hash;
        if (hash === "") {
            // Not an in-page link; leave the browser's default behaviour alone.
            return;
        }
        var target = $(hash);
        if (target.length === 0) {
            // Nothing on the page matches the hash. .offset() would be undefined for an
            // empty selection, so bail out BEFORE preventDefault() and let the browser
            // handle the link normally instead of throwing and swallowing the click.
            return;
        }
        event.preventDefault();
        $('html, body').animate({ scrollTop: target.offset().top }, 800, function(){ window.location.hash = hash; });
    });
});
</script>
<style> .dc-chart g.row text {fill: black;}
/* Page-level layout */
body, html, .container {
    height: 100%;
}
body {
    background-image: url(bgi2.jpg);
    background-size: cover;
    background-attachment: fixed;
}

/* Chart panels: #ch1 wraps the scatterplot, #ch2 the row chart and the checkbox table. */
#ch1 {
    width: 555px;
    height: 415px;
    margin: 1%;
    padding: 0;
    border: 0;
    text-align: center; /* was "align:center", which is not a CSS property and was ignored */
    float: left;
}
#ch2 {
    width: 555px;
    height: 415px;
    margin: 1%;
    padding: 0;
    border: 0;
    float: left;
}

/* Small form / annotation helpers */
#opt {
    width: 8em;
    font-size: 10px;
    margin: 2px;
    padding: 0px;
}
#sec {
    width: 30%;
    height: 60px;
    float: left;
    font-size: 10px;
}
#annotate {
    width: 100%;
    font-size: 10px;
    color: black;
    overflow: scroll;
    height: 120px;
}

/* Headings */
h1 {
    font-size: 50px;
    text-align: center; /* was "align:center" (invalid) */
    color: crimson;
}
h3 {
    font-size: 40px;
    text-align: left; /* was "align:left" (invalid) */
    color: black;
}
h5 {
    font-size: 10px;
    margin: 2px;
    padding: 0px;
}

/* Row-chart labels in black (same declaration as the rule on the <style> line). */
.dc-chart g.row text {
    fill: black;
}
.dropdown {
    border: 1px solid #ccc;
    width: 8em;
    font-size: 10px;
    border-radius: 3px;
    overflow: visible;
}
.container {
    width: 1200px;
}
input {
    width: 4.5em;
    font-size: 10px;
}
.form {
    width: 10%;
}

/* Header areas */
#nav {
    padding: 0;
    margin-left: auto;
    margin-right: auto;
    margin-top: 15px;
    text-align: right;
}
#logo {
    padding: 0;
    margin-left: auto;
    margin-right: auto;
    margin-top: 20px;
    text-align: center;
}
#logo img {
    height: 125px;
    width: 125px;
    vertical-align: middle;
}
#nav img {
    vertical-align: bottom;
}

/* SVG axes */
.axis .label {
    font-size: 10pt;
}
.axis path, .axis line {
    fill: none;
    stroke: #000;
    shape-rendering: crispEdges;
}
.y.axis path, .y.axis line {
    stroke: none;
}

/* Inline spans inside headings, and chart titles */
h6 span {
    font-size: 14px;
    font-weight: normal;
}
h2 {
    float: right;
}
h4 span {
    font-size: 14px;
    font-weight: normal;
}
</style>
</head>
<body><div id="section1" class="container" >
<div align="left"><h3 style="color:magenta">Feature Selection and Analysis</h3></div>
<div align="center">
<img id="feature" src="feature.jpg" width="700">
</div>
<div id="nav"></div>
<p style="color:black"><strong>Feature selection methods aid you in your mission to create an accurate predictive model. They help you by choosing features that will give you as good or better accuracy whilst requiring less data.
Feature selection methods can be used to identify and remove unneeded, irrelevant and redundant attributes from data that do not contribute to the accuracy of a predictive model or may in fact decrease the accuracy of the model.
Fewer attributes are desirable because they reduce the complexity of the model, and a simpler model is easier to understand and explain.</strong>
</p>
<p style="color:black"><strong>
Information gain looks at each feature in isolation, computes its information gain and measures how important and relevant it is to the class label (alert type). Computing the information gain for a feature involves computing the entropy of the class label (alert type) for the entire dataset and subtracting the conditional entropies for each possible value of that feature. The entropy calculation requires a frequency count of the class label by feature value. In more detail, all instances (alerts) with some feature value v are selected, then the number of occurrences of each class within those instances is counted, and the entropy for v is computed. This step is repeated for each possible value v of the feature. The entropy of a subset can be computed more easily by constructing a count matrix, which tallies the class membership of the training examples by feature value.</strong>
</p>
<div align="center">
<img id="IG" src="IG.png" width="800">
</div>
<p style="color:black"><strong>
Another approach is to reduce the dimensionality of the feature space and poke around in this reduced feature space. A basic technique well-suited for this problem is the Principal Component Analysis which tries to find the directions of most variation in the data set.</strong></p>
<div>
<div id="ch1">
<div align="center" ><h2>PCA-Transformed Data</h2></div>
<div align="center" id="scatter"></div>
</div>
<div id="ch2">
<div align="center" ><h2>Selection-Enabled Row Chart</h2></div>
<div align="center" id="row" style="margin-bottom:20px"></div>
<br><br>
<div align="center" ><h2>Features to Include in the PCA</h2></div>
<table align="center">
<tr>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="age" checked="true"> Age<br></td>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="duration" checked="true"> Duration<br></td>
</tr><tr>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="campaign"> Campaign<br></td>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="consumer confidence index"> consumer confidence index<br></td>
</tr><tr>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="consumer price index"> consumer price index<br></td>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="employment variation rate"> employment variation rate<br></td>
</tr><tr>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="employees"> employees<br></td>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="euribor3m"> euribor3m<br></td>
</tr><tr>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="Lifetime Post Impressions by people who have liked your Page"> Lifetime Post Impressions by people who have liked your Page<br></td>
<td><input class="pcaDimensions" type="checkbox" onchange="draw_graphs(false)" value="pdays"> pdays<br></td>
</tr><tr>
</tr>
</table>
</div>
<script type="text/javascript">
// Global CSV column names, referenced as d[varN] elsewhere in this script.

// Columns that draw_graphs() leaves as the raw CSV strings.
var var0 = "y",
    var1 = "contact",
    var2 = "job",
    var3 = "marital",
    var6 = "education",
    var7 = "housing",
    var8 = "loan",
    var9 = "default",
    var20 = "poutcome";

// Columns that draw_graphs() converts to numbers with unary plus.
var var4 = "month",
    var5 = "weekday",
    var10 = "consumer price index",
    var11 = "employment variation rate",
    var12 = "campaign",
    var13 = "previous",
    var14 = "pdays",
    var15 = "age",
    var16 = "duration",
    var17 = "euribor3m",
    var18 = "employees",
    var19 = "consumer confidence index";
// The chart objects live at global scope so that repeated draw_graphs() calls
// reconfigure the same two charts instead of constructing new ones.
var scatterplot = dc.scatterPlot("#scatter");
var rowChart = dc.rowChart("#row");
// Helper: returns, in document order, the `value` of every element with class
// "pcaDimensions" whose `checked` flag is set, i.e. the attributes to feed into the PCA.
function getPCACheckboxValues(){
    var boxes = document.getElementsByClassName("pcaDimensions");
    // The collection is array-like, so borrow Array's filter to keep only ticked boxes.
    var ticked = Array.prototype.filter.call(boxes, function(box){
        return box.checked;
    });
    return ticked.map(function(box){
        return box.value;
    });
}
// Functions to access the quantities needed to update the PCA scatterplot

// Key for a record: [first component, second component, record id].
function pcaAccessor(d){
    var coords = d.pcaCoords;
    return [coords.c1, coords.c2, d.recId];
}
// Returns the record's first PCA coordinate (d.pcaCoords.c1).
function pcaAccessorC1(d){
    var coords = d.pcaCoords;
    return coords.c1;
}
// Returns the record's second PCA coordinate (d.pcaCoords.c2).
function pcaAccessorC2(d){
    var coords = d.pcaCoords;
    return coords.c2;
}
// Function that acts as a callback for the checkboxes and is used to initialize the page.
//
// redrawAll: when truthy, every chart is rendered from scratch with dc.renderAll();
//            in all cases the scatterplot's chart group is redrawn afterwards.
//
// Each call reloads the CSV, builds a new crossfilter index, reconfigures the global
// scatterplot and rowChart against it, and recomputes the PCA coordinates.
function draw_graphs(redrawAll){
    /* Load the data. */
    d3.csv("dataset_Facebook_no_na.csv", function(dataset) {
        // NOTE(review): with the d3 v3 API used here a one-argument callback presumably
        // receives null when the request fails; bail out instead of crashing on forEach.
        if (!dataset) {
            console.error("draw_graphs: failed to load dataset_Facebook_no_na.csv");
            return;
        }

        // Columns that are converted from CSV strings to numbers.
        var numericColumns = [var4, var5, var10, var11, var12, var13, var14,
                              var15, var16, var17, var18, var19];

        dataset.forEach(function(d, i) {
            numericColumns.forEach(function(column) {
                d[column] = +d[column];
            });
            /* Placeholder for the PCA coordinate values, plus a record ID so that, given the
               filtered data, we know which records to update in the full dataset. The values
               of c1 and c2 are replaced with the actual PCA components by update_pca_coords(). */
            d.pcaCoords = {c1: 0, c2: 0};
            d.recId = i;
        });

        // Create a crossfilter index
        var ndx = crossfilter(dataset);

        // Axis domain for one PCA coordinate: the data's min and max, each scaled by 1.1.
        function paddedDomain(accessor) {
            return [1.1 * d3.min(dataset, accessor), 1.1 * d3.max(dataset, accessor)];
        }

        /* Create a dimension that groups by (c1,c2,recID), which ensures that every record has
           a unique group. Then configure the scatterplot, which update_pca_coords() modifies. */
        var pcaDim = ndx.dimension(pcaAccessor),
            pcaGroup = pcaDim.group();
        scatterplot
            .dimension(pcaDim).group(pcaGroup)
            .x(d3.scale.linear().domain(paddedDomain(pcaAccessorC1)))
            .y(d3.scale.linear().domain(paddedDomain(pcaAccessorC2)))
            .xAxisLabel("First Principal Component")
            .yAxisLabel("Second Principal Component")
            .width(550).height(550)
            .clipPadding(10)
            .excludedOpacity(0.5);

        /* Callback that recomputes the PCA in response to changes in the filters/checkboxes.
           Every chart that can update the crossfilter must either A) specify this function as
           the callback for its "filtered" event, or B) use a callback that subsequently calls
           it. Otherwise the PCA will not be recomputed on the newly filtered data. */
        function update_pca_coords() {
            /* Attributes that should be included in the PCA. */
            var pcaDimensions = getPCACheckboxValues();
            /* Records that satisfy the current filters. */
            var filteredData = pcaDim.top(Infinity);

            // With no feature ticked or no record left there is nothing to decompose;
            // keep the previous coordinates rather than handing the PCA an empty matrix.
            if (pcaDimensions.length === 0 || filteredData.length === 0) {
                return;
            }

            /* Build the PCA input and project it onto the principal components
               (pcaObject.predict()). Values are coerced with unary plus because not every
               selectable column is converted to a number when the CSV is loaded. */
            var pcaData = filteredData.map(function(d){
                    return pcaDimensions.map(function(e){ return +d[e]; });
                }),
                pcaRecIDs = filteredData.map(function(d){ return d.recId; }),
                pcaObject = new ML.Stat.PCA(pcaData, {}),
                pcaProjData = pcaObject.predict(pcaData);

            /* For each record in the filtered set, update its PCA coordinates in the dataset. */
            for (var i = 0; i < pcaProjData.length; i++) {
                var id = pcaRecIDs[i],
                    projected = pcaProjData[i];
                dataset[id].pcaCoords.c1 = projected[0];
                // A single selected feature yields no second component; plot it at 0
                // instead of storing undefined (which turns the y domain into NaN).
                dataset[id].pcaCoords.c2 = (projected[1] === undefined) ? 0 : projected[1];
            }

            /* Redefine the pca dimension and group to incorporate the newly computed values.
               NOTE(review): the previous dimension is never disposed, so dimensions accumulate
               on ndx with every filter event — confirm whether pcaDim.dispose() is safe here
               when the scatterplot has an active brush. */
            pcaDim = ndx.dimension(pcaAccessor);
            pcaGroup = pcaDim.group();

            /* Finally, replace the scatterplot's dimension, group, domains and data. */
            scatterplot
                .dimension(pcaDim).group(pcaGroup)
                .x(d3.scale.linear().domain(paddedDomain(pcaAccessorC1)))
                .y(d3.scale.linear().domain(paddedDomain(pcaAccessorC2)))
                .data(dataset.map(function(d){ return {key: pcaAccessor(d), value: 1}; }));
        }

        // Call the update function to get the correct PCA coordinates
        update_pca_coords();

        // Rounded mean of a reducer state; 0 for an empty group so that removing the last
        // record does not leave a NaN (0/0) average behind.
        function roundedAverage(p) {
            return p.count > 0 ? Math.round(p.sum / p.count) : 0;
        }

        // Row chart used to demonstrate how the PCA updates based on the filters:
        // one row per value of rowAttribute, sized by the average "duration".
        var rowAttribute = "marital",
            rowDim = ndx.dimension(function(d) { return d[rowAttribute]; }),
            rowgroup = rowDim.group().reduce(
                // add a record to the group
                function (p, v) {
                    ++p.count;
                    p.sum += +v["duration"];
                    p.average = roundedAverage(p);
                    return p;
                },
                // remove a record from the group
                function (p, v) {
                    --p.count;
                    p.sum -= +v["duration"];
                    p.average = roundedAverage(p);
                    return p;
                },
                // initial state
                function () {
                    return {
                        count: 0,
                        sum: 0,
                        average: 0
                    };
                }
            );
        rowChart
            .width(550).height(250)
            .dimension(rowDim)
            .group(rowgroup)
            .valueAccessor(function(p){ return p.value["average"]; })
            .rowsCap(16)
            .title(function(d) {
                // Label with the attribute actually charted (the tooltip used to say "job").
                return rowAttribute + ": " + (d.key) + "\nAverage of \"duration\": " + (d.value["average"]);
            })
            .elasticX(true)
            .on("filtered", update_pca_coords); // IMPORTANT: Make sure update_pca gets called on filter updates ***

        /* Redraw the whole screen if needed, otherwise just update the PCA scatterplot using its
           internal render function (which causes it to update smoothly when checkboxes are toggled). */
        if (redrawAll) {
            dc.renderAll();
        }
        scatterplot.redrawGroup();
    });
};
// Initial page load: render every chart.
draw_graphs(true);
</script>
</div>
</body>
</html>
Updated missing url https://rawgit.com/crossfilter/reductio/master/reductio.js to https://cdn.jsdelivr.net/gh/crossfilter/reductio/reductio.js
https://dc-js.github.io/dc.js/js/d3.js
https://dc-js.github.io/dc.js/js/crossfilter.js
https://dc-js.github.io/dc.js/js/dc.js
https://rawgit.com/crossfilter/reductio/master/reductio.js
https://npmcdn.com/universe@latest/universe.js
https://www.lactame.com/lib/ml/2.0.0/ml.min.js
https://code.jquery.com/jquery-3.2.1.min.js