A visualization demonstrating that the frequency of vocabulary items (lemmas) in Homer follows a Zipfian distribution (at least for all open, non-functional categories). Lemmas occurring 25 times or fewer are excluded for performance reasons.
For more information, see Zipf's Law.
xxxxxxxxxx
<html>
<head>
<meta charset="utf-8">
<title>The Zipfian Desert of Homeric Vocabulary Items: The Odyssey</title>
<script src="jquery-2.0.3.min.js" type="text/javascript"></script>
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script src="bootstrap.min.js" type="text/javascript"></script>
<link href="bootstrap.min.css" rel="stylesheet" type="text/css">
<style type="text/css">
/* Page background behind the central panel. */
body {
background-color: black;
}
/* Fixed-width page column. */
.container {
width: 1120px;
}
/* Panel that holds the title, the controls and both charts. */
.hero-unit {
background-image: url('notebook.png')
}
/* The "NB" link has no href, so ask for a pointer cursor explicitly. */
#NBmore:hover {
cursor: pointer;
}
/* Paragraphs inside the expandable note. */
.nb {
margin: 10px 0px;
}
/* Left-hand area that receives the grid of small-multiple scatter plots. */
#scatter {
width: 560px;
height: 480px;
margin: 0 5px 0 5px;
}
/* One small-multiple <svg>; inline-block lets them wrap into a grid. */
.mult {
margin: 0px 5px;
padding: 0px 2px;
display: inline-block;
}
/* Bubble-chart <svg>; the 570px left margin clears #scatter (560px + 2 x 5px margins). */
#circle {
display: block;
margin: 0px 10px 0px 570px;
}
/* Thin, unfilled axis lines. */
.axis path, .axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
/* Default size for SVG text (tick labels, axis titles). */
text {
font-family: sans-serif;
font-size: 8px;
}
/* Category name drawn inside each small multiple. */
text.category {
font-size: 12px;
font-variant: small-caps;
}
</style>
</head>
<body>
<!-- Page column; styled by the .container rule in the page stylesheet. -->
<div class="container">
<!-- #main is the element the script appends the bubble-chart <svg> to. -->
<div class="hero-unit" id="main">
<p class="lead">
The Zipfian Desert: Frequency of Vocabulary Items in Homer's Odyssey <span class="pull-right"><span id="scale">Linear</span> Scale</span>
</p>
<div class="row-fluid">
<!-- Dismissible note; clicking #NBmore reveals #NBtext and hides the link (see script). -->
<div class="alert" id="nb">
<button type="button" class="close" data-dismiss="alert">×</button>
<a id="NBmore"><strong>NB:</strong> click for more info</a>
<div id="NBtext" style="display:none;">
<p class="nb">Excluding lemmas occurring 25 times or fewer...</p>
<p class="nb">See <a href="https://en.wikipedia.org/wiki/Zipf%27s_law" title="Wikipedia: Zipf's Law" target="_blank">Zipf's Law</a> for more information.</p>
</div>
</div>
<!-- Controls: two sort orders for the small multiples, plus the linear/log toggle. -->
<div class="row-fluid">
<button id="numLemmas" class="btn">Sort by Number of Lemmas in Category</button>
<button id="cumFreq" class="btn active">Sort by Cumulative Frequency of Lemmas in Category</button>
<button id="toggleScale" class="btn pull-right">Toggle Scale</button>
</div>
</div>
<!-- Filled by the script with one small <svg class="mult"> per part-of-speech category. -->
<div id="scatter" class="pull-left">
</div>
</div>
</div>
<script type="text/javascript">
// Clicking the "NB" link slides the explanatory note open and fades the
// link itself away (both animations use jQuery's 'slow' speed).
$('#NBmore').click(function() {
  var noteBody = $('#NBtext');
  var noteLink = $('#NBmore');
  noteBody.slideToggle('slow', $.noop);
  noteLink.toggle('slow');
});
// ---- Shared state for the whole page script ----
// Width and height of the bubble-chart <svg>; also the size handed to the force layout.
var w = 410;
var h = 480;
// Side length (px) of each square small-multiple <svg>.
var m = 120;
// Inner padding (px) used when building the small-multiple scale ranges.
var pad = 20;
// Declared at file scope; not read by any code in this file as shown.
var odyssey;
// Number of part-of-speech categories; overwritten by createNodes with the real count.
var typesPOS = 15;
// Colour scale, called with a part-of-speech code to pick a fill.
var col = d3.scale.category20();
// Part-of-speech codes in display order (filled by createNodes, replaced by the sort functions).
var uniquePOS = [];
// Part-of-speech code -> array of {key: lemma, value: frequency}.
var byPOS = {};
// Display names for the category codes. scatter()/updateScatter() rename the
// '-' code to 'dash' before looking it up here.
var POScategories = {
'n': 'Nouns',
'v': 'Verbs',
't': 'Participles',
'a': 'Adjectives',
'd': 'Adverbs',
'l': 'Articles',
'g': 'Particles',
'c': 'Conjunctions',
'r': 'Prepositions',
'p': 'Pronouns',
'm': 'Numerals',
'i': 'Interjections',
'e': 'Exclamations',
'u': 'Punctuation',
'dash': 'Other'
};
// 'linear' or 'log' once the toggle button has been used; undefined until then
// (scatter treats every value other than 'log' as linear).
var currentScale;
// Hover bookkeeping for the bubble chart: written on mouseenter, read on mouseleave.
var oldClass;
var oldFill;
var multClass;
// d3 selection of the first small multiple (set in scatter); it carries the axis titles.
var firstMult;
/**
 * Radius of a circle whose area equals `i`, i.e. sqrt(i / PI).
 * Used so that a bubble's area (not its radius) is proportional to a count.
 * @param {number} i - desired circle area
 * @returns {number} the matching radius (NaN for negative input)
 */
var radius = function(i) {
  var areaOverPi = i / Math.PI;
  return Math.sqrt(areaOverPi);
};
// Load the lemma table, then build the small multiples (inside createNodes)
// and the bubble chart.
// The two-argument callback form is used so that a failed request is reported
// instead of passing null rows to createNodes, which would throw on
// `data.length`.
d3.csv('odyssey_lemmas.csv', function(error, data) {
  if (error || !data) {
    console.error('Could not load odyssey_lemmas.csv', error);
    return;
  }
  // Local name chosen so it does not shadow the file-level `odyssey` variable.
  var lemmaNodes = createNodes(data);
  bubble(lemmaNodes);
});
// Bubble-chart canvas: a w x h <svg id="circle"> appended to #main.
var svg = d3.select("#main").append("svg:svg");
svg.attr("width", w)
  .attr("height", h)
  .attr("id", "circle");

// Container that scatter() fills with one small-multiple <svg> per category.
var gridSVG = d3.select('#scatter');
/**
 * Convert parsed CSV rows into bubble-chart nodes and draw the small multiples.
 *
 * Rows whose frequency is 25 or lower are skipped. For every kept row a node
 * is created and the lemma is filed under its part of speech in the shared
 * `byPOS` / `uniquePOS` structures. The small multiples are then drawn on a
 * linear scale, ordered by cumulative frequency.
 *
 * @param {Array<Object>} data - rows with `lemma`, `POS` and `frequency` fields
 * @returns {Array<Object>} nodes with `id`, `lemma`, `pos`, `count`, `x`, `y`
 */
var createNodes = function(data) {
  var nodes = [];
  var j; // declared locally so the loops no longer write an implicit global

  // d3.csv already consumes the header line, so data[0] is a real record and
  // must be processed too (the loop previously started at 1 and dropped it).
  for (j = 0; j < data.length; j++) {
    var row = data[j];
    // Parse once, with an explicit radix, and use the same number for the
    // threshold test, the node and the small-multiple entry.
    var count = parseInt(row['frequency'], 10);
    if (!(count > 25)) {
      continue; // also skips rows whose frequency is missing or not numeric
    }
    var pos = row['POS'];
    nodes.push({
      id: j,
      lemma: row['lemma'],
      pos: pos,
      count: count,
      x: Math.random() * w
    });
    if (uniquePOS.indexOf(pos) === -1) {
      uniquePOS.push(pos);
      byPOS[pos] = [];
    }
    byPOS[pos].push({
      'key': row['lemma'],
      'value': count
    });
  }

  typesPOS = uniquePOS.length;
  // Starting height per category, using first-seen order (before sorting).
  for (j = 0; j < nodes.length; j++) {
    nodes[j].y = uniquePOS.indexOf(nodes[j].pos) * h/typesPOS;
  }

  // The sorter replaces the global uniquePOS, so read it again afterwards.
  sortByTotalFrequency(uniquePOS, byPOS);
  for (j = 0; j < uniquePOS.length; j++) {
    scatter('linear', uniquePOS[j], byPOS[uniquePOS[j]]);
  }
  return nodes;
};
/**
 * Order part-of-speech codes by the summed frequency of their lemmas,
 * largest first, and store the result in the global `uniquePOS`.
 *
 * Side effects: sets a `cumFreq` property on every lemma array in `posDict`
 * and replaces `uniquePOS`. Codes with equal totals keep their relative order.
 *
 * @param {Array<string>} posList - codes to sort
 * @param {Object} posDict - code -> array of {key, value}
 * @returns {Array<string>} the sorted codes (the same array assigned to uniquePOS)
 */
var sortByTotalFrequency = function(posList, posDict) {
  var sorted = [];
  var i, j; // local counters: previously leaked as implicit globals

  // Iterate over the list that was passed in, not the global uniquePOS.
  for (j = 0; j < posList.length; j++) {
    var code = posList[j];
    var lemmas = posDict[code];

    var cumFreq = 0;
    for (i = 0; i < lemmas.length; i++) {
      cumFreq += lemmas[i].value;
    }
    lemmas.cumFreq = cumFreq;

    // Insert before the first entry with a strictly smaller total; when
    // there is none (or the list is still empty) this appends at the end.
    var insertAt = sorted.length;
    for (i = 0; i < sorted.length; i++) {
      if (posDict[sorted[i]].cumFreq < cumFreq) {
        insertAt = i;
        break;
      }
    }
    sorted.splice(insertAt, 0, code);
  }

  uniquePOS = sorted;
  return sorted;
};
/**
 * Order part-of-speech codes by how many lemmas each one holds, largest
 * first, and store the result in the global `uniquePOS`.
 * Codes with the same number of lemmas keep their relative order.
 *
 * @param {Array<string>} posList - codes to sort
 * @param {Object} posDict - code -> array of lemma entries
 * @returns {Array<string>} the sorted codes (the same array assigned to uniquePOS)
 */
var sortByNumLemmas = function(posList, posDict) {
  var sorted = [];
  var i, j; // local counters: previously leaked as implicit globals

  // Iterate over the list that was passed in, not the global uniquePOS.
  for (j = 0; j < posList.length; j++) {
    var code = posList[j];
    var currLen = posDict[code].length;

    // Insert before the first entry with strictly fewer lemmas; when there
    // is none (or the list is still empty) this appends at the end.
    var insertAt = sorted.length;
    for (i = 0; i < sorted.length; i++) {
      if (posDict[sorted[i]].length < currLen) {
        insertAt = i;
        break;
      }
    }
    sorted.splice(insertAt, 0, code);
  }

  uniquePOS = sorted;
  return sorted;
};
/**
 * Draw the bubble chart: one circle per lemma, with area proportional to its
 * count and coloured by part of speech, settled by a d3 force layout.
 * Hovering a bubble highlights every bubble of the same category and
 * underlines the title of the matching small multiple.
 *
 * @param {Array<Object>} data - nodes as returned by createNodes
 */
var bubble = function(data) {
  // scatter() renames the '-' code to 'dash' for its class names and colour
  // lookup. Do the same here so that category's bubbles get the same colour
  // as its small multiple and the hover link ('POS-x' -> 'cat-x') finds it.
  var posKey = function(d) {
    return d.pos === '-' ? 'dash' : d.pos;
  };

  var nodes = svg.selectAll("circle")
    .data(data)
    .enter()
    .append("svg:circle")
    .attr("fill", function(d) {
      return col(posKey(d));
    })
    // Start each circle mirrored away from the centre; the force ticks
    // below then pull it inwards.
    .attr("cx", function(d) {
      return d.x - (w/2 - d.x);
    })
    .attr("cy", function(d) {
      return d.y - (h/2 - d.y);
    })
    .attr("r", function(d) {
      return radius(d.count);
    })
    .attr("class", function(d) {
      return "POS-" + posKey(d);
    });

  var force = d3.layout.force()
    .nodes(data)
    .size([w, h])
    // Repulsion grows with the square of the bubble radius.
    .charge(function(d) {
      return -(Math.pow(radius(d.count), 2.0)/7);
    })
    .on("tick", function(e) {
      // Nudge every node towards the centre, more weakly as alpha decays.
      nodes
        .attr("cx", function(d) {
          return d.x + (w/2 - d.x) * e.alpha;
        })
        .attr("cy", function(d) {
          return d.y + (h/2 - d.y) * e.alpha;
        });
    });
  force.start();

  $('[class^="POS-"]').hover(function() {
    // mouseenter: remember the original class and fill so mouseleave can restore them.
    oldClass = $(this).attr('class');
    oldFill = $(this).attr('fill');
    $('.' + oldClass).attr("class", oldClass + " focus");
    svg.selectAll(".focus")
      .attr("fill", "black")
      .attr("opacity", 0.7);
    // 'POS-x' bubbles correspond to the 'cat-x' small multiple.
    multClass = oldClass.replace('POS', 'cat');
    $('.' + multClass).attr("class", "mult " + multClass + " focus");
    gridSVG.selectAll(".focus text.category")
      .attr("text-decoration", "underline");
  }, function() {
    // mouseleave: undo everything while the .focus markers are still present.
    svg.selectAll(".focus")
      .attr("fill", oldFill)
      .attr("opacity", 1.0);
    $('.' + oldClass).attr("class", oldClass);
    gridSVG.selectAll(".focus text.category")
      .attr("text-decoration", "none");
    $('.' + multClass).attr("class", "mult " + multClass);
  });
};
/**
 * Append one small-multiple rank/frequency plot for a part of speech.
 *
 * Each lemma is drawn at x = its 1-based position in `data` and y = its
 * frequency. The first small multiple in the document also receives the
 * shared axis titles. Any `currentScale` other than 'log' draws linear axes.
 *
 * @param {string} currentScale - 'log' for log-log axes, anything else for linear
 * @param {string} posCode - part-of-speech code ('-' is treated as 'dash')
 * @param {Array<{key: string, value: number}>} data - lemmas of that category
 */
var scatter = function(currentScale, posCode, data) {
  if (posCode === '-') {
    posCode = 'dash';
  }
  var isLog = currentScale === 'log';
  var valueOf = function(d) {
    return d.value;
  };
  var maxValue = d3.max(data, valueOf);
  // Axis positions derived from the plot geometry instead of the former
  // hard-coded 100 and 25.
  var xAxisTop = m - pad;
  var yAxisLeft = pad + 5;
  var newScale = function() {
    return isLog ? d3.scale.log() : d3.scale.linear();
  };

  var msvg = gridSVG.append("svg:svg")
    .attr("class", "mult " + "cat-" + posCode)
    .attr("width", m)
    .attr("height", m);
  msvg.append("text")
    .attr("x", pad * 1.5)
    .attr("y", m/2)
    .attr("class", "category")
    .text(function() {
      return POScategories[posCode];
    });

  // The first .mult in the document carries the axis titles.
  firstMult = d3.select('.mult');
  var current = firstMult.attr("class").replace('mult cat-', '');

  var yScale = newScale()
    .domain(d3.extent(data, valueOf))
    .range([xAxisTop, pad]);
  var xScale = newScale()
    .domain([1, data.length])
    .range([yAxisLeft, xAxisTop]);
  if (!isLog && data.length === 1) {
    // With a single point the extent is degenerate; anchor the axis at 0.
    yScale.domain([0, maxValue]);
  }

  var xAxis = d3.svg.axis()
    .scale(xScale)
    .orient("bottom")
    .innerTickSize(0)
    .outerTickSize(0);
  var yAxis = d3.svg.axis()
    .scale(yScale)
    .orient("left")
    .innerTickSize(0)
    .outerTickSize(0);
  if (isLog) {
    // Log plots show bare axis lines without tick labels.
    xAxis.tickValues([]);
    yAxis.tickValues([]);
  }
  else {
    // Linear plots label only the end points: first/last rank and top frequency.
    xAxis.ticks(2)
      .tickValues([1, data.length])
      .tickFormat(d3.format(".0f"));
    yAxis.tickValues([maxValue]);
  }

  if (current === posCode) {
    firstMult.append("text")
      .attr("text-anchor", "middle")
      .attr("x", 60)
      .attr("y", 115)
      .text(isLog ? "log(Rank)" : "Rank")
      .attr("id", "x-label");
    firstMult.append("text")
      .attr("x", isLog ? -90 : -80)
      .attr("y", 15)
      .attr("transform", "rotate(-90)")
      .text(isLog ? "log(Frequency)" : "Frequency")
      .attr("id", "y-label");
  }

  msvg.selectAll("circle")
    .data(data, function(d) {
      return d.key;
    })
    .enter()
    .append("circle")
    .attr("cx", function(d, i) {
      return xScale(i + 1);
    })
    .attr("cy", function(d) {
      return yScale(d.value);
    })
    .attr("r", 2)
    .attr("fill", function(d) {
      return col(posCode);
    })
    .attr("class", posCode);

  // Position only the axes created here, rather than re-transforming every
  // axis in the document on each call.
  msvg.append("svg:g")
    .attr("class", "x axis " + posCode)
    .attr("transform", "translate(0, " + xAxisTop + ")")
    .call(xAxis);
  msvg.append("svg:g")
    .attr("class", "y axis " + posCode)
    .attr("transform", "translate(" + yAxisLeft + ",0)")
    .call(yAxis);
};
// "Sort by Number of Lemmas": redraw the small multiples, largest category first.
$('#numLemmas').click(function() {
  $('.mult').remove();
  $('#cumFreq').removeClass('active');
  $('#numLemmas').addClass('active');
  sortByNumLemmas(uniquePOS, byPOS);
  // Read uniquePOS only after sorting (the sorter replaces the array).
  // forEach replaces the former loop over an undeclared, implicitly global `j`.
  uniquePOS.forEach(function(posCode) {
    scatter(currentScale, posCode, byPOS[posCode]);
  });
});
// "Sort by Cumulative Frequency": redraw the small multiples, highest total first.
$('#cumFreq').click(function() {
  $('.mult').remove();
  $('#numLemmas').removeClass('active');
  $('#cumFreq').addClass('active');
  sortByTotalFrequency(uniquePOS, byPOS);
  // Read uniquePOS only after sorting (the sorter replaces the array).
  // forEach replaces the former loop over an undeclared, implicitly global `j`.
  uniquePOS.forEach(function(posCode) {
    scatter(currentScale, posCode, byPOS[posCode]);
  });
});
/**
 * Animate an existing small multiple to a different axis scale.
 *
 * Rebuilds the x/y scales and axes for the requested mode, transitions the
 * already-drawn circles of that category to their new positions over one
 * second, and redraws both axes in place.
 *
 * @param {string} newScale - 'log' for log-log axes, anything else for linear
 * @param {string} posCode - part-of-speech code ('-' is treated as 'dash')
 * @param {Array<{key: string, value: number}>} data - lemmas of that category
 */
var updateScatter = function(newScale, posCode, data) {
  var code = (posCode === '-') ? 'dash' : posCode;
  var wantLog = (newScale === 'log');
  var frequencyOf = function(d) {
    return d.value;
  };
  var freshScale = function() {
    return wantLog ? d3.scale.log() : d3.scale.linear();
  };
  var bareAxis = function(scale, side) {
    return d3.svg.axis()
      .scale(scale)
      .orient(side)
      .innerTickSize(0)
      .outerTickSize(0);
  };

  var yScale = freshScale()
    .domain(d3.extent(data, frequencyOf))
    .range([m - pad, pad]);
  var xScale = freshScale()
    .domain([1, data.length])
    .range([pad + 5, m - pad]);
  if (!wantLog && data.length === 1) {
    // A single point has a degenerate extent; anchor the linear axis at 0.
    yScale.domain([0, d3.max(data, frequencyOf)]);
  }

  var xAxis = bareAxis(xScale, "bottom");
  var yAxis = bareAxis(yScale, "left");
  if (wantLog) {
    // No tick labels in log mode.
    xAxis.tickValues([]);
    yAxis.tickValues([]);
  }
  else {
    xAxis.ticks(2)
      .tickValues([1, data.length])
      .tickFormat(d3.format(".0f"));
    yAxis.tickValues([d3.max(data, frequencyOf)]);
  }

  gridSVG.selectAll("circle." + code)
    .transition()
    .duration(1000)
    .attr("cx", function(d, i) {
      return xScale(i + 1);
    })
    .attr("cy", function(d) {
      return yScale(d.value);
    });

  gridSVG.select(".y.axis." + code)
    .call(yAxis);
  gridSVG.select(".x.axis." + code)
    .call(xAxis);
};
// "Toggle Scale": switch every small multiple between linear and log axes
// and update the heading and the shared axis titles to match.
$('#toggleScale').click(function() {
  // firstMult is only set once scatter() has drawn something. Before that
  // there is nothing to rescale, and continuing would flip the scale state
  // and then throw on the undefined selection.
  if (!firstMult) {
    return;
  }
  var nextScale = (currentScale === 'log') ? 'linear' : 'log';
  var toLog = (nextScale === 'log');

  // forEach replaces the former loops over an undeclared, implicitly global `j`.
  uniquePOS.forEach(function(posCode) {
    updateScatter(nextScale, posCode, byPOS[posCode]);
  });

  $('#scale').html(toLog ? 'Log' : 'Linear');
  currentScale = nextScale;

  firstMult.select('#x-label')
    .text(toLog ? "log(Rank)" : "Rank");
  firstMult.select('#y-label')
    // The longer log title starts 10px further along the rotated axis.
    .attr("x", toLog ? -90 : -80)
    .text(toLog ? "log(Frequency)" : "Frequency");
});
</script>
</body>
</html>
Modified http://d3js.org/d3.v3.min.js to a secure url
https://d3js.org/d3.v3.min.js