Working on Visionscarto.net sitemap.
From tf/idf on article keywords, compute a cosine similarity and feed it to t-SNE; then add k-means coloring, extraction of the main term(s) per k-means center, an Urquhart graph…
Original work by Philippe Rivière for Visionscarto.net. Comments and variants very welcome!
Add in contours using d3-contour (Susie Lu)
Forked from Fil's block: t-SNE with Levenshtein distances
forked from Fil's block: t-SNE site map [UNLISTED]
xxxxxxxxxx
<head>
<meta charset="utf-8">
<script src="https://unpkg.com/d3@4"></script>
<script src="https://unpkg.com/d3-contour"></script>
<script src="https://raw.githack.com/susielu/d3-annotation/master/d3-annotation.min.js"></script>
<style>
body {
margin: 0;
position: fixed;
top: 0;
right: 0;
bottom: 0;
left: 0;
font-family: monospace;
}
path.contour { fill: none; stroke: none; fill-opacity: 0.2;}
</style>
</head>
<body>
<script>
// Drawing surface, in pixels.
const width = 960;
const height = 500;
const margin = { top: 20, right: 120, bottom: 20, left: 120 };

// Article size scales: `scalepop` is fed the view count, `scalepop2` the popularity figures.
const scalepop = d3.scalePow()
  .exponent(1 / 2.5)
  .domain([1, 10000])
  .range([0, 15]);
const scalepop2 = d3.scaleLog()
  .domain([1, 1000])
  .range([0, 20]);

// 30-colour ordinal palette (category10 followed by category20c), keyed by author list.
const scalecountry = d3.scaleOrdinal(
  d3.schemeCategory10.concat(d3.schemeCategory20c)
);

// Map embedding coordinates to screen coordinates inside the margins.
// Their domains are set later, from the extent of the current positions.
const centerx = d3.scaleLinear().range([margin.left, width - margin.right]);
const centery = d3.scaleLinear().range([margin.top, height - margin.bottom]);

const voronoi = d3.voronoi();

// tf-idf weights per term; stays undefined until the worker sends them.
let tfidf;
d3.json('plan.json', function (articles) {
// One plain record per article, limited to the first 400 entries of plan.json.
// The slice happens BEFORE the map so no record is built only to be thrown away.
const data = articles
  .slice(0, 400)
  .map(d => ({
    // random placeholder coordinates
    lon: Math.random(),
    lat: Math.random(),
    // every numeric HTML entity in the title is rendered as a typographic apostrophe
    name: d['title'].replace(/&#\d+;/g, '’'),
    // de-duplicated list of features: translation group ("trad…"), authors and tags.
    // Missing authors/tags are treated as empty lists (d3.merge throws on undefined).
    props: d3.set(d3.merge([
      ['trad' + (d.trad || d.id)],
      d.authors || [],
      d.tags || []
    ])).values(),
    // radius combines the view count and the `pop` figure
    r: scalepop(+d['views']) + scalepop2(+d['pop'] + 1),
    p: scalepop2(+d['popularity']),
    color: scalecountry('' + d.authors)
  }));
/*
const canvas = d3.select("body").append("canvas")
.attr("width", width)
.attr("height", height);
*/
// Root <svg> element, sized to the drawing surface.
const svg = d3.select("body")
  .append("svg")
  .attr("width", width)
  .attr("height", height);

// One <g> layer per kind of mark, appended in this order:
// links, contours, articles, cluster labels, annotations.
const [lines, gcontours, garticles, gmeans, gannotations] =
  d3.range(5).map(() => svg.append('g'));
// pos holds the current embedding coordinates, one [x, y] pair per article.
// It starts out random and is replaced each time the t-SNE worker posts new positions.
let pos = data.map(() => [Math.random(), Math.random()]);

// k-means state: K centers (all starting at the origin) and a colour per group.
const K = 20;
const centers = d3.range(K).map(() => [0, 0]);
const colorscale = d3.scaleOrdinal(d3.schemeCategory20c);

// Urquhart-graph edges between articles, recomputed on every worker update.
let links = [];
// Every article starts at the centre of the drawing surface.
data.forEach(d => {
  d.x = width / 2;
  d.y = height / 2;
});

// Force simulation driving the on-screen layout. Three forces run on each tick:
//  - 'tsne'    pulls each article towards the screen position of its embedding point,
//  - 'collide' keeps the discs from overlapping,
//  - 'kmeans'  runs one k-means iteration on the embedding positions.
// The tick handler then redraws every layer.
const forcetsne = d3.forceSimulation(data)
  //.stop()
  .alphaDecay(0.001)
  .alpha(0.1)
  .force('tsne', function (alpha) {
    // rescale so the current embedding extent fills the area inside the margins
    centerx.domain(d3.extent(pos, p => p[0]));
    centery.domain(d3.extent(pos, p => p[1]));
    data.forEach((d, i) => {
      d.x += alpha * (centerx(pos[i][0]) - d.x);
      d.y += alpha * (centery(pos[i][1]) - d.y);
    });
  })
  .force('collide', d3.forceCollide().radius(d => 1.5 + d.r))
  .force('kmeans', function () {
    // a central point to re-init empty groups
    const fallback = [
      d3.mean(centers, c => c[0] || 0),
      d3.mean(centers, c => c[1] || 0)
    ];

    // Bucket the positions by group in a single pass, instead of filtering
    // the whole position array twice for every center.
    const members = centers.map(() => []);
    pos.forEach((p, j) => {
      const bucket = members[data[j].group];
      // articles with no group yet (or an out-of-range one) belong to no bucket
      if (bucket) bucket.push(p);
    });

    // the order is important: move the centers before re-making the groups
    // so that the centers follow the general movement and keep "their"
    // points, instead of having points pass through them

    // 1. move K-centers towards the barycenter of their group
    //    (d3.mean of an empty bucket is undefined -> use the fallback point;
    //    note that `||` also falls back when a mean is exactly 0)
    centers.forEach((c, i) => {
      c[0] = d3.mean(members[i], p => p[0]) || fallback[0];
      c[1] = d3.mean(members[i], p => p[1]) || fallback[1];
    });

    // 2. group each point according to its closest K-center
    data.forEach((d, i) => {
      d.group = d3.scan(centers.map(c => {
        const dx = pos[i][0] - c[0];
        const dy = pos[i][1] - c[1];
        return dx * dx + dy * dy;
      }));
    });
  })
  .on('tick', function () {
    // drawcanvas(canvas, data);
    drawcontours(gcontours, data);
    drawsvg(garticles, data);
    drawlines(lines, links);
    drawgmeans(gmeans, centers);
  });
/**
 * Paints every node as a filled disc on a <canvas>.
 * Canvas alternative to drawsvg; its call site in the tick handler is commented out.
 *
 * @param canvas d3 selection wrapping the <canvas> element
 * @param nodes  array of objects exposing x, y, r and group
 */
function drawcanvas(canvas, nodes) {
  const context = canvas.node().getContext("2d");
  // Erase the previous frame over the whole width x height surface
  // (the height used to be passed as `width`).
  context.clearRect(0, 0, width, height);
  context.lineWidth = 0.5;
  for (const node of nodes) {
    context.beginPath();
    context.moveTo(node.x, node.y);
    context.arc(node.x, node.y, node.r, 0, 2 * Math.PI);
    // disc colour follows the k-means group
    context.fillStyle = colorscale(node.group);
    context.fill();
  }
}
const xcolor = d3.scaleLinear().range(['lightblue', 'red']);
// one categorical colour per highlighted word
const catcolor = d3.scaleOrdinal(d3.schemeCategory10);

/**
 * Draws one density contour per highlighted word, computed over the nodes whose
 * `props` contain that word, and labels each contour with a callout annotation.
 * The estimator, the contour list and the annotation are all rebuilt on every call.
 *
 * @param svg   d3 selection of the group receiving the contour paths
 * @param nodes array of objects exposing x, y and props
 */
function drawcontours(svg, nodes) {
  // https://twitter.com/DataToViz/status/885194994860015616
  const density = d3.contourDensity()
    .x(d => d.x)
    .y(d => d.y)
    .size([width, height])
    .bandwidth(20)
    .thresholds(4);
  const path = d3.geoPath();

  const words = ['Philippe Rekacewicz', 'Philippe Rivière', 'Agnès Stienne', 'Cristina Del Biaggio', 'Sylvain Lesage'];

  // Keep, for each word, the first contour the estimator returns (if any) and its centroid.
  const contours = [];
  words.forEach(word => {
    const matching = nodes.filter(d => d.props.indexOf(word) > -1);
    const contour = density(matching)[0];
    if (!contour) return;
    contours.push({
      'word': word,
      'contour': contour,
      centroid: path.centroid(contour)
    });
  });

  // Data join for the contour paths; stroke and fill colours are keyed by word.
  const existing = svg.selectAll('path.contour').data(contours);
  const added = existing.enter()
    .append('path')
    .classed('contour', true);
  existing.merge(added)
    .attr("d", d => path(d.contour))
    .style("stroke", d => catcolor(d.word))
    .style("fill", d => catcolor(d.word));
  existing.exit().remove();

  // One callout per contour, anchored at the contour centroid.
  const toAnnotation = d => ({
    data: d,
    dx: d.dx || 20,
    dy: d.dy || -10,
    note: {
      title: d.word || "??",
    },
    subject: {
      radius: 12,
      radiusPadding: 12,
    },
  });
  const annotation = d3.annotation()
    .type(d3.annotationCalloutCircle)
    .annotations(contours.map(toAnnotation))
    .accessors({ x: d => d.centroid[0], y: d => d.centroid[1] });

  // Double-click toggles the edit mode of the annotation built by this call.
  gannotations.call(annotation)
    .on('dblclick', function() {
      annotation.editMode(!annotation.editMode()).update();
    });
}
/**
 * Picks the label of k-means group `i`: the tag whose accumulated tf-idf weight,
 * summed over the articles of the group, is the largest.
 * Props starting with "trad" or with an upper-case A-Z letter are ignored.
 *
 * @param i group index
 * @returns undefined while tfidf is not available, '' for an empty group,
 *          null when no tag scores above 0, otherwise the winning tag
 */
function mainterms(i) {
  if (!tfidf) return;

  const members = data.filter(d => d.group === i);
  if (members.length === 0) return '';

  const scores = {};
  let best = null;
  let bestScore = 0;
  for (const tag of d3.merge(members.map(d => d.props))) {
    if (/^trad/.test(tag) || /^[A-Z]/.test(tag)) continue;
    // running total for this tag; it wins as soon as it exceeds the current best
    scores[tag] = (scores[tag] || 0) + tfidf[tag];
    if (scores[tag] > bestScore) {
      best = tag;
      bestScore = scores[tag];
    }
  }
  return best;
}
/**
 * Shows one text label per k-means center, placed at the center's screen
 * position and filled with the main term of its group.
 *
 * @param svg     d3 selection of the group receiving the labels
 * @param centers array of [x, y] centers in embedding coordinates
 */
function drawgmeans(svg, centers) {
  const labels = svg.selectAll('text').data(centers);

  labels.exit().remove();

  const created = labels.enter()
    .append('text')
    .attr('stroke', 'black')
    .attr('text-anchor', 'middle');

  // position and text are refreshed for new and existing labels alike
  labels.merge(created)
    .attr('x', c => centerx(c[0]))
    .attr('y', c => centery(c[1]))
    .text((c, i) => mainterms(i));
}
/**
 * Draws one thin dashed segment per link.
 *
 * The join is keyed by the indices of the two endpoints. The previous key,
 * `l.index`, does not exist on the link objects built from the voronoi diagram,
 * so every link shared the key "undefined" and all lines but one were removed
 * and re-created on each call.
 * Assumes each endpoint carries a unique `index` (d3.forceSimulation assigns one
 * to every node it is given).
 *
 * @param svg   d3 selection of the group receiving the lines
 * @param links array of { source, target } whose endpoints expose x, y and index
 */
function drawlines(svg, links) {
  const lines = svg.selectAll('line')
    .data(links, l => `${l.source.index}-${l.target.index}`);

  lines.exit().remove();

  // constant styling is applied once, when a line is created
  const enter = lines.enter()
    .append('line')
    .attr('stroke', 'black')
    .attr('stroke-width', 0.2)
    .attr('stroke-dasharray', '5 2');

  // end points follow the nodes on every call
  lines.merge(enter)
    .attr('x1', d => d.target.x)
    .attr('x2', d => d.source.x)
    .attr('y1', d => d.target.y)
    .attr('y2', d => d.source.y);
}
/**
 * Renders each node as a <g class="city"> holding a disc (with the full name as
 * tooltip) and, for the larger discs only, a shortened white label; then moves
 * every group to its node's current position.
 *
 * @param svg   d3 selection of the group receiving the articles
 * @param nodes array of objects exposing x, y, r and name
 */
function drawsvg(svg, nodes) {
  // label: leading article stripped, length limited by the radius
  const shortLabel = d => d.name.replace(/^(L([ea] |’)|À |Un )/, '').substring(0,d.r/4);
  const labelSize = d => (d.r > 9 ? '12px' : '9px');

  const cities = svg.selectAll('g.city').data(nodes);
  const created = cities.enter()
    .append('g')
    .classed('city', true);

  // disc, drawn at half the node radius
  const disc = created.append('circle')
    .attr('r', d => d.r / 2);
  //.attr('fill', d => d.color)
  disc.append('title')
    .text(d => d.name);

  // only nodes with r > 7 get a label
  created
    .filter(d => d.r > 7)
    .append('text')
    .attr('fill', 'white')
    .style('font-size', labelSize)
    .attr('text-anchor', 'middle')
    .attr('dominant-baseline', 'middle')
    .attr('pointer-events', 'none')
    .text(shortLabel);

  cities.merge(created)
    // .attr('fill', d => /*d.color ||*/ colorscale(d.group))
    .attr('fill-opacity', 0.5)
    .attr('transform', d => `translate(${d.x},${d.y})`);
}
// Fetch the three scripts as text, bundle them into a single Blob and run that
// Blob as a web worker. The worker receives the article records and posts back
// messages that this page reads as follows:
//   log   -> arguments forwarded to console.log
//   tfidf -> stored for mainterms()
//   pos   -> new embedding positions; the Urquhart links are rebuilt from them
//   done / cost -> used to decide whether to ask for 10 more iterations
d3.queue()
  .defer(d3.text, 'tsne.js')
  .defer(d3.text, 'https://unpkg.com/d3-geo')
  .defer(d3.text, 'worker.js')
  .awaitAll(function (err, scripts) {
    // Without the scripts there is nothing to run: report the failure instead
    // of letting the Blob constructor throw on an undefined list.
    if (err) {
      console.error('Could not load the t-SNE worker scripts', err);
      return;
    }

    const worker = new Worker(
      window.URL.createObjectURL(
        new Blob(scripts, {
          type: "text/javascript"
        })
      )
    );

    worker.postMessage({
      maxIter: 10,
      dim: 2,
      perplexity: 100.0,
      data: data
    });

    worker.onmessage = function (e) {
      const msg = e.data;
      if (msg.log) console.log(...msg.log);
      if (msg.tfidf) tfidf = msg.tfidf;
      //if (isNaN(msg.cost)) console.log('isNaN');

      if (msg.pos) {
        pos = msg.pos;
        // Tag each position with its index and divide both coordinates by the
        // largest x value before building the voronoi diagram.
        const sc = d3.max(pos, p => p[0]);
        pos.forEach((p, i) => {
          p.index = i;
          p[0] /= sc;
          p[1] /= sc;
        });
        const diagram = voronoi(pos);

        // Urquhart edges, using squared euclidean distance between positions;
        // the endpoints are then swapped for the matching article records.
        links = urquhart(diagram, (a, b) =>
          (a[0] - b[0]) * (a[0] - b[0]) + (a[1] - b[1]) * (a[1] - b[1])
        )
          .map(d => {
            d.source = data[d.source.index];
            d.target = data[d.target.index];
            return d;
          });
      }

      // keep iterating until 10000 iterations are done or the cost drops to 1e-2
      if (msg.done && msg.done < 10000 && msg.cost > 1e-2) {
        worker.postMessage({
          maxIter: msg.done + 10,
        });
      }
    };
  });
});
/**
 * Returns the links of `diagram` minus, for each triangle, its longest edge.
 *
 * @param diagram  object exposing links() and triangles(); link endpoints and
 *                 triangle vertices must carry an `index`
 * @param distance optional function (a, b) -> length; when omitted, the squared
 *                 euclidean distance between the vertices' x / y is used
 * @returns array of the remaining link objects
 */
function urquhart(diagram, distance) {
  // links keyed by their [smaller index, larger index] pair
  const edges = d3.map();
  diagram.links().forEach(function (link) {
    edges.set(d3.extent([link.source.index, link.target.index]), link);
  });

  const squaredXY = function (a, b) {
    return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y);
  };
  const measure = distance ? distance : squaredXY;

  // key of the longest edge of each triangle (-1 when no edge qualifies);
  // on ties the later edge wins
  const longest = diagram.triangles().map(function (triangle) {
    let best = 0;
    let key = -1;
    for (let j = 0; j < 3; j++) {
      const a = triangle[j];
      const b = triangle[(j + 1) % 3];
      const len = measure(a, b);
      if (len >= best) {
        best = len;
        key = d3.extent([a.index, b.index]);
      }
    }
    return key;
  });

  longest.forEach(function (key) {
    if (edges.has(key)) edges.remove(key);
  });

  return edges.values();
}
</script>
</body>
https://unpkg.com/d3@4
https://unpkg.com/d3-contour
https://raw.githack.com/susielu/d3-annotation/master/d3-annotation.min.js