K-Means Unsupervised Learning Algorithm
xxxxxxxxxx
<html>
<head>
<meta charset="utf-8">
<title>K-means</title>
<style media="screen">
/* Drop the default page margin so the window-sized SVG sits flush with the viewport. */
body{
margin: 0;
}
/* Circles nested inside a .point group (the data points being clustered):
   white fill with a thin grey outline. Because stroke is declared on the
   circle itself, it wins over any stroke value inherited from the parent group. */
.point circle{
fill:rgba(255, 255, 255, 1);
stroke:#aaa;
stroke-width:1px;
}
/* Connector lines inside the .lines group: thin, grey and mostly transparent.
   The script later overrides the stroke colour inline. */
.lines line{
stroke:#aaa;
stroke-width:1px;
stroke-opacity:.25;
}
/* Mean markers start fully transparent; the script fades them to opacity 1
   when it positions them, and sets their stroke and fill colours inline. */
.mean{
opacity: 0;
fill:rgba(255, 255, 255, .7);
stroke-width:3px;
}
</style>
</head>
<body>
<svg></svg>
</body>
<script src="lodash.min.js"></script>
<script src="/blacki/raw/b83b3d4139257a353b8a/d3.min.js"></script>
<script src="/blacki/raw/b83b3d4139257a353b8a/dat.gui.min.js"></script>
<script src="/blacki/raw/b83b3d4139257a353b8a/d3-jetpack.js"></script>
<script src="/blacki/raw/b83b3d4139257a353b8a/d3-starterkit.js"></script>
<script src="d3-randompoints.js"></script>
<script>
// ---------------------------------------------------------------------------
// Demo configuration and initial state.
// Every name here is a page-level global on purpose: stepK, draw, o.restart
// and the dat.GUI callbacks all read and reassign them. They are declared
// explicitly with `var` instead of being created as implicit globals.
// ---------------------------------------------------------------------------

// 24-colour palette, shuffled once so each page load gets a different ordering.
var c24 = d3.shuffle(['#33cc33','#ff0000','#3366ff','#00e7b4','#a6cee3','#fb9a99','#b15928','#b4002f','#bcbc34','#393d77','#ff8200','#7800a8','#ff00d7','#ffc100','#b2df8a','#fdb56d','#cab2d6','#c39c95','#29bece','#cc70bc','#669c95','#ff7a9a','#e3026f','#006d33'])

// Distribution names. Not referenced again in this script; presumably the
// names accepted by d3-randompoints.js (only 'normal' is used here) -- TODO confirm.
var distributions = ['uniform','bates', 'irwinHall', 'normal', 'logNormal']

// Tunable options, exposed through the dat.GUI panel.
var o = {}
o.actualClusters = 2                      // clusters requested from clusterPoints
o.guessClusters = 2                       // number of means (k) being fitted
o.ptsPerCluster = 1500/o.actualClusters   // 1500 points in total, split evenly
o.objFuncThreshold = .5                   // stepK treats d3.sum(diff) below this as converged
o.loop = true                             // on convergence: restart (true) or stop (false)

// One colour per guessed cluster.
var colors = c24.slice(-o.guessClusters)

// Canvas size, point radius, and the coordinate ranges (inset by one radius)
// handed to clusterPoints.
var w = window.innerWidth
var h = window.innerHeight
var r = .005*d3.min([w,h])
var xRange = [r,w-r]
var yRange = [r,h-r]

// Selections created by draw() and the handle of the stepping timer.
var lines,test,means,interval

// Convergence bookkeeping used by stepK. Seeded with large multiples of the
// longer window side so the very first step is never mistaken for convergence.
// `diff` starts as an alias of the same array, exactly as before.
var distances = d3.range(1,o.guessClusters+1).map(function (d) {return d*d3.max([w,h])})
var diff = distances

// Generate the clusters, flatten their points into one list, and pick the
// initial means from that list after shuffling it in place.
var data = clusterPoints(o.actualClusters,o.ptsPerCluster,'normal',xRange,yRange)
var testPts = data.map(ƒ('points')).reduce(function (prev,curr) {return prev.concat(curr)})
var meanPts = d3.shuffle(testPts).slice(-o.guessClusters)

// Render the scene and start iterating every 400 ms.
draw(data)
interval = setInterval(stepK,400)
/**
 * Euclidean distance between two points.
 * @param {{x:number, y:number}} p1 first point
 * @param {{x:number, y:number}} p2 second point
 * @returns {number} straight-line distance between p1 and p2
 */
function distance(p1,p2){
  var dx = p1.x - p2.x;
  var dy = p1.y - p2.y;
  var squared = Math.pow(dx,2) + Math.pow(dy,2);
  return Math.sqrt(squared);
}
// Called by stepK when the run has converged and o.loop is false.
// Currently a no-op: the setInterval timer is left running, so stepK keeps
// re-checking on every tick and calls o.restart() once o.loop is true again.
function stop () {
}
/**
 * Run one k-means iteration and animate it.
 *
 * 1. If the change recorded by the previous step (d3.sum(diff)) is zero or
 *    below o.objFuncThreshold, hand over to o.restart() or stop() and return.
 * 2. Move the mean markers to the current means and fade them in.
 * 3. Assign every point to its nearest mean, colour the point and its
 *    connector line, and aim the line at that mean.
 * 4. Replace each mean with the centroid of the points assigned to it.
 * 5. Record how much the last point's distances to the means changed; this is
 *    the convergence signal read by the next call.
 *
 * Reads and reassigns the globals meanPts, diff and distances.
 */
function stepK(){
  var change = d3.sum(diff)
  if(!change||change<o.objFuncThreshold) return (o.loop)?o.restart():stop()

  means.data(meanPts)
    .transition().duration(400).delay(function(d,i) {return i*50})
    .attr('cx',ƒ('x')).attr('cy',ƒ('y'))
    .style('opacity',1)

  // Running coordinate totals per mean, so the centroids come out of the same
  // pass that does the assignment (points are summed in selection order).
  var totals = meanPts.map(function () { return {x:0,y:0,n:0} })
  // Distances from the most recently visited point to every mean.
  var lastDistances = null

  test.each(function (p,i) {
    var toMeans = meanPts.map(function(m) { return distance(m,p) })
    var nearest = toMeans.indexOf(d3.min(toMeans))
    lastDistances = toMeans
    // No comparable distance (e.g. every mean is NaN): leave this point alone.
    if(nearest<0) return
    p.curMean = nearest

    var color = colors[nearest]
    // The stylesheet sets stroke on `.point circle` directly, so a stroke put
    // on the parent group would never show; colour the circle itself.
    d3.select(this).select('circle')
      .style('stroke', function () {return color})
    d3.select('#line-'+i)
      .transition().duration(400)
      .attr('x2',meanPts[nearest].x)
      .attr('y2',meanPts[nearest].y)
      .style('stroke', function () {return color})

    totals[nearest].x += p.x
    totals[nearest].y += p.y
    totals[nearest].n += 1
  })

  // A mean that attracted no points keeps its position instead of collapsing
  // to undefined coordinates.
  meanPts = meanPts.map(function (m,i) {
    var t = totals[i]
    return t.n ? {x:t.x/t.n, y:t.y/t.n} : m
  })

  if(lastDistances){
    diff = distances.map(function (d,i) { return Math.abs(d-lastDistances[i]) })
    distances = lastDistances
  }else{
    // No points at all: nothing can move, so report zero change.
    diff = []
  }
}
/**
 * Rebuild the whole SVG scene from scratch.
 *
 * Layers, in paint order:
 *   g.lines  - one zero-length <line> per test point, with id "line-<index>"
 *   g.test   - one g.point per test point, each holding a circle
 *   g.means  - one circle.mean per current mean
 *   g.actual - one g.cluster per generated cluster holding its circles,
 *              filled with the cluster colour but fully transparent
 *
 * Assigns the globals svg, lines, test and means.
 *
 * @param {Array} data generated clusters; each entry exposes a `points` array
 */
function draw(data) {
  var xOf = ƒ('x')
  var yOf = ƒ('y')

  // Size the canvas to the window and wipe whatever was drawn before.
  svg = d3.select('svg')
  svg.attr({
    width:w,
    height:h,
  })
  svg.html('')

  // Connector lines: both ends start on the point itself.
  var lineLayer = svg.append('g.lines')
  lines = lineLayer.dataAppend(testPts,'line')
  lines
    .attr('id',function (pt,idx) {return 'line-'+idx})
    .attr('x1',xOf)
    .attr('y1',yOf)
    .attr('x2',xOf)
    .attr('y2',yOf)

  // The points being clustered.
  var pointLayer = svg.append('g.test')
  test = pointLayer.dataAppend(testPts,'g.point')
  var dots = test.append('circle')
  dots.attr('r',r)
  dots.attr('cx',xOf)
  dots.attr('cy',yOf)

  // Mean markers: slightly larger, outlined in their cluster colour.
  var meanLayer = svg.append('g.means')
  means = meanLayer.dataAppend(meanPts,'circle.mean')
  means
    .attr('r',r+2)
    .attr('cx',xOf)
    .attr('cy',yOf)
    .style('stroke', function (m,idx) {return colors[idx]})
    .style('fill', '#fff')

  // The generated clusters, kept in the DOM but invisible.
  var answerLayer = svg.append('g.actual')
  var clusterGroups = answerLayer.dataAppend(data,'g.cluster')
  var answerDots = clusterGroups.dataAppend(ƒ('points'),'circle.point.ans')
  answerDots
    .attr('r',r)
    .attr('cx',xOf)
    .attr('cy',yOf)
  clusterGroups.each(function (cluster,idx) {
    var members = d3.select(this).selectAll('.point')
    members.style('fill', function () {return colors[idx]})
    members.style('opacity',0)
  })
}
/**
 * Start a fresh run with newly randomised settings.
 *
 * Picks a random threshold in [0, 3), a random k from 2..4 and a random total
 * point count from 500..1499 (split across o.actualClusters), then regenerates
 * the clusters, redraws the scene and restarts the stepping timer at 200 ms.
 */
o.restart = function () {
  // Roll the new settings.
  o.objFuncThreshold = 3 * Math.random()
  var kChoices = d3.shuffle(d3.range(2,5))
  o.guessClusters = kChoices[0]
  var totalChoices = d3.shuffle(d3.range(500,1500))
  o.ptsPerCluster = totalChoices[0]/o.actualClusters
  colors = d3.shuffle(c24).slice(-o.guessClusters)

  // Seed the convergence bookkeeping with large values (1x, 2x, ... the
  // longer window side); distances and diff share the same array.
  var longSide = d3.max([w,h])
  var seed = []
  for (var k = 1; k <= o.guessClusters; k++) {
    seed.push(k*longSide)
  }
  distances = seed
  diff = seed

  // New clusters, flattened point list, and initial means taken from it.
  data = clusterPoints(o.actualClusters,o.ptsPerCluster,'normal',xRange,yRange)
  var perCluster = data.map(ƒ('points'))
  testPts = perCluster.reduce(function (acc,pts) {return acc.concat(pts)})
  var shuffled = d3.shuffle(testPts)
  meanPts = shuffled.slice(-o.guessClusters)

  // Redraw, then swap the old timer for a new one.
  draw(data)
  clearInterval(interval)
  interval = setInterval(stepK,200)
}
// ---------------------------------------------------------------------------
// dat.GUI control panel.
//
// The change handlers used to call o.restart(), which re-randomises
// guessClusters, ptsPerCluster and objFuncThreshold and regenerates the data,
// so the value the user had just picked was thrown away immediately. They now
// rebuild the scene from the current settings instead. The "restart" button
// and the automatic loop still randomise as before.
// ---------------------------------------------------------------------------

/**
 * Rebuild the scene from the current values in `o` without randomising them.
 * @param {Array} [clusterData] clusters to display; when omitted, new ones are
 *   generated from o.actualClusters and o.ptsPerCluster
 */
function applySettings(clusterData) {
  colors = d3.shuffle(c24).slice(-o.guessClusters)
  distances = diff = d3.range(1,o.guessClusters+1).map(function (d) {return d*d3.max([w,h])})
  data = clusterData || clusterPoints(o.actualClusters,o.ptsPerCluster,'normal',xRange,yRange)
  testPts = data.map(ƒ('points')).reduce(function (prev,curr) {return prev.concat(curr)}, [])
  meanPts = d3.shuffle(testPts).slice(-o.guessClusters)
  draw(data)
  clearInterval(interval)
  interval = setInterval(stepK,200)
}

var gui = new dat.GUI({width:300})

// Resize the existing clusters in place: keep their points and either top them
// up with new random points or trim the surplus.
gui.add(o, 'ptsPerCluster', 0, 1500).listen()
  .onChange(function (num) {
    // The slider can report fractional values; work with a whole point count.
    var count = Math.max(0, Math.round(num))
    var resized = data.map(function (d) {
      var cluster = _.extend({},d)
      if(count>d.points.length){
        // Assumes randomPoints(n, distribution, xRange, yRange) returns an
        // array of n points and that each cluster carries xRange/yRange,
        // as the previous handler relied on -- TODO confirm.
        var extra = randomPoints(count-d.points.length,'normal',d.xRange,d.yRange)
        cluster.points = d.points.concat(extra)
      }else{
        cluster.points = d.points.slice(0,count)
      }
      return cluster
    })
    applySettings(resized)
  })

// Changing the number of real clusters re-splits the 1500 points evenly.
gui.add(o, 'actualClusters').min(1).max(o.guessClusters+3).step(1).listen()
  .onChange(function() {
    o.ptsPerCluster = 1500/o.actualClusters
    applySettings()
  })

gui.add(o, 'guessClusters').min(1).max(o.guessClusters+3).step(1).listen()
  .onChange(function() {applySettings()})

// These are read directly by stepK on its next tick; no rebuild needed.
gui.add(o, 'objFuncThreshold').min(0).max(o.guessClusters+3).step(.1).listen()
gui.add(o, 'loop')
gui.add(o, 'restart')
gui.close()
</script>
</html>