Extracts text nodes from a URL and displays them hierarchically.
xxxxxxxxxx
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v4.min.js"></script>
<style>
body { margin:0;padding:1em }
body, input { font: 1em/1.3 sans-serif;}
/* Keep the URL field wide enough to show a typical address. */
#url { min-width: 20em; }
#root {max-width: 100%}
#root div {padding: .5em; padding-left: 0; box-sizing: border-box}
#root div + div { margin-top: .5em; }
#root div[data-tag] {
padding-left: .5em;
border: 1px dashed #ccc;
}
#root div[data-tag]::before {
content: attr(data-tag);
margin-bottom: .5em;
display: block;
font-size: .8em;
color: #ccc;
}
</style>
</head>
<body>
<label>URL: <input id="url" value="https://twitter.com"></label> <button id="submit">Load</button>
<div id="root"></div>
<script>
// Clear any earlier console output when the script starts.
console.clear();
// The IIFE keeps the helper functions below out of the global scope;
// it is invoked with `document`, which the helpers see as `doc`.
(function(doc) {
/**
 * Click handler for the Load button: fetches the page named in the #url input
 * through the proxy, maps its <body> to a text tree and renders that tree into
 * #root, replacing whatever was rendered before.
 */
function initTree() {
  var target = doc.getElementById('url').value;

  // Invoked by loadRemoteDom with the parsed remote document.
  function showDocument(remoteDom) {
    var textTree = mapNode(remoteDom.body);
    var container = doc.getElementById('root');
    // Drop the previous rendering before drawing the new tree.
    container.innerHTML = '';
    renderTree(textTree, container);
  }

  loadRemoteDom(proxy(target), showDocument);
}
/**
 * Renders one node of the mapped tree as a <div> appended to `parentNode`.
 *
 * Element items (`{tag, children}`) become a div whose `data-tag` attribute holds
 * the lower-cased tag name and whose children are rendered recursively; text
 * items (`{content}`) become a div containing that text.
 *
 * @param {?Object} item - Tree node produced by mapNode; may be null/undefined
 *   when the source document had no renderable text.
 * @param {Node} parentNode - DOM node that receives the rendered div.
 */
function renderTree(item, parentNode) {
  var n, i;
  // mapNode yields a falsy value for documents without any text; render nothing
  // instead of throwing on `item.tag`.
  if(!item) {
    return;
  }
  n = doc.createElement('div');
  if(item.tag) {
    n.dataset.tag = item.tag.toLowerCase();
    for(i = 0; i < item.children.length; i++) {
      renderTree(item.children[i], n);
    }
  }
  else {
    // textContent (not innerHTML) so remote text is never interpreted as markup.
    n.textContent = item.content;
  }
  parentNode.appendChild(n);
}
/**
 * Recursively converts a DOM node into a plain-object tree of its text.
 *
 * @param {Node} node - DOM node to convert.
 * @returns {?Object} `{content}` for a non-blank text node, `{tag, children}`
 *   for an element with at least one mapped descendant, or `null` when there is
 *   nothing to render (blank text, STYLE/SCRIPT elements, elements without
 *   mapped children, any other node type).
 */
function mapNode(node) {
  var children = [], child, text, i;
  switch(node.nodeType) {
    case Node.TEXT_NODE:
      // Trim the ends and collapse runs of whitespace into a single space.
      text = node.nodeValue.trim().replace(/\s\s+/g, ' ');
      if(!text.length) {
        return null;
      }
      return {
        // Return the normalized text; the raw nodeValue still carries the
        // surrounding whitespace that was just stripped.
        content: text
      };
    case Node.ELEMENT_NODE:
      // Case-insensitive so lowercase tag names (e.g. elements inside inline
      // SVG) are filtered as well.
      if(/^(STYLE|SCRIPT)$/i.test(node.tagName)) {
        return null;
      }
      for(i = 0; i < node.childNodes.length; i++) {
        child = mapNode(node.childNodes[i]);
        if(child) {
          children.push(child);
        }
      }
      // Elements that contribute no text are dropped from the tree.
      if(!children.length) {
        return null;
      }
      return {
        tag: node.tagName,
        children: children
      };
    default:
      // Comments and other node types carry nothing to display.
      return null;
  }
}
/**
 * Builds the address used to fetch `url` through the cors.io proxy.
 *
 * @param {string} url - Address of the page to load.
 * @returns {string} The proxy prefix followed by `url`, unmodified.
 */
function proxy(url) {
  var proxyPrefix = 'https://cors.io/?';
  return proxyPrefix + url;
}
/**
 * Requests `url` with GET and passes the response, parsed as an HTML document,
 * to `callback`. Network-level failures are logged and `callback` is not called.
 *
 * @param {string} url - Address to request.
 * @param {function(Document)} callback - Receives the parsed document once the
 *   request has loaded.
 */
function loadRemoteDom(url, callback) {
  var xhr = new XMLHttpRequest();
  // Handle blockbuilder.org: when the object exposes an inner `.xhr`, use that
  // one (presumably the native request behind a wrapper).
  xhr = xhr.xhr || xhr;
  xhr.addEventListener('load', function() {
    var parser = new DOMParser();
    callback(parser.parseFromString(xhr.responseText, 'text/html'));
  });
  // Without this listener a failed request (offline, blocked, proxy down)
  // would leave the page unchanged with no indication of why.
  xhr.addEventListener('error', function() {
    console.error('Failed to load ' + url);
  });
  xhr.open('GET', url);
  xhr.send();
}
// Run initTree each time the Load button (#submit) is clicked.
doc.getElementById('submit').addEventListener('click', initTree);
}(document));
</script>
</body>
https://d3js.org/d3.v4.min.js