A rather deceiving experiment.
JS native search performs better (either Regex / indexOf).
Results are displayed in console
xxxxxxxxxx
<meta charset="utf-8">
<style>
text {
font: bold 38px monospace;
}
.enter {
fill: green;
}
.update {
fill: #333;
}
</style>
<body>
<script src="https://d3js.org/d3.v2.min.js?2.10.1"></script>
<script>
var AhoCorasick = { };
(function(AhoCorasick) {
function TrieNode() {
this.suffix = { };
this.is_word = null;
this.value = null;
this.data = [ ];
}
TrieNode.prototype.add = function(word, data, original_word) {
var chr = word.charAt(0),
node = this.suffix[chr];
if (!node) {
node = this.suffix[chr] = new TrieNode();
if (original_word) node.value = original_word.substr(0, original_word.length - word.length + 1);
else node.value = chr;
}
if (word.length > 1) node.add(word.substring(1), data, original_word || word);
else {
node.data.push(data);
node.is_word = true;
}
};
TrieNode.prototype.find = function(word) {
var suffix_node;
if (word.length === 0 || this.is_word) return this;
else {
suffix_node = this.suffix[word.charAt(0)];
return suffix_node ? suffix_node.find(word.substring(1)) : null;
}
};
TrieNode.prototype.print = function(prefix) {
var current = this,
suffixes = Object.keys(this.suffix),
out = this.value ? this.value : '(base)';
if (this.is_word) out = '[' + out + ']';
if (prefix) out = prefix + out;
console.log(out);
if (this.suffix_link) console.log(out + ' <- ' + this.suffix_link.value + ' [' + this.suffix_offset + ']');
for (var i = 0, len = suffixes.length; i < len; i++) {
this.suffix[suffixes[i]].print(out + ' -> ');
}
};
AhoCorasick.TrieNode = TrieNode;
AhoCorasick.add_suffix_links = function(node, trie) {
var suffixes = Object.keys(node.suffix),
link_node;
var i,len;
trie = trie || node;
node.suffix_link = null;
node.suffix_offset = 0;
if (node.value) {
for (i = 1, len = node.value.length; i < len && !link_node; i++) {
link_node = trie.find(node.value.substring(i));
}
if (link_node) {
node.suffix_link = link_node;
node.suffix_offset = node.value.length - (node.value.lastIndexOf(link_node.value) + link_node.value.length);
}
}
for (i = 0, len = suffixes.length; i < len; i++) {
AhoCorasick.add_suffix_links(node.suffix[suffixes[i]], trie);
}
};
AhoCorasick.search = function(string, trie, callback) {
var current = trie,
chr, next;
for (var i = 0, len = string.length; i < len; i++) {
chr = string.charAt(i);
next = current.suffix[chr]; // SLOW PART
if (next) {
current = next;
}
else {
if (callback && current && current.is_word)
{
callback(current.value, current.data,i);
}
if (current.suffix_link) {
i = i - (current.suffix_offset + 1);
current = current.suffix_link;
}
else {
current = trie;
}
}
}
if (callback && current && current.is_word) callback(current.value, current.data,i);
};
}(AhoCorasick));
// CHANGE WITH CHARACTER ARRAY
window.AhoCorasick = AhoCorasick;
var trie = new AhoCorasick.TrieNode();
var _text = "Ronaldo, it was the best of the times that it was the best of our memories. ronaldo, she sells by the shore seashells the shells"+
" that she sells by the shore are surely seashells. time it was and what a time it was it was the best of times. ";
var text = "Ronaldo, it was the best of the times that it was the best of our memories. ronaldo, she sells by the shore seashells the shells"+
" that she sells by the shore are surely seashells. time it was and what a time it was it was the best of times, and ronaldo find an ananas on the seashore of the shore of she.";
for (var l =0; l<15;l++) {
text = text+text;
}
var charText = [];
console.log(charText);
var options = [
["it","she"],
["the","your"],
["seashells","bizarreries"],
["and","and i will tell you another thing"],
["shore","sea"],
['seashore','someshore'],
['ronaldo','ron'],
['Ronaldo','ron'],
['ananas','pinneaple'],
['moreover','i want more'],
['was','when'],
['by','bys'],
["ait","she"],
["athe","your"],
["aseashells","bizarreries"],
["aand","and i will tell you another thing"],
["ashore","sea"],
['aseashore','someshore'],
['aronaldo','ron'],
['aRonaldo','ron'],
['aananas','pinneaple'],
['amoreover','i want more'],
['awas','when'],
['aby','bys'],
["vit","she"],
["vthe","your"],
["vseashells","bizarreries"],
["vand","and i will tell you another thing"],
["vshore","sea"],
['vseashore','someshore'],
['vronaldo','ron'],
['vRonaldo','ron'],
['vananas','pinneaple'],
['vmoreover','i want more'],
['vwas','when'],
['vby','bys'],
["vait","she"],
["vathe","your"],
["vaseashells","bizarreries"],
["vaand","and i will tell you another thing"],
["vashore","sea"],
['vaseashore','someshore'],
['varonaldo','ron'],
['vaRonaldo','ron'],
['vaananas','pinneaple'],
['vamoreover','i want more'],
['vawas','when'],
['vaby','bys'],
['cvRonaldo','ron'],
['vananas','pinneaple'],
['vcmoreover','i want more'],
['vwcas','when'],
['vbcy','bys'],
["vaigt","she"],
["vacgthe","your"],
["vasgeashells","bizarreries"],
["vaaand","and i will tell you another thing"],
["vaashore","sea"],
['vadseashore','someshore'],
['vargonaldo','ron'],
['vaRgonaldo','ron'],
['vaagnanas','pinneaple'],
['vagmoreover','i want more'],
['vagwas','when'],
['vagby','bys']
];
//FIMXE !! ALGO SHOULD DISPLAY THE END OF THE STRING
text = text +" TU N'AFFICHERAS PAS CA !!";
options.forEach(function(word) { trie.add(word[0], { word: word[0],replacement:word[1] }); });
AhoCorasick.add_suffix_links(trie);
var currentIndex=0;
var delta=0;
var time = new Date().getTime();
// this one works for word that are larger and smaller
AhoCorasick.search(text, trie, function(found_word, data,i) {
return;
var length=found_word.length;
var start = i-length;
var j=0;
while (currentIndex+delta<start) {
charText[currentIndex]=text[currentIndex+delta];
currentIndex++;
}
while (j<data[0].replacement.length)
{
charText[currentIndex]=data[0].replacement[j];
currentIndex++;
j++;
}
delta = delta + (-data[0].replacement.length+found_word.length);
});
// FIXME !!!! SHOULD GO TO THE END OF TEXT WHEN FINISHED
console.log('ELAPSED',time-(new Date().getTime()));
var time = new Date().getTime();
bruteForceA();
console.log('ELAPSED, BRUTE FORCE',time-(new Date().getTime()));
var s = "";
for (var i=0;i<charText.length;i++) {
s = s + charText[i];
}
//console.log(s);
// NO REALLY <> BETWEEN REGEX AND indexOf
function bruteForceA() {
for (var i=0;i<options.length;i++) {
var idx = indexes(text,options[i][0]);
for (var l=0;l<idx.length;l++) {
var element = idx[l]; // DO WE WIN SOME TIME BY DOING THIS
doStuff(element,options[i][1]); // will end inlined
}
// TODO DO THE SUBSTITUTION
}
}
function indexes(str, find) {
var regex = new RegExp(find, "g"),result,indices = [];
while ((result = regex.exec(str))) {
indices.push(result.index);
}
return indices;
}
function doStuff(id) {
id = id + id;
return id;
}
function getIndicesOf(searchStr, str, caseSensitive) {
var startIndex = 0, searchStrLen = searchStr.length;
var index, indices = [];
if (!caseSensitive) {
str = str.toLowerCase();
searchStr = searchStr.toLowerCase();
}
while ((index = str.indexOf(searchStr, startIndex)) > -1) {
indices.push(index);
startIndex = index + searchStrLen;
}
return indices;
}
</script>
Modified http://d3js.org/d3.v2.min.js?2.10.1 to a secure url
https://d3js.org/d3.v2.min.js?2.10.1