// Originally generated by CoffeeScript 1.7.1; hand-maintained since.
//
// Tokenizer: splits text into lowercase word tokens and records every
// n-gram occurrence in an internal cache keyed by "<n>/<w1>/<w2>/...".
// Each cache entry is { location: [startIndex, ...], key: String, length: n }.
var Tokenizer;

Tokenizer = (function() {
  function Tokenizer() {
    // Initialize by resetting — same state as clear().
    this.clear();
  }

  // Reset all accumulated state (token results, n-gram cache, word table).
  // Returns the three fresh containers, matching the original's return shape.
  Tokenizer.prototype.clear = function() {
    this.results = {};
    this.cache = {};
    this.words = {};
    return [this.results, this.cache, this.words];
  };

  // Split `s` on whitespace and break punctuation, strip any remaining
  // punctuation characters from each piece, and lowercase it.
  // Stores the token array on this.results and returns it.
  Tokenizer.prototype.tokenize = function(s) {
    var breakRE = /[\s\n\r.,?!&()]+/;
    // BUG FIX: the original regex had no `g` flag, so only the FIRST
    // punctuation character inside a token was removed.
    var punctRE = /[,?!@#$%^&*()]/g;
    this.results = s.split(breakRE)
      .filter(function(w) {
        // Drop empty and single-space fragments (was _.without('', ' ')).
        return w !== '' && w !== ' ';
      })
      .map(function(w) {
        return w.replace(punctRE, '').toLowerCase();
      });
    return this.results;
  };

  // Return every cached n-gram entry seen at more than one location.
  // (The original's `c || []` fallback was dead code: filter always
  // yields an array.)
  Tokenizer.prototype.getDuplicates = function() {
    var key, dups = [];
    for (key in this.cache) {
      // Own-enumerable iteration, matching Underscore's object filter.
      if (Object.prototype.hasOwnProperty.call(this.cache, key) &&
          this.cache[key].location.length > 1) {
        dups.push(this.cache[key]);
      }
    }
    return dups;
  };

  // Expose the raw n-gram cache.
  Tokenizer.prototype.getAllTokens = function() {
    return this.cache;
  };

  // Collect ngrams(n, sequence) for every n from min through max inclusive.
  // Supports min > max (descending), preserving CoffeeScript range semantics.
  Tokenizer.prototype.allgrams = function(min, max, sequence) {
    var n, out = [];
    if (min <= max) {
      for (n = min; n <= max; n++) {
        out.push(this.ngrams(n, sequence));
      }
    } else {
      for (n = min; n >= max; n--) {
        out.push(this.ngrams(n, sequence));
      }
    }
    return out;
  };

  Tokenizer.prototype.trigrams = function(sequence) {
    return this.ngrams(3, sequence);
  };

  Tokenizer.prototype.bigrams = function(sequence) {
    return this.ngrams(2, sequence);
  };

  // Produce all contiguous n-grams of `sequence` (tokenizing it first if it
  // is a string). Side effect: records each gram's start index in this.cache
  // under the key "<n>/<joined gram>". Returns the array of grams.
  Tokenizer.prototype.ngrams = function(n, sequence) {
    var i, seq, key, entry;
    if (!Array.isArray(sequence)) {
      sequence = this.tokenize(sequence);
    }
    var grams = [];
    // Number of n-gram start positions; never negative (was _.max([0, ...])).
    var count = Math.max(0, sequence.length - n + 1);
    for (i = 0; i < count; i++) {
      seq = sequence.slice(i, i + n);
      key = '' + n + '/' + seq.join('/');
      entry = this.cache[key] ||
        (this.cache[key] = { location: [], key: key, length: n });
      entry.location.push(i);
      grams.push(seq);
    }
    return grams;
  };

  return Tokenizer;

})();