(function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({
/* …modules 1-16 (lexicon + parts-of-speech data) elided in this excerpt… */
17:[function(require,module,exports){
//convert british spellings into american ones
//built with patterns+exceptions from https://en.wikipedia.org/wiki/British_spelling
module.exports = function (str) {
  var patterns = [
    // ise -> ize
    { reg: /([^aeiou][iy])s(e|ed|es|ing)?$/, repl: '$1z$2' },
    // our -> or
    { reg: /(..)our(ly|y|ite)?$/, repl: '$1or$2' },
    // re -> er
    { reg: /([^cdnv])re(s)?$/, repl: '$1er$2' },
    // xion -> tion
    { reg: /([aeiou])xion([ed])?$/, repl: '$1tion$2' },
    //logue -> log
    { reg: /logue$/, repl: 'log' },
    // ae/oe -> e
    { reg: /([oa])e/, repl: 'e' },
    //eing -> ing
    { reg: /e(ing|able)$/, repl: '$1' },
    // illful -> ilful (must be second-syllable)
    { reg: /([aeiou]+[^aeiou]+[aeiou]+)ll(ful|ment|est|ing|or|er|ed)$/, repl: '$1l$2' }
  ]
  for (var i = 0; i < patterns.length; i++) {
    if (str.match(patterns[i].reg)) {
      return str.replace(patterns[i].reg, patterns[i].repl)
    }
  }
  return str
}
// console.log(americanize("synthesise")=="synthesize")
// console.log(americanize("synthesised")=="synthesized")
},{}],18:[function(require,module,exports){
//convert american spellings into british ones
//built with patterns+exceptions from https://en.wikipedia.org/wiki/British_spelling
//(some patterns are only safe to do in one direction)
module.exports = function (str) {
  var patterns = [
    // ize -> ise
    { reg: /([^aeiou][iy])z(e|ed|es|ing)?$/, repl: '$1s$2' },
    // our -> or
    // {
    //   reg: /(..)our(ly|y|ite)?$/,
    //   repl: '$1or$2',
    //   exceptions: []
    // },
    // re -> er
    // {
    //   reg: /([^cdnv])re(s)?$/,
    //   repl: '$1er$2',
    //   exceptions: []
    // },
    // xion -> tion
    // {
    //   reg: /([aeiou])xion([ed])?$/,
    //   repl: '$1tion$2',
    //   exceptions: []
    // },
    //logue -> log
    // {
    //   reg: /logue$/,
    //   repl: 'log',
    //   exceptions: []
    // },
    // ae -> e
    // {
    //   reg: /([oa])e/,
    //   repl: 'e',
    //   exceptions: []
    // },
    //eing -> ing
    // {
    //   reg: /e(ing|able)$/,
    //   repl: '$1',
    //   exceptions: []
    // },
    // ilful -> illful (must be second-syllable)
    {
      reg: /([aeiou]+[^aeiou]+[aeiou]+)l(ful|ment|est|ing|or|er|ed)$/,
      repl: '$1ll$2',
      exceptions: []
    }
  ]
  for (var i = 0; i < patterns.length; i++) {
    if (str.match(patterns[i].reg)) {
      return str.replace(patterns[i].reg, patterns[i].repl)
    }
  }
  return str
}
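//a couple of sanity-checks in the same commented-test style
//(sketch: these follow from the active ize -> ise pattern above; they are not from the original suite)
// console.log(module.exports("organize") === "organise")
// console.log(module.exports("synthesized") === "synthesised")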
},{}],19:[function(require,module,exports){
//chop a string into pronounced syllables
module.exports = function (str) {
  var all = []
  //suffix fixes
  var postprocess = function (arr) {
    //trim whitespace
    arr = arr.map(function (w) {
      w = w.replace(/^ */, '')
      w = w.replace(/ *$/, '')
      return w
    })
    if (arr.length > 2) {
      return arr
    }
    var ones = [
      /^[^aeiou]?ion/,
      /^[^aeiou]?ised/,
      /^[^aeiou]?iled/
    ]
    var l = arr.length
    if (l > 1) {
      var suffix = arr[l - 2] + arr[l - 1];
      for (var i = 0; i < ones.length; i++) {
        if (suffix.match(ones[i])) {
          arr[l - 2] = arr[l - 2] + arr[l - 1];
          arr.pop();
        }
      }
    }
    return arr
  }
  var doer = function (str) {
    var vow = /[aeiouy]$/
    if (!str) {
      return
    }
    var chars = str.split('')
    var before = "";
    var after = "";
    var current = "";
    for (var i = 0; i < chars.length; i++) {
      before = chars.slice(0, i).join('')
      current = chars[i]
      after = chars.slice(i + 1, chars.length).join('')
      var candidate = before + chars[i]
      //rules for syllables-
      //it's a consonant that comes after a vowel
      if (before.match(vow) && !current.match(vow)) {
        if (after.match(/^e[sm]/)) {
          candidate += "e"
          after = after.replace(/^e/, '')
        }
        all.push(candidate)
        return doer(after)
      }
      //unblended vowels ('noisy' vowel combinations)
      if (candidate.match(/(eo|eu|ia|oa|ua|ui)$/i)) { //'io' is noisy, but not in 'ion'
        all.push(before)
        all.push(current)
        return doer(after)
      }
    }
    //if still running, end the last syllable
    if (str.match(/[aiouy]/) || str.match(/ee$/)) { //allow a silent trailing e
      all.push(str)
    } else {
      all[all.length - 1] = (all[all.length - 1] || '') + str; //append it to the last one
    }
  }
  str.split(/[\s\-]/).forEach(function (s) {
    doer(s)
  })
  all = postprocess(all)
  //for words like 'tree' and 'free'
  if (all.length === 0) {
    all = [str]
  }
  return all
}
// console.log(syllables("suddenly").length === 3)
// console.log(syllables("tree")) //broken
// console.log(syllables("birchtree"))
},{}],20:[function(require,module,exports){
//split a string into all possible parts
module.exports = function (text, options) {
  options = options || {}
  var min_count = options.min_count || 1; // minimum hit-count
  var max_size = options.max_size || 5; // maximum gram count
  var REallowedChars = /[^a-zA-Z'\-]+/g; //invalid characters are replaced with a whitespace
  var i, j, k, textlen, s;
  var keys = [null];
  var results = [];
  //max_size++;
  for (i = 1; i <= max_size; i++) {
    keys.push({});
  }
  // clean the text
  text = text.replace(REallowedChars, " ").replace(/^\s+/, "").replace(/\s+$/, "");
  text = text.toLowerCase()
  // create a hash
  text = text.split(/\s+/);
  for (i = 0, textlen = text.length; i < textlen; i++) {
    s = text[i];
    keys[1][s] = (keys[1][s] || 0) + 1;
    for (j = 2; j <= max_size; j++) {
      if (i + j <= textlen) {
        s += " " + text[i + j - 1];
        keys[j][s] = (keys[j][s] || 0) + 1;
      } else {
        break
      }
    }
  }
  // map to array
  i = undefined;
  for (k = 1; k <= max_size; k++) {
    results[k] = [];
    var key = keys[k];
    for (i in key) {
      if (key.hasOwnProperty(i) && key[i] >= min_count) {
        results[k].push({
          "word": i,
          "count": key[i],
          "size": k
        })
      }
    }
  }
  results = results.filter(function (s) {
    return s !== null
  })
  results = results.map(function (r) {
    r = r.sort(function (a, b) {
      return b.count - a.count
    })
    return r;
  });
  return results
}
// s = ngram("i really think that we all really think it's all good")
// console.log(s)
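//eg. with the options above (hedged sketch — after the filter re-indexes, s[0] holds unigrams and s[1] bigrams):
// s = ngram("really think that we really think", { min_count: 2, max_size: 2 })
// s[1][0] -> { word: "really think", count: 2, size: 2 }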
},{}],21:[function(require,module,exports){
//(Rule-based sentence boundary segmentation) - chop given text into its proper sentences.
// Ignore periods/questions/exclamations used in acronyms/abbreviations/numbers, etc.
// @spencermountain 2015 MIT
module.exports = function (text) {
  var abbreviations = require("../../data/lexicon/abbreviations")
  var sentences = [];
  //first do a greedy-split..
  var chunks = text.split(/(\S.+?[.\?!])(?=\s+|$|")/g);
  //date abbrevs.
  //these are added separately because they are not nouns
  abbreviations = abbreviations.concat(["jan", "feb", "mar", "apr", "jun", "jul", "aug", "sep", "oct", "nov", "dec", "sept", "sep"]);
  //detection of non-sentence chunks
  var abbrev_reg = new RegExp("\\b(" + abbreviations.join("|") + ")[.!?] ?$", "i");
  var acronym_reg = new RegExp("[ .][A-Z]\\.?$", "i")
  var ellipses_reg = new RegExp("\\.\\.\\.*$")
  //loop through these chunks, and join the non-sentence chunks back together..
  var chunks_length = chunks.length;
  for (var i = 0; i < chunks_length; i++) {
    if (chunks[i]) {
      //trim whitespace
      chunks[i] = chunks[i].replace(/^\s+|\s+$/g, "");
      //should this chunk be combined with the next one?
      if (chunks[i + 1] && (chunks[i].match(abbrev_reg) || chunks[i].match(acronym_reg) || chunks[i].match(ellipses_reg))) {
        chunks[i + 1] = ((chunks[i] || '') + " " + (chunks[i + 1] || '')).replace(/ +/g, " ");
      } else if (chunks[i] && chunks[i].length > 0) {
        //this chunk is a proper sentence..
        sentences.push(chunks[i]);
        chunks[i] = "";
      }
    }
  }
  //if we never got a sentence, return the given text
  if (sentences.length === 0) {
    return [text]
  }
  return sentences;
}
// console.log(sentence_parser('Tony is nice. He lives in Japan.').length === 2)
// console.log(sentence_parser('I like that Color').length === 1)
// console.log(sentence_parser("She was dead. He was ill.").length === 2)
// console.log(sentence_parser("i think it is good ... or else.").length == 1)
},{"../../data/lexicon/abbreviations":3}],22:[function(require,module,exports){
//split a string into 'words' - as intended to be most helpful for this library.
var sentence_parser = require("./sentence")
var multiples = require("../../data/lexicon/multiples")
//these expressions ought to be one token, not two, because they are a distinct POS together
var multi_words = Object.keys(multiples).map(function (m) {
  return m.split(' ')
})
var normalise = function (str) {
  if (!str) {
    return ""
  }
  str = str.toLowerCase()
  str = str.replace(/[,\.!:;\?\(\)]/g, '')
  str = str.replace(/’/g, "'")
  str = str.replace(/"/g, "")
  if (!str.match(/[a-z0-9]/i)) {
    return ''
  }
  return str
}
var sentence_type = function (sentence) {
  if (sentence.match(/\?$/)) {
    return "interrogative";
  } else if (sentence.match(/\!$/)) {
    return "exclamative";
  } else {
    return "declarative";
  }
}
//some multi-word tokens should be combined here
var combine_multiples = function (arr) {
  var better = []
  var normalised = arr.map(function (a) {
    return normalise(a)
  }) //cached results
  for (var i = 0; i < arr.length; i++) {
    for (var o = 0; o < multi_words.length; o++) {
      if (arr[i + 1] && normalised[i] === multi_words[o][0] && normalised[i + 1] === multi_words[o][1]) {
        //we have a match
        arr[i] = arr[i] + ' ' + arr[i + 1]
        arr[i + 1] = null
        break
      }
    }
    better.push(arr[i])
  }
  return better.filter(function (w) {
    return w
  })
}
var tokenize = function (str) {
  var sentences = sentence_parser(str)
  return sentences.map(function (sentence) {
    var arr = sentence.split(' ');
    arr = combine_multiples(arr)
    var tokens = arr.map(function (w, i) {
      return {
        text: w,
        normalised: normalise(w),
        title_case: (w.match(/^[A-Z][a-z]/) !== null), //use for merge-tokens
        noun_capital: i > 0 && (w.match(/^[A-Z][a-z]/) !== null), //use for noun signal
        punctuated: (w.match(/[,;:\(\)"]/) !== null) || undefined,
        end: (i === (arr.length - 1)) || undefined,
        start: (i === 0) || undefined
      }
    })
    return {
      sentence: sentence,
      tokens: tokens,
      type: sentence_type(sentence)
    }
  })
}
module.exports = tokenize
// console.log(tokenize("i live in new york")[0].tokens.length==4)
// console.log(tokenize("I speak optimistically of course.")[0].tokens.length==4)
// console.log(tokenize("Joe is 9")[0].tokens.length==3)
// console.log(tokenize("Joe in Toronto")[0].tokens.length==3)
// console.log(tokenize("I am mega-rich")[0].tokens.length==3)
},{"../../data/lexicon/multiples":9,"./sentence":21}],23:[function(require,module,exports){
// a hugely-ignorant, and widely subjective transliteration of Latin, Cyrillic, and Greek unicode characters to english ascii.
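//options.percentage is the rough share of eligible characters that get swapped — each candidate flips when Math.random()*100 falls under it
//eg. (sketch) normalize("Björk", {percentage: 100}) always maps ö -> o, giving "Bjork"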
//http://en.wikipedia.org/wiki/List_of_Unicode_characters //https://docs.google.com/spreadsheet/ccc?key=0Ah46z755j7cVdFRDM1A2YVpwa1ZYWlpJM2pQZ003M0E //approximate visual (not semantic) relationship between unicode and ascii characters var compact = { "2": "²ƻ", "3": "³ƷƸƹƺǮǯЗҘҙӞӟӠӡȜȝ", "5": "Ƽƽ", "8": "Ȣȣ", "!": "¡", "?": "¿Ɂɂ", "a": "ªÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑΔΛάαλАДадѦѧӐӑӒӓƛɅ", "b": "ßþƀƁƂƃƄƅɃΒβϐϦБВЪЬбвъьѢѣҌҍҔҕƥƾ", "c": "¢©ÇçĆćĈĉĊċČčƆƇƈȻȼͻͼͽϲϹϽϾϿЄСсєҀҁҪҫ", "d": "ÐĎďĐđƉƊȡƋƌǷ", "e": "ÈÉÊËèéêëĒēĔĕĖėĘęĚěƎƏƐǝȄȅȆȇȨȩɆɇΈΕΞΣέεξϱϵ϶ЀЁЕЭеѐёҼҽҾҿӖӗӘәӚӛӬӭ", "f": "ƑƒϜϝӺӻ", "g": "ĜĝĞğĠġĢģƓǤǥǦǧǴǵ", "h": "ĤĥĦħƕǶȞȟΉΗЂЊЋНнђћҢңҤҥҺһӉӊ", "I": "ÌÍÎÏ", "i": "ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇії", "j": "ĴĵǰȷɈɉϳЈј", "k": "ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ", "l": "ĹĺĻļĽľĿŀŁłƚƪǀǏǐȴȽΙӀӏ", "m": "ΜϺϻМмӍӎ", "n": "ÑñŃńŅņŇňʼnŊŋƝƞǸǹȠȵΝΠήηϞЍИЙЛПийлпѝҊҋӅӆӢӣӤӥπ", "o": "ÒÓÔÕÖØðòóôõöøŌōŎŏŐőƟƠơǑǒǪǫǬǭǾǿȌȍȎȏȪȫȬȭȮȯȰȱΌΘΟΦΩδθοσόϕϘϙϬϭϴОФоѲѳѺѻѼѽӦӧӨөӪӫ¤ƍΏ", "p": "ƤƿΡρϷϸϼРрҎҏÞ", "q": "Ɋɋ", "r": "ŔŕŖŗŘřƦȐȑȒȓɌɍЃГЯгяѓҐґҒғӶӷſ", "s": "ŚśŜŝŞşŠšƧƨȘșȿςϚϛϟϨϩЅѕ", "t": "ŢţŤťŦŧƫƬƭƮȚțȶȾΓΤτϮϯТт҂Ҭҭ", "u": "µÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưƱƲǓǔǕǖǗǘǙǚǛǜȔȕȖȗɄΰμυϋύϑЏЦЧцџҴҵҶҷҸҹӋӌӇӈ", "v": "ƔνѴѵѶѷ", "w": "ŴŵƜωώϖϢϣШЩшщѡѿ", "x": "×ΧχϗϰХхҲҳӼӽӾӿ", "y": "¥ÝýÿŶŷŸƳƴȲȳɎɏΎΥΨΫγψϒϓϔЎУучўѰѱҮүҰұӮӯӰӱӲӳ", "z": "ŹźŻżŽžƩƵƶȤȥɀΖζ" } //decompress data into an array var data = [] Object.keys(compact).forEach(function (k) { compact[k].split('').forEach(function (s) { data.push([s, k]) }) }) //convert array to two hashes var normaler = {} var greek = {} data.forEach(function (arr) { normaler[arr[0]] = arr[1] greek[arr[1]] = arr[0] }) var normalize = function (str, options) { options = options || {} options.percentage = options.percentage || 50 var arr = str.split('').map(function (s) { var r = Math.random() * 100 if (normaler[s] && r < options.percentage) { return normaler[s] || s } else { return s } }) return arr.join('') } var denormalize = function (str, options) { options = options || {} options.percentage = options.percentage || 50 var arr = str.split('').map(function (s) { var r = Math.random() * 100 if (greek[s] && r < options.percentage) { return greek[s] || s } else { return s } }) return arr.join('') } module.exports = { normalize: normalize, denormalize: denormalize } // s = "ӳžŽżźŹźӳžŽżźŹźӳžŽżźŹźӳžŽżźŹźӳžŽżźŹź" // s = "Björk" // console.log(normalize.normalize(s, { // percentage: 100 // })) // s = "The quick brown fox jumps over the lazy dog" // console.log(normalize.denormalize(s, { // percentage: 100 // })) },{}],24:[function(require,module,exports){ //these are adjectives that can become comparative + superlative with out "most/more" //its a whitelist for conjugation //this data is shared between comparative/superlative methods module.exports= [ "absurd", "aggressive", "alert", "alive", "awesome", "beautiful", "big", "bitter", "black", "blue", "bored", "boring", "brash", "brave", "brief", "bright", "broad", "brown", "calm", "charming", "cheap", "clean", "cold", "cool", "cruel", "cute", "damp", "deep", "dear", "dead", "dark", "dirty", "drunk", "dull", "eager", "efficient", "even", "faint", "fair", "fanc", "fast", "fat", "feeble", "few", "fierce", "fine", "flat", "forgetful", "frail", "full", "gentle", "glib", "great", "green", "gruesome", "handsome", "hard", "harsh", "high", "hollow", "hot", "impolite", "innocent", "keen", "kind", "lame", "lean", "light", "little", "loose", "long", "loud", "low", "lush", "macho", "mean", "meek", "mellow", "mundane", "near", "neat", "new", "nice", "normal", "odd", "old", "pale", "pink", "plain", "poor", "proud", "purple", "quick", "rare", "rapid", 
"red", "rich", "ripe", "rotten", "round", "rude", "sad", "safe", "scarce", "scared", "shallow", "sharp", "short", "shrill", "simple", "slim", "slow", "small", "smart", "smooth", "soft", "sore", "sour", "square", "stale", "steep", "stiff", "straight", "strange", "strong", "sweet", "swift", "tall", "tame", "tart", "tender", "tense", "thick", "thin", "tight", "tough", "vague", "vast", "vulgar", "warm", "weak", "wet", "white", "wide", "wild", "wise", "young", "yellow", "easy", "narrow", "late", "early", "soon", "close", "empty", "dry", "windy", "noisy", "thirsty", "hungry", "fresh", "quiet", "clear", "heavy", "happy", "funny", "lucky", "pretty", "important", "interesting", "attractive", "dangerous", "intellegent", "pure", "orange", "large", "firm", "grand", "formal", "raw", "weird", "glad", "mad", "strict", "tired", "solid", "extreme", "mature", "true", "free", "curly", "angry" ].reduce(function(h,s){ h[s]=true return h },{}) },{}],25:[function(require,module,exports){ //turn 'quick' into 'quickly' var main = function (str) { var irregulars = { "idle": "idly", "public": "publicly", "vague": "vaguely", "day": "daily", "icy": "icily", "single": "singly", "female": "womanly", "male": "manly", "simple": "simply", "whole": "wholly", "special": "especially", "straight": "straight", "wrong": "wrong", "fast": "fast", "hard": "hard", "late": "late", "early": "early", "well": "well", "best": "best", "latter": "latter", "bad": "badly" } var dont = { "foreign": 1, "black": 1, "modern": 1, "next": 1, "difficult": 1, "degenerate": 1, "young": 1, "awake": 1, "back": 1, "blue": 1, "brown": 1, "orange": 1, "complex": 1, "cool": 1, "dirty": 1, "done": 1, "empty": 1, "fat": 1, "fertile": 1, "frozen": 1, "gold": 1, "grey": 1, "gray": 1, "green": 1, "medium": 1, "parallel": 1, "outdoor": 1, "unknown": 1, "undersized": 1, "used": 1, "welcome": 1, "yellow": 1, "white": 1, "fixed": 1, "mixed": 1, "super": 1, "guilty": 1, "tiny": 1, "able": 1, "unable": 1, "same": 1, "adult": 1 } var transforms = [{ reg: /al$/i, repl: 'ally' }, { reg: /ly$/i, repl: 'ly' }, { reg: /(.{3})y$/i, repl: '$1ily' }, { reg: /que$/i, repl: 'quely' }, { reg: /ue$/i, repl: 'uly' }, { reg: /ic$/i, repl: 'ically' }, { reg: /ble$/i, repl: 'bly' }, { reg: /l$/i, repl: 'ly' }] var not_matches = [ /airs$/, /ll$/, /ee.$/, /ile$/ ] if (dont[str]) { return null } if (irregulars[str]) { return irregulars[str] } if (str.length <= 3) { return null } var i; for (i = 0; i < not_matches.length; i++) { if (str.match(not_matches[i])) { return null } } for (i = 0; i < transforms.length; i++) { if (str.match(transforms[i].reg)) { return str.replace(transforms[i].reg, transforms[i].repl) } } return str + 'ly' } module.exports = main; // console.log(adj_to_adv('direct')) },{}],26:[function(require,module,exports){ //turn 'quick' into 'quickly' var convertables = require("./convertables") var main = function (str) { var irregulars = { "grey": "greyer", "gray": "grayer", "green": "greener", "yellow": "yellower", "red": "redder", "good": "better", "well": "better", "bad": "worse", "sad": "sadder" } var dont = { "overweight": 1, "main": 1, "nearby": 1, "asleep": 1, "weekly": 1, "secret": 1, "certain": 1 } var transforms = [{ reg: /y$/i, repl: 'ier' }, { reg: /([aeiou])t$/i, repl: '$1tter' }, { reg: /([aeou])de$/i, repl: '$1der' }, { reg: /nge$/i, repl: 'nger' }] var matches = [ /ght$/, /nge$/, /ough$/, /ain$/, /uel$/, /[au]ll$/, /ow$/, /old$/, /oud$/, /e[ae]p$/ ] var not_matches = [ /ary$/, /ous$/ ] if (dont.hasOwnProperty(str)) { return null } for (i = 0; i < 
transforms.length; i++) { if (str.match(transforms[i].reg)) { return str.replace(transforms[i].reg, transforms[i].repl) } } if (convertables.hasOwnProperty(str)) { if (str.match(/e$/)) { return str + "r" } else { return str + "er" } } if (irregulars.hasOwnProperty(str)) { return irregulars[str] } var i; for (i = 0; i < not_matches.length; i++) { if (str.match(not_matches[i])) { return "more " + str } } for (i = 0; i < matches.length; i++) { if (str.match(matches[i])) { return str + "er" } } return "more " + str } module.exports = main; },{"./convertables":24}],27:[function(require,module,exports){ //convert cute to cuteness module.exports = function (w) { var irregulars = { "clean": "cleanliness", "naivety": "naivety" }; if (!w) { return ""; } if (irregulars.hasOwnProperty(w)) { return irregulars[w]; } if (w.match(" ")) { return w; } if (w.match(/w$/)) { return w; } var transforms = [{ "reg": /y$/, "repl": 'iness' }, { "reg": /le$/, "repl": 'ility' }, { "reg": /ial$/, "repl": 'y' }, { "reg": /al$/, "repl": 'ality' }, { "reg": /ting$/, "repl": 'ting' }, { "reg": /ring$/, "repl": 'ring' }, { "reg": /bing$/, "repl": 'bingness' }, { "reg": /sing$/, "repl": 'se' }, { "reg": /ing$/, "repl": 'ment' }, { "reg": /ess$/, "repl": 'essness' }, { "reg": /ous$/, "repl": 'ousness' }, ] for (var i = 0; i < transforms.length; i++) { if (w.match(transforms[i].reg)) { return w.replace(transforms[i].reg, transforms[i].repl); } } if (w.match(/s$/)) { return w; } return w + "ness"; }; },{}],28:[function(require,module,exports){ //turn 'quick' into 'quickest' var convertables = require("./convertables") module.exports = function (str) { var irregulars = { "nice": "nicest", "late": "latest", "hard": "hardest", "inner": "innermost", "outer": "outermost", "far": "furthest", "worse": "worst", "bad": "worst", "good": "best" } var dont = { "overweight": 1, "ready": 1 } var transforms = [{ "reg": /y$/i, "repl": 'iest' }, { "reg": /([aeiou])t$/i, "repl": '$1ttest' }, { "reg": /([aeou])de$/i, "repl": '$1dest' }, { "reg": /nge$/i, "repl": 'ngest' }] var matches = [ /ght$/, /nge$/, /ough$/, /ain$/, /uel$/, /[au]ll$/, /ow$/, /oud$/, /...p$/ ] var not_matches = [ /ary$/ ] var generic_transformation = function (str) { if (str.match(/e$/)) { return str + "st" } else { return str + "est" } } for (i = 0; i < transforms.length; i++) { if (str.match(transforms[i].reg)) { return str.replace(transforms[i].reg, transforms[i].repl) } } if (convertables.hasOwnProperty(str)) { return generic_transformation(str) } if (dont.hasOwnProperty(str)) { return "most " + str } if (irregulars.hasOwnProperty(str)) { return irregulars[str] } var i; for (i = 0; i < not_matches.length; i++) { if (str.match(not_matches[i])) { return "most " + str } } for (i = 0; i < matches.length; i++) { if (str.match(matches[i])) { return generic_transformation(str) } } return "most " + str } },{"./convertables":24}],29:[function(require,module,exports){ //wrapper for Adjective's methods var Adjective = function (str, sentence, word_i) { var the = this the.word = str || ''; var to_comparative = require("./conjugate/to_comparative") var to_superlative = require("./conjugate/to_superlative") var adj_to_adv = require("./conjugate/to_adverb") var adj_to_noun = require("./conjugate/to_noun") var parts_of_speech = require("../../data/parts_of_speech") the.conjugate = function () { return { comparative: to_comparative(the.word), superlative: to_superlative(the.word), adverb: adj_to_adv(the.word), noun: adj_to_noun(the.word) } } the.which = (function () { if 
(the.word.match(/..est$/)) {
      return parts_of_speech['JJS']
    }
    if (the.word.match(/..er$/)) {
      return parts_of_speech['JJR']
    }
    return parts_of_speech['JJ']
  })()
  return the;
};
module.exports = Adjective;
// console.log(new Adjective("crazy"))
},{"../../data/parts_of_speech":14,"./conjugate/to_adverb":25,"./conjugate/to_comparative":26,"./conjugate/to_noun":27,"./conjugate/to_superlative":28}],30:[function(require,module,exports){
//turns 'quickly' into 'quick'
module.exports = function (str) {
  var irregulars = {
    "idly": "idle",
    "sporadically": "sporadic",
    "basically": "basic",
    "grammatically": "grammatical",
    "alphabetically": "alphabetical",
    "economically": "economical",
    "conically": "conical",
    "politically": "political",
    "vertically": "vertical",
    "practically": "practical",
    "theoretically": "theoretical",
    "critically": "critical",
    "fantastically": "fantastic",
    "mystically": "mystical",
    "pornographically": "pornographic",
    "fully": "full",
    "jolly": "jolly",
    "wholly": "whole"
  }
  var transforms = [
    { "reg": /bly$/i, "repl": 'ble' },
    { "reg": /gically$/i, "repl": 'gical' },
    { "reg": /([rsdh])ically$/i, "repl": '$1ical' },
    { "reg": /ically$/i, "repl": 'ic' },
    { "reg": /uly$/i, "repl": 'ue' },
    { "reg": /ily$/i, "repl": 'y' },
    { "reg": /(.{3})ly$/i, "repl": '$1' }
  ]
  if (irregulars.hasOwnProperty(str)) {
    return irregulars[str]
  }
  for (var i = 0; i < transforms.length; i++) {
    if (str.match(transforms[i].reg)) {
      return str.replace(transforms[i].reg, transforms[i].repl)
    }
  }
  return str
}
// console.log(to_adjective('quickly') === 'quick')
// console.log(to_adjective('marvelously') === 'marvelous')
},{}],31:[function(require,module,exports){
//wrapper for Adverb's methods
var Adverb = function (str, sentence, word_i) {
  var the = this
  the.word = str || '';
  var to_adjective = require("./conjugate/to_adjective")
  var parts_of_speech = require("../../data/parts_of_speech")
  the.conjugate = function () {
    return {
      adjective: to_adjective(the.word)
    }
  }
  the.which = (function () {
    if (the.word.match(/..est$/)) {
      return parts_of_speech['RBS']
    }
    if (the.word.match(/..er$/)) {
      return parts_of_speech['RBR']
    }
    return parts_of_speech['RB']
  })()
  return the;
}
module.exports = Adverb;
// console.log(new Adverb("suddenly").conjugate())
// console.log(adverbs.conjugate('powerfully'))
},{"../../data/parts_of_speech":14,"./conjugate/to_adjective":30}],32:[function(require,module,exports){
//converts nouns between plural and singular, and vice-versa
//some regex borrowed from pksunkara/inflect
//https://github.com/pksunkara/inflect/blob/master/lib/defaults.js
var uncountables = require("../../../data/lexicon/uncountables")
var irregular_nouns = require("../../../data/lexicon/irregular_nouns")
var i;
//words that shouldn't ever inflect, for metaphysical reasons
var uncountable_nouns = uncountables.reduce(function (h, a) {
  h[a] = true
  return h
}, {})
var titlecase = function (str) {
  if (!str) {
    return ''
  }
  return str.charAt(0).toUpperCase() + str.slice(1)
}
//these aren't nouns, but let's inflect them anyways
var irregulars = [
  ["he", "they"],
  ["she", "they"],
  ["this", "these"],
  ["that", "those"],
  ["mine", "ours"],
  ["hers", "theirs"],
  ["his", "theirs"],
  ["i", "we"],
  ["move", "_s"],
  ["myself", "ourselves"],
  ["yourself", "yourselves"],
  ["himself", "themselves"],
  ["herself", "themselves"],
  ["themself", "themselves"],
  ["its", "theirs"],
  ["theirs", "_"]
]
irregulars = irregulars.concat(irregular_nouns)
var pluralize_rules = [
  [/(ax|test)is$/i, '$1es'],
  [/(octop|vir|radi|nucle|fung|cact|stimul)us$/i, '$1i'],
  [/(octop|vir)i$/i, '$1i'],
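  //(note: these rules are tried in order and the first match wins, so the specific cases must sit above the generic /(.*)/ -> '$1s' catch-all at the bottom)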
[/([rl])f$/i, '$1ves'], [/(alias|status)$/i, '$1es'], [/(bu)s$/i, '$1ses'], [/(al|ad|at|er|et|ed|ad)o$/i, '$1oes'], [/([ti])um$/i, '$1a'], [/([ti])a$/i, '$1a'], [/sis$/i, 'ses'], [/(?:([^f])fe|([lr])f)$/i, '$1ves'], [/(hive)$/i, '$1s'], [/([^aeiouy]|qu)y$/i, '$1ies'], [/(x|ch|ss|sh|s|z)$/i, '$1es'], [/(matr|vert|ind|cort)(ix|ex)$/i, '$1ices'], [/([m|l])ouse$/i, '$1ice'], [/([m|l])ice$/i, '$1ice'], [/^(ox)$/i, '$1en'], [/^(oxen)$/i, '$1'], [/(quiz)$/i, '$1zes'], [/(antenn|formul|nebul|vertebr|vit)a$/i, '$1ae'], [/(sis)$/i, 'ses'], [/^(?!talis|.*hu)(.*)man$/i, '$1men'], [/(.*)/i, '$1s'] ].map(function (a) { return { reg: a[0], repl: a[1] } }) var pluralize = function (str) { var low = str.toLowerCase() //uncountable if (uncountable_nouns[low]) { return str } //is it already plural? if (is_plural(low) === true) { return str } //irregular var found = irregulars.filter(function (r) { return r[0] === low }) if (found[0]) { if (titlecase(low) === str) { //handle capitalisation properly return titlecase(found[0][1]) } else { return found[0][1] } } //inflect first word of preposition-phrase if (str.match(/([a-z]*) (of|in|by|for) [a-z]/)) { var first = (str.match(/^([a-z]*) (of|in|by|for) [a-z]/) || [])[1] if (first) { var better_first = pluralize(first) return better_first + str.replace(first, '') } } //regular for (i = 0; i < pluralize_rules.length; i++) { if (str.match(pluralize_rules[i].reg)) { return str.replace(pluralize_rules[i].reg, pluralize_rules[i].repl) } } } var singularize_rules = [ [/([^v])ies$/i, '$1y'], [/ises$/i, 'isis'], [/ives$/i, 'ife'], [/(antenn|formul|nebul|vertebr|vit)ae$/i, '$1a'], [/(octop|vir|radi|nucle|fung|cact|stimul)(i)$/i, '$1us'], [/(buffal|tomat|tornad)(oes)$/i, '$1o'], [/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i, '$1sis'], [/(vert|ind|cort)(ices)$/i, '$1ex'], [/(matr|append)(ices)$/i, '$1ix'], [/(x|ch|ss|sh|s|z|o)es$/i, '$1'], [/men$/i, 'man'], [/(n)ews$/i, '$1ews'], [/([ti])a$/i, '$1um'], [/([^f])ves$/i, '$1fe'], [/([lr])ves$/i, '$1f'], [/([^aeiouy]|qu)ies$/i, '$1y'], [/(s)eries$/i, '$1eries'], [/(m)ovies$/i, '$1ovie'], [/([m|l])ice$/i, '$1ouse'], [/(cris|ax|test)es$/i, '$1is'], [/(alias|status)es$/i, '$1'], [/(ss)$/i, '$1'], [/(ics)$/i, "$1"], [/s$/i, ''] ].map(function (a) { return { reg: a[0], repl: a[1] } }) var singularize = function (str) { var low = str.toLowerCase() //uncountable if (uncountable_nouns[low]) { return str } //is it already singular? 
if (is_plural(low) === false) {
    return str
  }
  //irregular
  var found = irregulars.filter(function (r) {
    return r[1] === low
  })
  if (found[0]) {
    if (titlecase(low) === str) { //handle capitalisation properly
      return titlecase(found[0][0])
    } else {
      return found[0][0]
    }
  }
  //inflect first word of preposition-phrase
  if (str.match(/([a-z]*) (of|in|by|for) [a-z]/)) {
    var first = str.match(/^([a-z]*) (of|in|by|for) [a-z]/)
    if (first && first[1]) {
      var better_first = singularize(first[1])
      return better_first + str.replace(first[1], '')
    }
  }
  //regular
  for (i = 0; i < singularize_rules.length; i++) {
    if (str.match(singularize_rules[i].reg)) {
      return str.replace(singularize_rules[i].reg, singularize_rules[i].repl)
    }
  }
  return str
}
var is_plural = function (str) {
  str = (str || '').toLowerCase()
  //handle 'mayors of chicago'
  var preposition = str.match(/([a-z]*) (of|in|by|for) [a-z]/)
  if (preposition && preposition[1]) {
    str = preposition[1]
  }
  //if it's a known irregular case
  for (i = 0; i < irregulars.length; i++) {
    if (irregulars[i][1] === str) {
      return true
    }
    if (irregulars[i][0] === str) {
      return false
    }
  }
  //similar to the plural/singularize rules, but not the same
  var plural_indicators = [
    /([^v])ies$/i,
    /ises$/i,
    /ives$/i,
    /(antenn|formul|nebul|vertebr|vit)ae$/i,
    /(octop|vir|radi|nucle|fung|cact|stimul)i$/i,
    /(buffal|tomat|tornad)oes$/i,
    /(analy|ba|diagno|parenthe|progno|synop|the)ses$/i,
    /(vert|ind|cort)ices$/i,
    /(matr|append)ices$/i,
    /(x|ch|ss|sh|s|z|o)es$/i,
    /men$/i,
    /news$/i,
    /.tia$/i,
    /([^f])ves$/i,
    /([lr])ves$/i,
    /([^aeiouy]|qu)ies$/i,
    /(m|l)ice$/i,
    /(cris|ax|test)es$/i,
    /(alias|status)es$/i,
    /ics$/i
  ]
  for (i = 0; i < plural_indicators.length; i++) {
    if (str.match(plural_indicators[i])) {
      return true
    }
  }
  //similar to the plural/singularize rules, but not the same
  var singular_indicators = [
    /(ax|test)is$/i,
    /(octop|vir|radi|nucle|fung|cact|stimul)us$/i,
    /(octop|vir)i$/i,
    /([rl])f$/i,
    /(alias|status)$/i,
    /(bu)s$/i,
    /(al|ad|at|er|et|ed|ad)o$/i,
    /(ti)um$/i,
    /(ti)a$/i,
    /sis$/i,
    /(?:([^f])fe|([lr])f)$/i,
    /hive$/i,
    /([^aeiouy]|qu)y$/i,
    /(x|ch|ss|sh|z)$/i,
    /(matr|vert|ind|cort)(ix|ex)$/i,
    /(m|l)ouse$/i,
    /(m|l)ice$/i,
    /(antenn|formul|nebul|vertebr|vit)a$/i,
    /.sis$/i,
    /^(?!talis|.*hu)(.*)man$/i
  ]
  for (i = 0; i < singular_indicators.length; i++) {
    if (str.match(singular_indicators[i])) {
      return false
    }
  }
  // 'looks pretty plural' rules
  if (str.match(/s$/) && !str.match(/ss$/) && str.length > 3) { //needs some lovin'
    return true
  }
  return false
}
var inflect = function (str) {
  if (uncountable_nouns[str]) { //uncountables shouldn't ever inflect
    return {
      plural: str,
      singular: str
    }
  }
  if (is_plural(str)) {
    return {
      plural: str,
      singular: singularize(str)
    }
  } else {
    return {
      singular: str,
      plural: pluralize(str)
    }
  }
}
module.exports = {
  inflect: inflect,
  is_plural: is_plural,
  singularize: singularize,
  pluralize: pluralize
}
// console.log(inflect.singularize('kisses')=="kiss")
// console.log(inflect.singularize('kiss')=="kiss")
// console.log(inflect.singularize('children')=="child")
// console.log(inflect.singularize('child')=="child")
// console.log(inflect.pluralize('gas')=="gases")
// console.log(inflect.pluralize('narrative')=="narratives")
// console.log(inflect.singularize('gases')=="gas")
// console.log(inflect.pluralize('video')=="videos")
// console.log(inflect.pluralize('photo')=="photos")
// console.log(inflect.pluralize('stomach')=="stomachs")
// console.log(inflect.pluralize('database')=="databases")
// console.log(inflect.pluralize('kiss')=="kisses")
// console.log(inflect.pluralize('towns')=="towns")
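//(added spot-check, same commented style: the first word of a preposition-phrase is the part that inflects)
// console.log(inflect.pluralize('secretary of state')=="secretaries of state")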
// console.log(inflect.pluralize('mayor of chicago')=="mayors of chicago")
// console.log(inflect.inflect('Index').plural=='Indices')
// console.log(inflect.is_plural('octopus')==false)
// console.log(inflect.is_plural('octopi')==true)
// console.log(inflect.is_plural('eyebrow')==false)
// console.log(inflect.is_plural('eyebrows')==true)
// console.log(inflect.is_plural('child')==false)
// console.log(inflect.is_plural('children')==true)
// console.log(inflect.singularize('mayors of chicago')=="mayor of chicago")
},{"../../../data/lexicon/irregular_nouns":8,"../../../data/lexicon/uncountables":11}],33:[function(require,module,exports){
//chooses an indefinite article 'a/an' for a word
module.exports = function (str) {
  if (!str) {
    return null
  }
  var irregulars = {
    "hour": "an",
    "heir": "an",
    "heirloom": "an",
    "honest": "an",
    "honour": "an",
    "honor": "an",
    "uber": "an" //german u
  }
  var is_acronym = function (s) {
    //no periods
    if (s.length <= 5 && s.match(/^[A-Z]*$/)) {
      return true
    }
    //with periods
    if (s.length >= 4 && s.match(/^([A-Z]\.)*$/)) {
      return true
    }
    return false
  }
  //pronounced letters of acronyms that get an 'an'
  var an_acronyms = {
    A: true, E: true, F: true, H: true, I: true, L: true, M: true, N: true, O: true, R: true, S: true, X: true
  }
  //'a' regexes
  var a_regexs = [
    /^onc?e/i, //'wu' sound of 'o'
    /^u[bcfhjkqrstn][aeiou]/i, //'yu' sound for hard 'u'
    /^eul/i
  ];
  //begin business time
  ////////////////////
  //explicit irregular forms
  if (irregulars.hasOwnProperty(str)) {
    return irregulars[str]
  }
  //spelled-out acronyms
  if (is_acronym(str) && an_acronyms.hasOwnProperty(str.substr(0, 1))) {
    return "an"
  }
  //'a' regexes
  for (var i = 0; i < a_regexs.length; i++) {
    if (str.match(a_regexs[i])) {
      return "a"
    }
  }
  //basic vowel-startings
  if (str.match(/^[aeiou]/i)) {
    return "an"
  }
  return "a"
}
// console.log(indefinite_article("wolf") === "a")
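//(a few added spot-checks, in the same style)
// console.log(indefinite_article("hour") === "an")  //irregular form
// console.log(indefinite_article("FBI") === "an")   //acronym pronounced 'ef-'
// console.log(indefinite_article("union") === "a")  //hard 'yu' sound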
},{}],34:[function(require,module,exports){
//wrapper for noun's methods
var Noun = function (str, sentence, word_i) {
  var the = this
  var token, next;
  if (sentence !== undefined && word_i !== undefined) {
    token = sentence.tokens[word_i]
    next = sentence.tokens[word_i + 1]
  }
  the.word = str || '';
  var parts_of_speech = require("../../data/parts_of_speech")
  var firstnames = require("../../data/lexicon/firstnames")
  var honourifics = require("../../data/lexicon/honourifics")
  var inflect = require("./conjugate/inflect")
  var indefinite_article = require("./indefinite_article")
  //personal pronouns
  var prps = {
    "it": "PRP", "they": "PRP", "i": "PRP", "them": "PRP", "you": "PRP", "she": "PRP", "me": "PRP", "he": "PRP", "him": "PRP", "her": "PRP", "us": "PRP", "we": "PRP", "thou": "PRP"
  }
  var blacklist = {
    "itself": 1, "west": 1, "western": 1, "east": 1, "eastern": 1, "north": 1, "northern": 1, "south": 1, "southern": 1, "the": 1, "one": 1, "your": 1, "my": 1, "today": 1, "yesterday": 1, "tomorrow": 1, "era": 1, "century": 1, "it": 1
  }
  //for resolution of obama -> he -> his
  var posessives = {
    "his": "he", "her": "she", "hers": "she", "their": "they", "them": "they", "its": "it"
  }
  the.is_acronym = function () {
    var s = the.word
    //no periods
    if (s.length <= 5 && s.match(/^[A-Z]*$/)) {
      return true
    }
    //with periods
    if (s.length >= 4 && s.match(/^([A-Z]\.)*$/)) {
      return true
    }
    return false
  }
  the.is_entity = function () {
    if (!token) {
      return false
    }
    if (token.normalised.length < 3 || !token.normalised.match(/[a-z]/i)) {
      return false
    }
    //personal pronouns
    if (prps[token.normalised]) {
      return false
    }
    //blacklist
    if (blacklist[token.normalised]) {
      return false
    }
    //discredit specific noun forms
    if (token.pos) {
      if (token.pos.tag == "NNA") { //eg. 'singer'
        return false
      }
      if (token.pos.tag == "NNO") { //eg. "spencer's"
        return false
      }
      if (token.pos.tag == "NNG") { //eg. 'walking'
        return false
      }
      if (token.pos.tag == "NNP") { //yes! eg. 'Edinburough'
        return true
      }
    }
    //distinct capital is a very good signal
    if (token.noun_capital) {
      return true
    }
    //multiple-word nouns are a very good signal
    if (token.normalised.match(/ /)) {
      return true
    }
    //if it has an acronym/abbreviation, like 'business ltd.'
    if (token.normalised.match(/\./)) {
      return true
    }
    //appears to be a non-capital acronym, and not just caps-lock
    if (token.normalised.length < 5 && token.text.match(/^[A-Z]*$/)) {
      return true
    }
    //acronyms are a-ok
    if (the.is_acronym()) {
      return true
    }
    //else, be conservative
    return false
  }
  the.conjugate = function () {
    return inflect.inflect(the.word)
  }
  the.is_plural = function () {
    return inflect.is_plural(the.word)
  }
  the.article = function () {
    if (the.is_plural()) {
      return "the"
    } else {
      return indefinite_article(the.word)
    }
  }
  the.pluralize = function () {
    return inflect.pluralize(the.word)
  }
  the.singularize = function () {
    return inflect.singularize(the.word)
  }
  //uses common first-name list + honourifics to guess if this noun is the name of a person
  the.is_person = function () {
    var i, l;
    //remove things that are often named after people
    var blacklist = [
      "center", "centre", "memorial", "school", "bridge", "university", "house", "college", "square", "park", "foundation", "institute", "club", "museum", "arena", "stadium", "ss", "of", "the", "for", "and", "&", "co", "sons"
    ]
    l = blacklist.length
    for (i = 0; i < l; i++) {
      if (the.word.match(new RegExp("\\b" + blacklist[i] + "\\b", "i"))) {
        return false
      }
    }
    //see if the noun has an honourific, like 'jr.'
    l = honourifics.length;
    for (i = 0; i < l; i++) {
      if (the.word.match(new RegExp("\\b" + honourifics[i] + "\\.?\\b", 'i'))) {
        return true
      }
    }
    //see if the noun has a known first-name
    var names = the.word.split(' ').map(function (a) {
      return a.toLowerCase()
    })
    if (firstnames[names[0]]) {
      return true
    }
    //(test the middle name too, if there's one)
    if (names.length > 2 && firstnames[names[1]]) {
      return true
    }
    //if it has an initial between two words
    if (the.word.match(/[a-z]{3,20} [a-z]\.? [a-z]{3,20}/i)) {
      return true
    }
    return false
  }
  //decides if it deserves a he, she, they, or it
  the.pronoun = function () {
    //if it's a person, try to classify male/female
    if (the.is_person()) {
      var names = the.word.split(' ').map(function (a) {
        return a.toLowerCase()
      })
      if (firstnames[names[0]] === "m" || firstnames[names[1]] == "m") {
        return "he"
      }
      if (firstnames[names[0]] === "f" || firstnames[names[1]] == "f") {
        return "she"
      }
      //test some honourifics
      if (the.word.match(/^(mrs|miss|ms|misses|mme|mlle)\.? /i)) {
        return "she"
      }
      if (the.word.match(/\b(mr|mister|sr|jr)\b/i)) {
        return "he"
      }
      //if it's a known unisex name, don't try to guess it. be safe.
      if (firstnames[names[0]] === "a" || firstnames[names[1]] == "a") {
        return "they"
      }
      //if we think it's a person, but still don't know the gender, do a little guessing
      if (names[0].match(/[aeiy]$/)) { //if it ends in an 'ee or ah' sound, female
        return "she"
      }
      if (names[0].match(/[ou]$/)) { //if it ends in an 'oh or uh' sound, male
        return "he"
      }
      if (names[0].match(/(nn|ll|tt)/)) { //if it has double-consonants, female
        return "she"
      }
      //fallback to 'singular-they'
      return "they"
    }
    //not a person
    if (the.is_plural()) {
      return "they"
    }
    return "it"
  }
  //list of pronouns that refer to this named noun. "[obama] is cool, [he] is nice."
the.referenced_by = function () { //if it's named-noun, look forward for the pronouns pointing to it -> '... he' if (token && token.pos.tag !== "PRP" && token.pos.tag !== "PP") { var prp = the.pronoun() //look at rest of sentence var interested = sentence.tokens.slice(word_i + 1, sentence.tokens.length) //add next sentence too, could go further.. if (sentence.next) { interested = interested.concat(sentence.next.tokens) } //find the matching pronouns, and break if another noun overwrites it var matches = [] for (var i = 0; i < interested.length; i++) { if (interested[i].pos.tag === "PRP" && (interested[i].normalised === prp || posessives[interested[i].normalised] === prp)) { //this pronoun points at our noun matches.push(interested[i]) } else if (interested[i].pos.tag === "PP" && posessives[interested[i].normalised] === prp) { //this posessive pronoun ('his/her') points at our noun matches.push(interested[i]) } else if (interested[i].pos.parent === "noun" && interested[i].analysis.pronoun() === prp) { //this noun stops our further pursuit break } } return matches } return [] } // a pronoun that points at a noun mentioned previously '[he] is nice' the.reference_to = function () { //if it's a pronoun, look backwards for the first mention '[obama]... <-.. [he]' if (token && (token.pos.tag === "PRP" || token.pos.tag === "PP")) { var prp = token.normalised var possessives={ "his":"he", "her":"she", "their":"they" } if(possessives[prp]!==undefined){//support possessives prp=possessives[prp] } //look at starting of this sentence var interested = sentence.tokens.slice(0, word_i) //add previous sentence, if applicable if (sentence.last) { interested = sentence.last.tokens.concat(interested) } //reverse the terms to loop through backward.. interested = interested.reverse() for (var i = 0; i < interested.length; i++) { //it's a match if (interested[i].pos.parent === "noun" && interested[i].pos.tag !== "PRP" && interested[i].analysis.pronoun() === prp) { return interested[i] } } } } //specifically which pos it is the.which = (function () { //posessive if (the.word.match(/'s$/)) { return parts_of_speech['NNO'] } //plural // if (the.is_plural) { // return parts_of_speech['NNS'] // } //generic return parts_of_speech['NN'] })() return the; } module.exports = Noun; // console.log(new Noun('farmhouse').is_entity()) // console.log(new Noun("FBI").is_acronym()) // console.log(new Noun("Tony Danza").is_person()) // console.log(new Noun("Tony Danza").pronoun()=="he") // console.log(new Noun("Tanya Danza").pronoun()=="she") // console.log(new Noun("mrs. Taya Danza").pronoun()=="she") // console.log(new Noun("Gool Tanya Danza").pronoun()=="she") // console.log(new Noun("illi G. 
Danza").pronoun()=="she") // console.log(new Noun("horses").pronoun()=="they") },{"../../data/lexicon/firstnames":6,"../../data/lexicon/honourifics":7,"../../data/parts_of_speech":14,"./conjugate/inflect":32,"./indefinite_article":33}],35:[function(require,module,exports){ //Parents are classes for each main part of speech, with appropriate methods //load files if server-side, otherwise assume these are prepended already var Adjective = require("./adjective/index"); var Noun = require("./noun/index"); var Adverb = require("./adverb/index"); var Verb = require("./verb/index"); var Value = require("./value/index"); var parents = { adjective: function(str, next, last, token) { return new Adjective(str, next, last, token) }, noun: function(str, next, last, token) { return new Noun(str, next, last, token) }, adverb: function(str, next, last, token) { return new Adverb(str, next, last, token) }, verb: function(str, next, last, token) { return new Verb(str, next, last, token) }, value: function(str, next, last, token) { return new Value(str, next, last, token) }, glue: function(str, next, last, token) { return {} } } module.exports = parents; },{"./adjective/index":29,"./adverb/index":31,"./noun/index":34,"./value/index":37,"./verb/index":44}],36:[function(require,module,exports){ // #generates properly-formatted dates from free-text date forms // #by spencer kelly 2014 var months = "(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|aug|sept|oct|nov|dec),?"; var days = "([0-9]{1,2}),?"; var years = "([12][0-9]{3})"; var to_obj = function (arr, places) { return Object.keys(places).reduce(function (h, k) { h[k] = arr[places[k]]; return h; }, {}); } var regexes = [{ reg: String(months) + " " + String(days) + "-" + String(days) + " " + String(years), example: "March 7th-11th 1987", process: function (arr) { if (!arr) { arr = []; } var places = { month: 1, day: 2, to_day: 3, year: 4 }; return to_obj(arr, places); } }, { reg: String(days) + " of " + String(months) + " to " + String(days) + " of " + String(months) + " " + String(years), example: "28th of September to 5th of October 2008", process: function (arr) { if (!arr) { arr = []; } var places = { day: 1, month: 2, to_day: 3, to_month: 4, to_year: 5 }; return to_obj(arr, places); } }, { reg: String(months) + " " + String(days) + " to " + String(months) + " " + String(days) + " " + String(years), example: "March 7th to june 11th 1987", process: function (arr) { if (!arr) { arr = []; } var places = { month: 1, day: 2, to_month: 3, to_day: 4, year: 5, to_year: 5 }; return to_obj(arr, places); } }, { reg: "between " + String(days) + " " + String(months) + " and " + String(days) + " " + String(months) + " " + String(years), example: "between 13 February and 15 February 1945", process: function (arr) { if (!arr) { arr = []; } var places = { day: 1, month: 2, to_day: 3, to_month: 4, year: 5, to_year: 5 }; return to_obj(arr, places); } }, { reg: "between " + String(months) + " " + String(days) + " and " + String(months) + " " + String(days) + " " + String(years), example: "between March 7th and june 11th 1987", process: function (arr) { if (!arr) { arr = []; } var places = { month: 1, day: 2, to_month: 3, to_day: 4, year: 5, to_year: 5 }; return to_obj(arr, places); } }, { reg: String(months) + " " + String(days) + " " + String(years), example: "March 1st 1987", process: function (arr) { if (!arr) { arr = []; } var places = { month: 1, day: 2, year: 3 }; return to_obj(arr, places); } }, { reg: 
String(days) + " - " + String(days) + " of " + String(months) + " " + String(years), example: "3rd - 5th of March 1969", process: function (arr) { if (!arr) { arr = []; } var places = { day: 1, to_day: 2, month: 3, year: 4 }; return to_obj(arr, places); } }, { reg: String(days) + " of " + String(months) + " " + String(years), example: "3rd of March 1969", process: function (arr) { if (!arr) { arr = []; } var places = { day: 1, month: 2, year: 3 }; return to_obj(arr, places); } }, { reg: String(months) + " " + years + ",? to " + String(months) + " " + String(years), example: "September 1939 to April 1945", process: function (arr) { if (!arr) { arr = []; } var places = { month: 1, year: 2, to_month: 3, to_year: 4 }; return to_obj(arr, places); } }, { reg: String(months) + " " + String(years), example: "March 1969", process: function (arr) { if (!arr) { arr = []; } var places = { month: 1, year: 2 }; return to_obj(arr, places); } }, { reg: String(months) + " " + days, example: "March 18th", process: function (arr) { if (!arr) { arr = []; } var places = { month: 1, day: 2 }; return to_obj(arr, places); } }, { reg: String(days) + " of " + months, example: "18th of March", process: function (arr) { if (!arr) { arr = []; } var places = { month: 2, day: 1 }; return to_obj(arr, places); } }, { reg: years + " ?- ?" + String(years), example: "1997-1998", process: function (arr) { if (!arr) { arr = []; } var places = { year: 1, to_year: 2 }; return to_obj(arr, places); } }, { reg: years, example: "1998", process: function (arr) { if (!arr) { arr = []; } var places = { year: 1 }; return to_obj(arr, places); } }].map(function (o) { o.reg = new RegExp(o.reg, "g"); return o; }); //0 based months, 1 based days... var months_obj = { january: 0, february: 1, march: 2, april: 3, may: 4, june: 5, july: 6, august: 7, september: 8, october: 9, november: 10, december: 11, jan: 0, feb: 1, mar: 2, apr: 3, aug: 7, sept: 8, oct: 9, nov: 10, dec: 11 }; //thirty days hath september... var last_dates = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; var preprocess = function (str) { str = str.toLowerCase(); str = str.replace(/([0-9])(th|rd|st)/g, '$1'); return str; }; var postprocess = function (obj, options) { var d; d = new Date(); options = options || {}; obj.year = parseInt(obj.year, 10) || undefined; obj.day = parseInt(obj.day, 10) || undefined; obj.to_day = parseInt(obj.to_day, 10) || undefined; obj.to_year = parseInt(obj.to_year, 10) || undefined; obj.month = months_obj[obj.month]; obj.to_month = months_obj[obj.to_month]; //swap to_month and month if (obj.to_month !== undefined && obj.month === undefined) { obj.month = obj.to_month; } if (obj.to_month === undefined && obj.month !== undefined) { obj.to_month = obj.month; } //swap to_year and year if (obj.to_year && !obj.year) { obj.year = obj.to_year; } if (!obj.to_year && obj.year && obj.to_month !== undefined) { obj.to_year = obj.year; } if (options.assume_year && !obj.year) { obj.year = d.getFullYear(); } //make sure date is in that month.. if (obj.day !== undefined && (obj.day > 31 || (obj.month !== undefined && obj.day > last_dates[obj.month]))) { obj.day = undefined; } //make sure to date is after from date. fail everything if so... 
//todo: do this smarter
  if (obj.to_month !== undefined && obj.to_month < obj.month) {
    return {}
  }
  if (obj.to_year && obj.to_year < obj.year) {
    obj.year = undefined;
    obj.to_year = undefined;
  }
  //make sure the date is in a reasonable range (very opinionated)
  if (obj.year > 2090 || obj.year < 1200) {
    obj.year = undefined;
    obj.to_year = undefined;
  }
  //format the result better
  obj = {
    day: obj.day,
    month: obj.month,
    year: obj.year,
    to: {
      day: obj.to_day,
      month: obj.to_month,
      year: obj.to_year
    }
  };
  //add javascript date objects, if you can
  if (obj.year && obj.day && obj.month !== undefined) {
    obj.date_object = new Date();
    obj.date_object.setYear(obj.year);
    obj.date_object.setMonth(obj.month);
    obj.date_object.setDate(obj.day);
  }
  if (obj.to.year && obj.to.day && obj.to.month !== undefined) {
    obj.to.date_object = new Date();
    obj.to.date_object.setYear(obj.to.year);
    obj.to.date_object.setMonth(obj.to.month);
    obj.to.date_object.setDate(obj.to.day);
  }
  //if we have enough data to return a result..
  if (obj.year || obj.month !== undefined) {
    return obj;
  }
  return {};
};
//pass the string through the sequence of regexes until a template is matched..
module.exports = function (str, options) {
  options = options || {};
  str = preprocess(str)
  var arr, good, clone_reg, obj;
  var l = regexes.length;
  for (var i = 0; i < l; i += 1) {
    obj = regexes[i]
    if (str.match(obj.reg)) {
      clone_reg = new RegExp(obj.reg.source, "i"); //this avoids a memory-leak
      arr = clone_reg.exec(str);
      good = obj.process(arr);
      return postprocess(good, options);
    }
  }
};
// console.log(date_extractor("1998"))
// console.log(date_extractor("1999"))
},{}],37:[function(require,module,exports){
//wrapper for value's methods
var Value = function (str, sentence, word_i) {
  var the = this
  the.word = str || '';
  var to_number = require("./to_number")
  var date_extractor = require("./date_extractor")
  var parts_of_speech = require("../../data/parts_of_speech")
  the.date = function (options) {
    options = options || {}
    return date_extractor(the.word, options)
  }
  the.is_date = function () {
    var months = /(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|aug|sept|oct|nov|dec)/i
    var times = /1?[0-9]:[0-9]{2}/
    var days = /\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday|mon|tues|wed|thurs|fri|sat|sun)\b/i
    if (the.word.match(months) || the.word.match(times) || the.word.match(days)) {
      return true
    }
    return false
  }
  the.number = function () {
    if (the.is_date()) {
      return null
    }
    return to_number(the.word)
  }
  the.which = (function () {
    if (the.date()) {
      return parts_of_speech['DA']
    }
    if (the.number()) {
      return parts_of_speech['NU']
    }
    return parts_of_speech['CD']
  })()
  return the;
};
module.exports = Value;
// console.log(new Value("fifty five").number())
// console.log(new Value("june 5th 1998").date())
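//(added spot-check, same style)
// console.log(new Value("tuesday at 4:30").is_date() === true)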
},{"../../data/parts_of_speech":14,"./date_extractor":36,"./to_number":38}],38:[function(require,module,exports){
// converts spoken numbers into integers  "fifty seven point eight" -> 57.8
//
// spoken numbers take the following format:
// [sixty five] (thousand) [sixty five] (hundred) [sixty five]
// aka: [one/teen/ten] (multiple) [one/teen/ten] (multiple) ...
// combine the [one/teen/ten]s as 'current_sum', then multiply by the multiple that follows
// a given multiple may not repeat
"use strict";
//these sets of numbers each have different rules
//[tenth, hundredth, thousandth..] are ambiguous, because they could be ordinal like fifth, or decimal like one-one-hundredth, so they are ignored
var ones = {
  'a': 1, 'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
  "first": 1, "second": 2, "third": 3, "fourth": 4, "fifth": 5, "sixth": 6, "seventh": 7, "eighth": 8, "ninth": 9
}
var teens = {
  'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14, 'fifteen': 15, 'sixteen': 16, 'seventeen': 17, 'eighteen': 18, 'nineteen': 19,
  "eleventh": 11, "twelfth": 12, "thirteenth": 13, "fourteenth": 14, "fifteenth": 15, "sixteenth": 16, "seventeenth": 17, "eighteenth": 18, "nineteenth": 19
}
var tens = {
  'twenty': 20, 'thirty': 30, 'forty': 40, 'fifty': 50, 'sixty': 60, 'seventy': 70, 'eighty': 80, 'ninety': 90,
  "twentieth": 20, "thirtieth": 30, "fortieth": 40, "fiftieth": 50, "sixtieth": 60, "seventieth": 70, "eightieth": 80, "ninetieth": 90
}
var multiple = {
  'hundred': 100, 'grand': 1000, 'thousand': 1000, 'million': 1000000, 'billion': 1000000000, 'trillion': 1000000000000, 'quadrillion': 1000000000000000, 'quintillion': 1000000000000000000, 'sextillion': 1000000000000000000000, 'septillion': 1000000000000000000000000, 'octillion': 1000000000000000000000000000, 'nonillion': 1000000000000000000000000000000, 'decillion': 1000000000000000000000000000000000
}
// var decimal_multiple={'tenth':0.1, 'hundredth':0.01, 'thousandth':0.001, 'millionth':0.000001,'billionth':0.000000001};
var main = function (s) {
  //remember these concerns for possible errors
  var ones_done = false
  var teens_done = false
  var tens_done = false
  var multiple_done = {}
  var total = 0
  var global_multiplier = 1
  //pretty-printed numbers
  s = s.replace(/, ?/g, '')
  //parse-out currency
  s = s.replace(/[$£€]/, '')
  //try to finish fast
  if (s.match(/[0-9]\.[0-9]/) && parseFloat(s) == s) {
    return parseFloat(s)
  }
  if (parseInt(s, 10) == s) {
    return parseInt(s, 10)
  }
  //try to die fast. (phone numbers or times)
  if (s.match(/[0-9][\-:][0-9]/)) {
    return null
  }
  //support global multipliers, like 'half-million', by doing 'million' then multiplying by 0.5
  var mults = [{
    reg: /^(minus|negative)[\s\-]/i,
    mult: -1
  }, {
    reg: /^(a\s)?half[\s\-](of\s)?/i,
    mult: 0.5
  }, {
    reg: /^(a\s)?quarter[\s\-]/i,
    mult: 0.25
  }]
  for (var i = 0; i < mults.length; i++) {
    if (s.match(mults[i].reg)) {
      global_multiplier = mults[i].mult
      s = s.replace(mults[i].reg, '')
      break;
    }
  }
  //do each word in turn..
  var words = s.toString().split(/[\s\-]+/);
  var w, x;
  var current_sum = 0;
  var local_multiplier = 1
  var decimal_mode = false
  for (i = 0; i < words.length; i++) {
    w = words[i]
    //skip 'and', eg. five hundred and twelve
    if (w == "and") {
      continue;
    }
    //..we're doing decimals now
    if (w == "point" || w == "decimal") {
      if (decimal_mode) {
        return null
      } //two point one point six
      decimal_mode = true
      total += current_sum
      current_sum = 0
      ones_done = false
      local_multiplier = 0.1
      continue;
    }
    //handle special rules following a decimal
    if (decimal_mode) {
      x = null
      //allow consecutive ones in decimals, eg. 'two point zero five nine'
      if (ones[w] !== undefined) {
        x = ones[w]
      }
      if (teens[w] !== undefined) {
        x = teens[w]
      }
      if (parseInt(w, 10) == w) {
        x = parseInt(w, 10)
      }
      if (!x) {
        return null
      }
      if (x < 10) {
        total += x * local_multiplier
        local_multiplier = local_multiplier * 0.1 // next number is the next decimal place
        current_sum = 0
        continue;
      }
      //two-digit decimals eg. 'two point sixteen'
if (x < 100) {
        total += x * (local_multiplier * 0.1)
        local_multiplier = local_multiplier * 0.01 // next number is the next decimal place
        current_sum = 0
        continue;
      }
    }
    //if it's already an actual number
    if (w.match(/^[0-9]\.[0-9]$/)) {
      current_sum += parseFloat(w)
      continue;
    }
    if (parseInt(w, 10) == w) {
      current_sum += parseInt(w, 10)
      continue;
    }
    //ones rules
    if (ones[w] !== undefined) {
      if (ones_done) {
        return null
      } // eg. five seven
      if (teens_done) {
        return null
      } // eg. five seventeen
      ones_done = true
      current_sum += ones[w]
      continue;
    }
    //teens rules
    if (teens[w]) {
      if (ones_done) {
        return null
      } // eg. five seventeen
      if (teens_done) {
        return null
      } // eg. fifteen seventeen
      if (tens_done) {
        return null
      } // eg. sixty fifteen
      teens_done = true
      current_sum += teens[w]
      continue;
    }
    //tens rules
    if (tens[w]) {
      if (ones_done) {
        return null
      } // eg. five seventy
      if (teens_done) {
        return null
      } // eg. fifteen seventy
      if (tens_done) {
        return null
      } // eg. twenty seventy
      tens_done = true
      current_sum += tens[w]
      continue;
    }
    //multiple rules
    if (multiple[w]) {
      if (multiple_done[w]) {
        return null
      } // eg. five hundred six hundred
      multiple_done[w] = true
      //reset our concerns. allow 'five hundred five'
      ones_done = false
      teens_done = false
      tens_done = false
      //case of 'hundred million' (2 consecutive multipliers)
      if (current_sum === 0) {
        total = total || 1 //don't ever multiply by 0
        total *= multiple[w]
      } else {
        current_sum *= multiple[w]
        total += current_sum
      }
      current_sum = 0
      continue;
    }
    //if the word is not a known thing by now, die
    return null
  }
  if (current_sum) {
    total += (current_sum || 1) * local_multiplier
  }
  //combine with the global multiplier, like 'minus' or 'half'
  total = total * global_multiplier
  return total
}
//kick it into the module
module.exports = main;
// console.log(to_number("sixteen hundred"))
// console.log(to_number("a hundred"))
// console.log(to_number("four point seven seven"))
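//(a couple of added spot-checks, same style)
// console.log(to_number("minus five hundred") === -500)
// console.log(to_number("two point five") === 2.5)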
},{}],39:[function(require,module,exports){
//turn a verb into its other grammatical forms.
var verb_to_doer = require("./to_doer")
var verb_irregulars = require("./verb_irregulars")
var verb_rules = require("./verb_rules")
var suffix_rules = require("./suffix_rules")
//this method is the slowest in the whole library, basically TODO:whaaa
var predict = function (w) {
  var endsWith = function (str, suffix) {
    return str.indexOf(suffix, str.length - suffix.length) !== -1;
  }
  var arr = Object.keys(suffix_rules);
  for (var i = 0; i < arr.length; i++) {
    if (endsWith(w, arr[i])) {
      return suffix_rules[arr[i]]
    }
  }
  return "infinitive"
}
//fallback to this transformation if the verb has an unknown prefix
var fallback = function (w) {
  var infinitive;
  if (w.length > 4) {
    infinitive = w.replace(/ed$/, '');
  } else {
    infinitive = w.replace(/d$/, '');
  }
  var present, past, gerund, doer;
  if (w.match(/[^aeiou]$/)) {
    gerund = w + "ing"
    past = w + "ed"
    if (w.match(/ss$/)) {
      present = w + "es" //'passes'
    } else {
      present = w + "s"
    }
    doer = verb_to_doer(infinitive)
  } else {
    gerund = w.replace(/[aeiou]$/, 'ing')
    past = w.replace(/[aeiou]$/, 'ed')
    present = w.replace(/[aeiou]$/, 'es')
    doer = verb_to_doer(infinitive)
  }
  return {
    infinitive: infinitive,
    present: present,
    past: past,
    gerund: gerund,
    doer: doer,
    future: "will " + infinitive
  }
}
//make sure the object has all forms
var fulfill = function (obj, prefix) {
  if (!obj.infinitive) {
    return obj
  }
  if (!obj.gerund) {
    obj.gerund = obj.infinitive + 'ing'
  }
  if (!obj.doer) {
    obj.doer = verb_to_doer(obj.infinitive)
  }
  if (!obj.present) {
    obj.present = obj.infinitive + 's'
  }
  if (!obj.past) {
    obj.past = obj.infinitive + 'ed'
  }
  //add the prefix to all forms, if it exists
  if (prefix) {
    Object.keys(obj).forEach(function (k) {
      obj[k] = prefix + obj[k]
    })
  }
  //future is 'will'+infinitive
  if (!obj.future) {
    obj.future = "will " + obj.infinitive
  }
  //perfect is 'have'+past-tense
  if (!obj.perfect) {
    obj.perfect = "have " + obj.past
  }
  //pluperfect is 'had'+past-tense
  if (!obj.pluperfect) {
    obj.pluperfect = "had " + obj.past
  }
  //future perfect is 'will have'+past-tense
  if (!obj.future_perfect) {
    obj.future_perfect = "will have " + obj.past
  }
  return obj
}
var main = function (w) {
  if (w === undefined) {
    return {}
  }
  //for phrasal verbs ('look out'), conjugate 'look', then append 'out'
  var phrasal_reg = new RegExp("^(.*?) " +
(in|out|on|off|behind|way|with|of|do|away|across|ahead|back|over|under|together|apart|up|upon|aback|down|about|before|after|around|to|forth|round|through|along|onto)$", 'i')
if (w.match(' ') && w.match(phrasal_reg)) {
var split = w.match(phrasal_reg)
var phrasal_verb = split[1]
var particle = split[2]
var result = main(phrasal_verb) //recursive
delete result["doer"]
Object.keys(result).forEach(function (k) {
if (result[k]) {
result[k] += " " + particle
}
})
return result
}
//for pluperfect ('had tried') remove 'had' and call it past-tense
if (w.match(/^had [a-z]/i)) {
w = w.replace(/^had /i, '')
}
//for perfect ('have tried') remove 'have' and call it past-tense
if (w.match(/^have [a-z]/i)) {
w = w.replace(/^have /i, '')
}
//for future perfect ('will have tried') remove 'will have' and call it past-tense
if (w.match(/^will have [a-z]/i)) {
w = w.replace(/^will have /i, '')
}
//chop it if it's future-tense
w = w.replace(/^will /i, '')
//un-prefix the verb, and add it back in later
var prefix = (w.match(/^(over|under|re|anti|full)\-?/i) || [])[0]
var verb = w.replace(/^(over|under|re|anti|full)\-?/i, '')
//check irregulars
var obj = {};
var l = verb_irregulars.length
var x, i;
for (i = 0; i < l; i++) {
x = verb_irregulars[i]
if (verb === x.present || verb === x.gerund || verb === x.past || verb === x.infinitive) {
obj = JSON.parse(JSON.stringify(verb_irregulars[i])); // object 'clone' hack, so we don't mutate the shared data
return fufill(obj, prefix)
}
}
//guess the tense, so we know which transformation to make
var predicted = predict(w) || 'infinitive'
//check against suffix rules
l = verb_rules[predicted].length
var r, keys;
for (i = 0; i < l; i++) {
r = verb_rules[predicted][i];
if (w.match(r.reg)) {
obj[predicted] = w;
keys = Object.keys(r.repl)
for (var o = 0; o < keys.length; o++) {
if (keys[o] === predicted || r.repl[keys[o]] === undefined) {
continue;
}
obj[keys[o]] = w.replace(r.reg, r.repl[keys[o]])
}
return fufill(obj, prefix)
}
}
//produce a generic transformation as a last resort
return fallback(w)
}
module.exports = main;
},{"./suffix_rules":40,"./to_doer":41,"./verb_irregulars":42,"./verb_rules":43}],42:[function(require,module,exports){
//irregular verbs and their conjugations
//forms are compressed: '_' is shorthand for the infinitive ( eg. "_s" -> infinitive + "s" )
var types = ["infinitive", "gerund", "past", "present", "doer"]
var compact = [
[ "arise", "arising", "arose", "_s", "_r" ], [ "babysit", "_ting", "babysat", "_s", "_ter" ], [ "be", "_ing", "was", "is", "" ], [ "beat", "_ing", "_", "_s", "_er" ], [ "become", "becoming", "became", "_s", "_r" ], [ "bend", "_ing", "bent", "_s", "_er" ], [ "begin", "_ning", "began", "_s", "_ner" ], [ "bet", "_ting", "_", "_s", "_ter" ], [ "bind", "_ing", "bound", "_s", "_er" ], [ "bite", "biting", "bit", "_s", "_r" ], [ "bleed", "_ing", "bled", "_s", "_er" ], [ "blow", "_ing", "blew", "_s", "_er" ], [ "break", "_ing", "broke", "_s", "_er" ], [ "breed", "_ing", "bred", "_s", "_er" ], [ "bring", "_ing", "brought", "_s", "_er" ], [ "broadcast", "_ing", "_", "_s", "_er" ], [ "build", "_ing", "built", "_s", "_er" ], [ "buy", "_ing", "bought", "_s", "_er" ], [ "catch", "_ing", "caught", "_es", "_er" ], [ "choose", "choosing", "chose", "_s", "_r" ], [ "come", "coming", "came", "_s", "_r" ], [ "cost", "_ing", "_", "_s", "_er" ], [ "cut", "_ting", "_", "_s", "_ter" ], [ "deal", "_ing", "_t", "_s", "_er" ], [ "dig", "_ging", "dug", "_s", "_ger" ], [ "do", "_ing", "did", "_es", "_er" ], [ "draw", "_ing", "drew", "_s", "_er" ], [ "drink", "_ing", "drank", "_s", "_er" ], [ "drive", "driving", "drove", "_s", "_r" ], [ "eat", "_ing", "ate", "_s", "_er" ], [ "fall", "_ing", "fell", "_s", "_er" ], [ "feed", "_ing", "fed", "_s", "_er" ], [ "feel", "_ing", "felt", "_s", "_er" ], [ "fight", "_ing", "fought", "_s", "_er" ], [ "find", "_ing", "found", "_s", "_er" ], [ "fly", "_ing", "flew", "_s", "flier" ], [ "forbid", "_ding", "forbade", "_s" ], [ "forget", "_ting", "forgot", "_s", "_er" ], [ "forgive", "forgiving", "forgave", "_s", "_r" ], [ "freeze", "freezing", "froze", "_s", "_r" ], [ "get", "_ting", "got", "_s", "_ter" ], [ "give",
"giving", "gave", "_s", "_r" ], [ "go", "_ing", "went", "_es", "_er" ], [ "grow", "_ing", "grew", "_s", "_er" ], [ "hang", "_ing", "hung", "_s", "_er" ], [ "have", "having", "had", "has", ], [ "hear", "_ing", "_d", "_s", "_er" ], [ "hide", "hiding", "hid", "_s", "_r" ], [ "hit", "_ting", "_", "_s", "_ter" ], [ "hold", "_ing", "held", "_s", "_er" ], [ "hurt", "_ing", "_", "_s", "_er" ], [ "know", "_ing", "knew", "_s", "_er" ], [ "relay", "_ing", "_ed", "_s", "_er" ], [ "lay", "_ing", "laid", "_s", "_er" ], [ "lead", "_ing", "led", "_s", "_er" ], [ "leave", "leaving", "left", "_s", "_r" ], [ "lend", "_ing", "lent", "_s", "_er" ], [ "let", "_ting", "_", "_s", "_ter" ], [ "lie", "lying", "lay", "_s", "_r" ], [ "light", "_ing", "lit", "_s", "_er" ], [ "lose", "losing", "lost", "_s", "_r" ], [ "make", "making", "made", "_s", "_r" ], [ "mean", "_ing", "_t", "_s", "_er" ], [ "meet", "_ing", "met", "_s", "_er" ], [ "pay", "_ing", "paid", "_s", "_er" ], [ "put", "_ting", "_", "_s", "_ter" ], [ "quit", "_ting", "_", "_s", "_ter" ], [ "read", "_ing", "_", "_s", "_er" ], [ "ride", "riding", "rode", "_s", "_r" ], [ "ring", "_ing", "rang", "_s", "_er" ], [ "rise", "rising", "rose", "_s", "_r" ], [ "run", "_ning", "ran", "_s", "_ner" ], [ "say", "_ing", "said", "_s", ], [ "see", "_ing", "saw", "_s", "_r" ], [ "sell", "_ing", "sold", "_s", "_er" ], [ "send", "_ing", "sent", "_s", "_er" ], [ "set", "_ting", "_", "_s", "_ter" ], [ "shake", "shaking", "shook", "_s", "_r" ], [ "shine", "shining", "shone", "_s", "_r" ], [ "shoot", "_ing", "shot", "_s", "_er" ], [ "show", "_ing", "_ed", "_s", "_er" ], [ "shut", "_ting", "_", "_s", "_ter" ], [ "sing", "_ing", "sang", "_s", "_er" ], [ "sink", "_ing", "sank", "_s", "_er" ], [ "sit", "_ting", "sat", "_s", "_ter" ], [ "slide", "sliding", "slid", "_s", "_r" ], [ "speak", "_ing", "spoke", "_s", "_er" ], [ "spend", "_ing", "spent", "_s", "_er" ], [ "spin", "_ning", "spun", "_s", "_ner" ], [ "spread", "_ing", "_", "_s", "_er" ], [ "stand", "_ing", "stood", "_s", "_er" ], [ "steal", "_ing", "stole", "_s", "_er" ], [ "stick", "_ing", "stuck", "_s", "_er" ], [ "sting", "_ing", "stung", "_s", "_er" ], [ "strike", "striking", "struck", "_s", "_r" ], [ "swear", "_ing", "swore", "_s", "_er" ], [ "swim", "_ing", "swam", "_s", "_mer" ], [ "swing", "_ing", "swung", "_s", "_er" ], [ "take", "taking", "took", "_s", "_r" ], [ "teach", "_ing", "taught", "_s", "_er" ], [ "tear", "_ing", "tore", "_s", "_er" ], [ "tell", "_ing", "told", "_s", "_er" ], [ "think", "_ing", "thought", "_s", "_er" ], [ "throw", "_ing", "threw", "_s", "_er" ], [ "understand", "_ing", "understood", "_s", ], [ "wake", "waking", "woke", "_s", "_r" ], [ "wear", "_ing", "wore", "_s", "_er" ], [ "win", "_ning", "won", "_s", "_ner" ], [ "withdraw", "_ing", "withdrew", "_s", "_er" ], [ "write", "writing", "wrote", "_s", "_r" ], [ "tie", "tying", "_d", "_s", "_r" ], [ "obey", "_ing", "_ed", "_s", "_er" ], [ "ski", "_ing", "_ied", "_s", "_er" ], [ "boil", "_ing", "_ed", "_s", "_er" ], [ "miss", "_ing", "_ed", "_", "_er" ], [ "act", "_ing", "_ed", "_s", "_or" ], [ "compete", "competing", "_d", "_s", "competitor" ], [ "being", "are", "were", "are", ], [ "imply", "_ing", "implied", "implies", "implier" ], [ "ice", "icing", "_d", "_s", "_r" ], [ "develop", "_ing", "_", "_s", "_er" ], [ "wait", "_ing", "_ed", "_s", "_er" ], [ "aim", "_ing", "_ed", "_s", "_er" ], [ "spill", "_ing", "spilt", "_s", "_er" ], [ "drop", "_ping", "_ped", "_s", "_per" ], [ "head", "_ing", "_ed", "_s", "_er" ], [ "log", "_ging", "_ged", "_s", "_ger" 
], [ "rub", "_bing", "_bed", "_s", "_ber" ], [ "smash", "_ing", "_ed", "_es", "_er" ], [ "add", "_ing", "_ed", "_s", "_er" ], [ "word", "_ing", "_ed", "_s", "_er" ], [ "suit", "_ing", "_ed", "_s", "_er" ], [ "be", "am", "was", "am", "" ] ] //expand compact version out module.exports = compact.map(function (arr) { var obj = {} for (var i = 0; i < arr.length; i++) { obj[types[i]] = arr[i].replace(/_/, arr[0]) } return obj }) // console.log(JSON.stringify(verb_irregulars, null, 2)); },{}],43:[function(require,module,exports){ // regex rules for each part of speech that convert it to all other parts of speech. // used in combination with the generic 'fallback' method var verb_rules = { "infinitive": [ [ "(eed)$", { "pr": "$1s", "g": "$1ing", "pa": "$1ed", "do": "$1er" } ], [ "(e)(ep)$", { "pr": "$1$2s", "g": "$1$2ing", "pa": "$1pt", "do": "$1$2er" } ], [ "(a[tg]|i[zn]|ur|nc|gl|is)e$", { "pr": "$1es", "g": "$1ing", "pa": "$1ed" } ], [ "([i|f|rr])y$", { "pr": "$1ies", "g": "$1ying", "pa": "$1ied" } ], [ "([td]er)$", { "pr": "$1s", "g": "$1ing", "pa": "$1ed" } ], [ "([bd]l)e$", { "pr": "$1es", "g": "$1ing", "pa": "$1ed" } ], [ "(ish|tch|ess)$", { "pr": "$1es", "g": "$1ing", "pa": "$1ed" } ], [ "(ion|end|e[nc]t)$", { "pr": "$1s", "g": "$1ing", "pa": "$1ed" } ], [ "(om)e$", { "pr": "$1es", "g": "$1ing", "pa": "ame" } ], [ "([aeiu])([pt])$", { "pr": "$1$2s", "g": "$1$2$2ing", "pa": "$1$2" } ], [ "(er)$", { "pr": "$1s", "g": "$1ing", "pa": "$1ed" } ], [ "(en)$", { "pr": "$1s", "g": "$1ing", "pa": "$1ed" } ] ], "present": [ [ "(ies)$", { "in": "y", "g": "ying", "pa": "ied" } ], [ "(tch|sh)es$", { "in": "$1", "g": "$1ing", "pa": "$1ed" } ], [ "(ss)es$", { "in": "$1", "g": "$1ing", "pa": "$1ed" } ], [ "([tzlshicgrvdnkmu])es$", { "in": "$1e", "g": "$1ing", "pa": "$1ed" } ], [ "(n[dtk]|c[kt]|[eo]n|i[nl]|er|a[ytrl])s$", { "in": "$1", "g": "$1ing", "pa": "$1ed" } ], [ "(ow)s$", { "in": "$1", "g": "$1ing", "pa": "ew" } ], [ "(op)s$", { "in": "$1", "g": "$1ping", "pa": "$1ped" } ], [ "([eirs])ts$", { "in": "$1t", "g": "$1tting", "pa": "$1tted" } ], [ "(ll)s$", { "in": "$1", "g": "$1ing", "pa": "$1ed" } ], [ "(el)s$", { "in": "$1", "g": "$1ling", "pa": "$1led" } ], [ "(ip)es$", { "in": "$1e", "g": "$1ing", "pa": "$1ed" } ], [ "ss$", { "in": "ss", "g": "ssing", "pa": "ssed" } ], [ "s$", { "in": "", "g": "ing", "pa": "ed" } ] ], "gerund": [ [ "pping$", { "in": "p", "pr": "ps", "pa": "pped" } ], [ "lling$", { "in": "ll", "pr": "lls", "pa": "lled" } ], [ "tting$", { "in": "t", "pr": "ts", "pa": "t" } ], [ "ssing$", { "in": "ss", "pr": "sses", "pa": "ssed" } ], [ "gging$", { "in": "g", "pr": "gs", "pa": "gged" } ], [ "([^aeiou])ying$", { "in": "$1y", "pr": "$1ies", "pa": "$1ied", "do": "$1ier" } ], [ "(i.)ing$", { "in": "$1e", "pr": "$1es", "pa": "$1ed" } ], [ "(u[rtcb]|[bdtpkg]l|n[cg]|a[gdkvtc]|[ua]s|[dr]g|yz|o[rlsp]|cre)ing$", { "in": "$1e", "pr": "$1es", "pa": "$1ed" } ], [ "(ch|sh)ing$", { "in": "$1", "pr": "$1es", "pa": "$1ed" } ], [ "(..)ing$", { "in": "$1", "pr": "$1s", "pa": "$1ed" } ] ], "past": [ [ "(ued)$", { "pr": "ues", "g": "uing", "pa": "ued", "do": "uer" } ], [ "(e|i)lled$", { "pr": "$1lls", "g": "$1lling", "pa": "$1lled", "do": "$1ller" } ], [ "(sh|ch)ed$", { "in": "$1", "pr": "$1es", "g": "$1ing", "do": "$1er" } ], [ "(tl|gl)ed$", { "in": "$1e", "pr": "$1es", "g": "$1ing", "do": "$1er" } ], [ "(ss)ed$", { "in": "$1", "pr": "$1es", "g": "$1ing", "do": "$1er" } ], [ "pped$", { "in": "p", "pr": "ps", "g": "pping", "do": "pper" } ], [ "tted$", { "in": "t", "pr": "ts", "g": "tting", "do": "tter" } ], [ 
"gged$", { "in": "g", "pr": "gs", "g": "gging", "do": "gger" } ], [ "(h|ion|n[dt]|ai.|[cs]t|pp|all|ss|tt|int|ail|ld|en|oo.|er|k|pp|w|ou.|rt|ght|rm)ed$", { "in": "$1", "pr": "$1s", "g": "$1ing", "do": "$1er" } ], [ "(..[^aeiou])ed$", { "in": "$1e", "pr": "$1es", "g": "$1ing", "do": "$1er" } ], [ "ied$", { "in": "y", "pr": "ies", "g": "ying", "do": "ier" } ], [ "(.o)ed$", { "in": "$1o", "pr": "$1os", "g": "$1oing", "do": "$1oer" } ], [ "(.i)ed$", { "in": "$1", "pr": "$1s", "g": "$1ing", "do": "$1er" } ], [ "([rl])ew$", { "in": "$1ow", "pr": "$1ows", "g": "$1owing" } ], [ "([pl])t$", { "in": "$1t", "pr": "$1ts", "g": "$1ting" } ] ] } //unpack compressed form verb_rules=Object.keys(verb_rules).reduce(function(h,k){ h[k]=verb_rules[k].map(function(a){ var obj={ reg:new RegExp(a[0],"i"), repl:{ infinitive:a[1]["in"], present:a[1]["pr"], past:a[1]["pa"], gerund:a[1]["g"] } } if(a[1]["do"]){ obj.repl.doer=a[1]["do"] } return obj }) return h },{}) module.exports = verb_rules; // console.log(JSON.stringify(verb_rules, null, 2)); },{}],44:[function(require,module,exports){ //wrapper for verb's methods var Verb = function (str, sentence, word_i) { var the = this var token, next; if (sentence !== undefined && word_i !== undefined) { token = sentence.tokens[word_i] next = sentence.tokens[word_i + i] } the.word = str || ''; var verb_conjugate = require("./conjugate/conjugate") var parts_of_speech = require("../../data/parts_of_speech") var copulas = { "is": "CP", "will be": "CP", "will": "CP", "are": "CP", "was": "CP", "were": "CP" } var modals = { "can": "MD", "may": "MD", "could": "MD", "might": "MD", "will": "MD", "ought to": "MD", "would": "MD", "must": "MD", "shall": "MD", "should": "MD" } var tenses = { past: "VBD", participle: "VBN", infinitive: "VBP", present: "VBZ", gerund: "VBG" } the.conjugate = function () { return verb_conjugate(the.word) } the.to_past = function () { if (the.form === "gerund") { return the.word } return verb_conjugate(the.word).past } the.to_present = function () { return verb_conjugate(the.word).present } the.to_future = function () { return "will " + verb_conjugate(the.word).infinitive } //which conjugation the.form = (function () { //don't choose infinitive if infinitive==present var order = [ "past", "present", "gerund", "infinitive" ] var forms = verb_conjugate(the.word) for (var i = 0; i < order.length; i++) { if (forms[order[i]] === the.word) { return order[i] } } })() //past/present/future //wahh?! the.tense = (function () { if (the.word.match(/\bwill\b/)) { return "future" } if (the.form === "present") { return "present" } if (the.form === "past") { return "past" } return "present" })() //the most accurate part_of_speech the.which = (function () { if (copulas[the.word]) { return parts_of_speech['CP'] } if (the.word.match(/([aeiou][^aeiouwyrlm])ing$/)) { return parts_of_speech['VBG'] } var form = the.form return parts_of_speech[tenses[form]] })() //is this verb negative already? 
the.negative = function () { if (the.word.match(/n't$/)) { return true } if ((modals[the.word] || copulas[the.word]) && next && next.normalised === "not") { return true } return false } return the; } module.exports = Verb; // console.log(new Verb("will")) // console.log(new Verb("stalking").tense) },{"../../data/parts_of_speech":14,"./conjugate/conjugate":39}],45:[function(require,module,exports){ var lexicon = require("./data/lexicon") var values = require("./data/lexicon/values") var tokenize = require("./methods/tokenization/tokenize"); var parts_of_speech = require("./data/parts_of_speech") var word_rules = require("./data/word_rules") var wordnet_suffixes = require("./data/unambiguous_suffixes") var Sentence = require("./sentence") var Section = require("./section") var parents = require("./parents/parents") //possible 2nd part in a phrasal verb var particles = ["in", "out", "on", "off", "behind", "way", "with", "of", "do", "away", "across", "ahead", "back", "over", "under", "together", "apart", "up", "upon", "aback", "down", "about", "before", "after", "around", "to", "forth", "round", "through", "along", "onto"] particles = particles.reduce(function (h, s) { h[s] = true return h }, {}) var merge_tokens = function (a, b) { a.text += " " + b.text a.normalised += " " + b.normalised a.pos_reason += "|" + b.pos_reason a.start = a.start || b.start a.noun_capital = (a.noun_capital && b.noun_capital) a.punctuated = a.punctuated || b.punctuated a.end = a.end || b.end return a } //combine adjacent neighbours, and special cases var combine_tags = function (sentence) { var arr = sentence.tokens || [] for (var i = 0; i <= arr.length; i++) { var next = arr[i + 1] if (arr[i] && next) { var tag = arr[i].pos.tag //'joe smith' are both NN, for example if (tag === next.pos.tag && arr[i].punctuated !== true && arr[i].noun_capital == next.noun_capital) { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null } //merge NNP and NN, like firstname, lastname else if ((tag === "NNP" && next.pos.tag === "NN") || (tag === "NN" && next.pos.tag === "NNP")) { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null arr[i + 1].pos = parts_of_speech['NNP'] } //merge dates manually, which often have punctuation else if (tag === "CD" && next.pos.tag === "CD") { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null } //merge abbreviations with nouns manually, eg. "Joe jr." 
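// (eg. "Joe jr. walked" - 'Joe' arrives as a noun and 'jr.' as NNAB, so the rule below
//  merges them into one token; merge_tokens() above joins text/normalised with a space
//  and ORs the flags. a sketch, not verified output)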
else if ((tag === "NNAB" && next.pos.parent === "noun") || (arr[i].pos.parent === "noun" && next.pos.tag === "NNAB")) { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null } //'will walk' -> future-tense verb else if (arr[i].normalised === "will" && next.pos.parent === "verb") { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null } //'hundred and fifty', 'march the 5th' else if (tag === "CD" && (next.normalised === "and" || next.normalised === "the") && arr[i + 2] && arr[i + 2].pos.tag === "CD") { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null } //capitals surrounding a preposition 'United States of America' else if (tag == "NN" && arr[i].noun_capital && (next.normalised == "of" || next.normalised == "and") && arr[i + 2] && arr[i + 2].noun_capital) { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null arr[i + 2] = merge_tokens(arr[i + 1], arr[i + 2]) arr[i + 1] = null } //capitals surrounding two prepositions 'Phantom of the Opera' else if (arr[i].noun_capital && next.normalised == "of" && arr[i + 2] && arr[i + 2].pos.tag == "DT" && arr[i + 3] && arr[i + 3].noun_capital) { arr[i + 1] = merge_tokens(arr[i], arr[i + 1]) arr[i] = null arr[i + 2] = merge_tokens(arr[i + 1], arr[i + 2]) arr[i + 1] = null arr[i + 3] = merge_tokens(arr[i + 2], arr[i + 3]) arr[i + 2] = null } } } sentence.tokens = arr.filter(function (r) { return r }) return sentence } //some prepositions are clumped onto the back of a verb "looked for", "looks at" //they should be combined with the verb, sometimes. //does not handle seperated phrasal verbs ('take the coat off' -> 'take off') var combine_phrasal_verbs = function (sentence) { var arr = sentence.tokens || [] for (var i = 1; i < arr.length; i++) { if (particles[arr[i].normalised]) { //it matches a known phrasal-verb if (lexicon[arr[i - 1].normalised + " " + arr[i].normalised]) { // console.log(arr[i-1].normalised + " " + arr[i].normalised) arr[i] = merge_tokens(arr[i - 1], arr[i]) arr[i - 1] = null } } } sentence.tokens = arr.filter(function (r) { return r }) return sentence } var lexicon_pass = function (w) { if (lexicon.hasOwnProperty(w)) { return parts_of_speech[lexicon[w]] } //try to match it without a prefix - eg. 
outworked -> worked if (w.match(/^(over|under|out|-|un|re|en).{4}/)) { var attempt = w.replace(/^(over|under|out|.*?-|un|re|en)/, '') return parts_of_speech[lexicon[attempt]] } } var rules_pass = function (w) { for (var i = 0; i < word_rules.length; i++) { if (w.length > 4 && w.match(word_rules[i].reg)) { return parts_of_speech[word_rules[i].pos] } } } var fourth_pass = function (token, i, sentence) { var last = sentence.tokens[i - 1] var next = sentence.tokens[i + 1] var strong_determiners = { "the": 1, "a": 1, "an": 1 } //resolve ambiguous 'march','april','may' with dates if ((token.normalised == "march" || token.normalised == "april" || token.normalised == "may") && ((next && next.pos.tag == "CD") || (last && last.pos.tag == "CD"))) { token.pos = parts_of_speech['CD'] token.pos_reason = "may_is_date" } //if it's before a modal verb, it's a noun -> lkjsdf would if (next && token.pos.parent !== "noun" && token.pos.parent !== "glue" && next.pos.tag === "MD") { token.pos = parts_of_speech['NN'] token.pos_reason = "before_modal" } //if it's after the word 'will' its probably a verb/adverb if (last && last.normalised == "will" && !last.punctuated && token.pos.parent == "noun" && token.pos.tag !== "PRP" && token.pos.tag !== "PP") { token.pos = parts_of_speech['VB'] token.pos_reason = "after_will" } //if it's after the word 'i' its probably a verb/adverb if (last && last.normalised == "i" && !last.punctuated && token.pos.parent == "noun") { token.pos = parts_of_speech['VB'] token.pos_reason = "after_i" } //if it's after an adverb, it's not a noun -> quickly acked //support form 'atleast he is..' if (last && token.pos.parent === "noun" && token.pos.tag !== "PRP" && token.pos.tag !== "PP" && last.pos.tag === "RB" && !last.start) { token.pos = parts_of_speech['VB'] token.pos_reason = "after_adverb" } //no consecutive, unpunctuated adjectives -> real good if (next && token.pos.parent === "adjective" && next.pos.parent === "adjective" && !token.punctuated) { token.pos = parts_of_speech['RB'] token.pos_reason = "consecutive_adjectives" } //if it's after a determiner, it's not a verb -> the walk if (last && token.pos.parent === "verb" && strong_determiners[last.pos.normalised] && token.pos.tag != "CP") { token.pos = parts_of_speech['NN'] token.pos_reason = "determiner-verb" } //copulas are followed by a determiner ("are a .."), or an adjective ("are good") if (last && last.pos.tag === "CP" && token.pos.tag !== "DT" && token.pos.tag !== "RB" && token.pos.tag !== "PRP" && token.pos.parent !== "adjective" && token.pos.parent !== "value") { token.pos = parts_of_speech['JJ'] token.pos_reason = "copula-adjective" } //copula, adverb, verb -> copula adverb adjective -> is very lkjsdf if (last && next && last.pos.tag === "CP" && token.pos.tag === "RB" && next.pos.parent === "verb") { sentence.tokens[i + 1].pos = parts_of_speech['JJ'] sentence.tokens[i + 1].pos_reason = "copula-adverb-adjective" } // the city [verb] him. 
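// (eg. "the city fires him" - 'fires' could lexicon-tag as a plural noun, but a noun
//  directly before an object pronoun is retagged VB by the rule below. a sketch only)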
if (next && next.pos.tag == "PRP" && token.pos.tag !== "PP" && token.pos.parent == "noun" && !token.punctuated) { token.pos = parts_of_speech['VB'] token.pos_reason = "before_[him|her|it]" } //the misled worker -> misled is an adjective, not vb if (last && next && last.pos.tag === "DT" && next.pos.parent === "noun" && token.pos.parent === "verb") { token.pos = parts_of_speech['JJ'] token.pos_reason = "determiner-adjective-noun" } //where's he gone -> gone=VB, not JJ if (last && last.pos.tag === "PRP" && token.pos.tag === "JJ") { token.pos = parts_of_speech['VB'] token.pos_reason = "adjective-after-pronoun" } return token } //add a 'quiet' token for contractions so we can represent their grammar var handle_contractions = function (arr) { var contractions = { "i'd": ["i", "would"], "she'd": ["she", "would"], "he'd": ["he", "would"], "they'd": ["they", "would"], "we'd": ["we", "would"], "i'll": ["i", "will"], "she'll": ["she", "will"], "he'll": ["he", "will"], "they'll": ["they", "will"], "we'll": ["we", "will"], "i've": ["i", "have"], "they've": ["they", "have"], "we've": ["we", "have"], "should've": ["should", "have"], "would've": ["would", "have"], "could've": ["could", "have"], "must've": ["must", "have"], "i'm": ["i", "am"], "we're": ["we", "are"], "they're": ["they", "are"], "cannot": ["can", "not"] } var before, after, fix; for (var i = 0; i < arr.length; i++) { if (contractions.hasOwnProperty(arr[i].normalised)) { before = arr.slice(0, i) after = arr.slice(i + 1, arr.length) fix = [{ text: arr[i].text, normalised: contractions[arr[i].normalised][0], start: arr[i].start }, { text: "", normalised: contractions[arr[i].normalised][1], start: undefined }] arr = before.concat(fix) arr = arr.concat(after) return handle_contractions(arr) //recursive } } return arr } //these contractions require (some) grammatical knowledge to disambig properly (e.g "he's"=> ['he is', 'he was'] var handle_ambiguous_contractions = function (arr) { var ambiguous_contractions = { "he's": "he", "she's": "she", "it's": "it", "who's": "who", "what's": "what", "where's": "where", "when's": "when", "why's": "why", "how's": "how" } var before, after, fix; for (var i = 0; i < arr.length; i++) { if (ambiguous_contractions.hasOwnProperty(arr[i].normalised)) { before = arr.slice(0, i) after = arr.slice(i + 1, arr.length) //choose which verb this contraction should have.. 
var chosen = "is" //look for the next verb, and if it's past-tense (he's walked -> he has walked) for (var o = i + 1; o < arr.length; o++) { if (arr[o] && arr[o].pos && arr[o].pos.tag == "VBD") { //past tense chosen = "has" break } } fix = [{ text: arr[i].text, normalised: ambiguous_contractions[arr[i].normalised], //the "he" part start: arr[i].start, pos: parts_of_speech[lexicon[ambiguous_contractions[arr[i].normalised]]], pos_reason: "ambiguous_contraction" }, { text: "", normalised: chosen, //is,was,or have start: undefined, pos: parts_of_speech[lexicon[chosen]], pos_reason: "silent_contraction" }] arr = before.concat(fix) arr = arr.concat(after) return handle_ambiguous_contractions(arr) //recursive } } return arr } //////////////// ///party-time// var main = function (text, options) { options = options || {} if (!text || !text.match(/[a-z0-9]/i)) { return new Section([]) } var sentences = tokenize(text); sentences.forEach(function (sentence) { //first, let's handle the capitalisation-of-the-first-word issue var first = sentence.tokens[0] if (first) { //if second word is a noun-capital, give more sympathy to this capital if (sentence.tokens[1] && sentence.tokens[1].noun_capital && !lexicon_pass(first.normalised)) { sentence.tokens[0].noun_capital = true; } } //smart handling of contractions sentence.tokens = handle_contractions(sentence.tokens) //first pass, word-level clues sentence.tokens = sentence.tokens.map(function (token) { //it has a capital and isn't a month, etc. if (token.noun_capital && !values[token.normalised]) { token.pos = parts_of_speech['NN'] token.pos_reason = "noun_capitalised" return token } //known words list var lex = lexicon_pass(token.normalised) if (lex) { token.pos = lex; token.pos_reason = "lexicon" //if it's an abbreviation, forgive the punctuation (eg. 
'dr.')
if (token.pos.tag === "NNAB") {
token.punctuated = false
}
return token
}
//handle punctuation like ' -- '
if (!token.normalised) {
token.pos = parts_of_speech['UH']
token.pos_reason = "wordless_string"
return token
}
// suffix pos signals from wordnet
var len = token.normalised.length
if (len > 4) {
var suffix = token.normalised.substr(len - 4) //the last four characters
if (wordnet_suffixes.hasOwnProperty(suffix)) {
token.pos = parts_of_speech[wordnet_suffixes[suffix]]
token.pos_reason = "wordnet suffix"
return token
}
}
// suffix regexes for words
var r = rules_pass(token.normalised);
if (r) {
token.pos = r;
token.pos_reason = "regex suffix"
return token
}
//see if it's a number
if (parseFloat(token.normalised)) {
token.pos = parts_of_speech['CD']
token.pos_reason = "parsefloat"
return token
}
return token
})
//second pass, wrangle results a bit
sentence.tokens = sentence.tokens.map(function (token, i) {
//set ambiguous 'ed' endings as either verb/adjective
if (token.pos_reason !== "lexicon" && token.normalised.match(/.ed$/)) {
token.pos = parts_of_speech['VB']
token.pos_reason = "ed"
}
return token
})
//split-out more difficult contractions, like "he's"->["he is", "he was"]
// (now that we have enough pos data to do this)
sentence.tokens = handle_ambiguous_contractions(sentence.tokens)
//third pass, seek verb or noun phrases after their signals
var need = null
var reason = ''
sentence.tokens = sentence.tokens.map(function (token, i) {
var next = sentence.tokens[i + 1]
if (token.pos) {
//suggest a noun after some determiners (a|the), possessive pronouns (her|my|its)
if (token.normalised == "the" || token.normalised == "a" || token.normalised == "an" || token.pos.tag === "PP") {
need = 'noun'
reason = token.pos.name
return token //proceed
}
//suggest a verb after personal pronouns (he|she|they), modal verbs (would|could|should)
if ((token.pos.tag === "PRP" && token.pos.tag !== "PP") || token.pos.tag === "MD") {
need = 'verb'
reason = token.pos.name
return token //proceed
}
}
//satisfy need on a conflict, and fix a likely error
if (token.pos) {
if (need == "verb" && token.pos.parent == "noun" && (!next || (next.pos && next.pos.parent != "noun"))) {
if (!next || !next.pos || next.pos.parent != need) { //ensure need is not satisfied on the next one
token.pos = parts_of_speech['VB']
token.pos_reason = "signal from " + reason
need = null
}
}
if (need == "noun" && token.pos.parent == "verb" && (!next || (next.pos && next.pos.parent != "verb"))) {
if (!next || !next.pos || next.pos.parent != need) { //ensure need is not satisfied on the next one
token.pos = parts_of_speech["NN"]
token.pos_reason = "signal from " + reason
need = null
}
}
}
//satisfy need with an unknown pos
if (need && !token.pos) {
if (!next || !next.pos || next.pos.parent != need) { //ensure need is not satisfied on the next one
token.pos = parts_of_speech[need]
token.pos_reason = "signal from " + reason
need = null
}
}
//set them back as satisfied..
if (need === 'verb' && token.pos && token.pos.parent === 'verb') {
need = null
}
if (need === 'noun' && token.pos && token.pos.parent === 'noun') {
need = null
}
return token
})
//third pass, identify missing clauses, fallback to noun
var has = {}
sentence.tokens.forEach(function (token) {
if (token.pos) {
has[token.pos.parent] = true
}
})
sentence.tokens = sentence.tokens.map(function (token, i) {
if (!token.pos) {
//if there is no verb in the sentence, and there needs to be.
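// (eg. "the quick fox dsfds" - an adjective and a noun but no verb so far, so the
//  still-untagged 'dsfds' is promoted to VB below. a sketch of the heuristic, not verified)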
if (has['adjective'] && has['noun'] && !has['verb']) { token.pos = parts_of_speech['VB'] token.pos_reason = "need one verb" has['verb'] = true return token } //fallback to a noun token.pos = parts_of_speech['NN'] token.pos_reason = "noun fallback" } return token }) //fourth pass, error correction sentence.tokens = sentence.tokens.map(function (token, i) { return fourth_pass(token, i, sentence) }) //run the fourth-pass again! sentence.tokens = sentence.tokens.map(function (token, i) { return fourth_pass(token, i, sentence) }) }) //combine neighbours if (!options.dont_combine) { sentences = sentences.map(function (s) { return combine_tags(s) }) sentences = sentences.map(function (s) { return combine_phrasal_verbs(s) }) } //make them Sentence objects sentences = sentences.map(function (s) { var sentence = new Sentence(s.tokens) sentence.type = s.type return sentence }) //add analysis on each token sentences = sentences.map(function (s) { s.tokens = s.tokens.map(function (token, i) { token.analysis = parents[token.pos.parent](token.normalised, s, i) return token }) return s }) //add next-last references sentences = sentences.map(function (sentence, i) { sentence.last = sentences[i - 1] sentence.next = sentences[i + 1] return sentence }) //return a Section object, with its methods return new Section(sentences) } module.exports = main; // console.log( pos("Geroge Clooney walked, quietly into a bank. It was cold.") ) // console.log( pos("it is a three-hundred and one").tags() ) // console.log( pos("funny funny funny funny").sentences[0].tokens ) // pos("In March 2009, while Secretary of State for Energy and Climate Change, Miliband attended the UK premiere of climate-change film The Age of Stupid, where he was ambushed").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text)}) // pos("the Energy and Climate Change, Miliband").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text)}) // console.log(pos("Energy and Climate Change, Miliband").sentences[0].tokens) // console.log(pos("http://google.com").sentences[0].tokens) // console.log(pos("may live").tags()) // console.log(pos("may 7th live").tags()) // console.log(pos("She and Marc Emery married on July 23, 2006.").tags()) // console.log(pos("Toronto is fun. Spencer and heather quickly walked. it was cool").sentences[0].referables()) // console.log(pos("a hundred").sentences[0].tokens) // console.log(pos("Tony Reagan skates").sentences[0].tokens) // console.log(pos("She and Marc Emery married on July 23, 2006").sentences[0].tokens) // console.log(pos("Tony Hawk walked quickly to the store.").sentences[0].tokens) // console.log(pos("jahn j. jacobheimer").sentences[0].tokens[0].analysis.is_person()) // pos("Dr. Conrad Murray recieved a guilty verdict").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text)}) // pos("the Phantom of the Opera").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text)}) // pos("Tony Hawk is nice").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text)}) // pos("tony hawk is nice").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text)}) // console.log(pos("look after a kid").sentences[0].tags()) // pos("Sather tried to stop the deal, but when he found out that Gretzky").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text+" "+t.pos_reason)}) // pos("Gretzky had tried skating").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text+" "+t.pos_reason)}) // pos("Sally and Tom fight a lot. 
She thinks he is her friend.").sentences[0].tokens.map(function(t){console.log(t.pos.tag + " "+t.text+" "+t.pos_reason)}) // console.log(pos("i think Tony Danza is cool. He rocks and he is golden.").sentences[0].tokens[2].analysis.referenced_by()) // console.log(pos("i think Tony Danza is cool and he is golden.").sentences[0].tokens[6].analysis.reference_to()) // console.log(pos("Tina grabbed her shoes. She is lovely.").sentences[0].tokens[0].analysis.referenced_by()) // console.log(pos("Sally and Tom fight a lot. She thinks he is her friend.").sentences[0].tokens[0].analysis.referenced_by()) // console.log(pos("it's gotten the best features").sentences[0].tokens[1].normalised=="has") //bug // console.log(pos("he's fun").sentences[0].tokens[1].normalised=="is") },{"./data/lexicon":2,"./data/lexicon/values":12,"./data/parts_of_speech":14,"./data/unambiguous_suffixes":15,"./data/word_rules":16,"./methods/tokenization/tokenize":22,"./parents/parents":35,"./section":46,"./sentence":47}],46:[function(require,module,exports){ //a section is a block of text, with an arbitrary number of sentences //these methods are just wrappers around the ones in sentence.js var Section = function(sentences) { var the = this the.sentences = sentences || []; the.text = function() { return the.sentences.map(function(s) { return s.text() }).join(' ') } the.tense = function() { return the.sentences.map(function(s) { return s.tense() }) } //pluck out wanted data from sentences the.nouns = function() { return the.sentences.map(function(s) { return s.nouns() }).reduce(function(arr, a) { return arr.concat(a) }, []) } the.entities = function(options) { return the.sentences.map(function(s) { return s.entities(options) }).reduce(function(arr, a) { return arr.concat(a) }, []) } the.people = function() { return the.sentences.map(function(s) { return s.people() }).reduce(function(arr, a) { return arr.concat(a) }, []) } the.adjectives = function() { return the.sentences.map(function(s) { return s.adjectives() }).reduce(function(arr, a) { return arr.concat(a) }, []) } the.verbs = function() { return the.sentences.map(function(s) { return s.verbs() }).reduce(function(arr, a) { return arr.concat(a) }, []) } the.adverbs = function() { return the.sentences.map(function(s) { return s.adverbs() }).reduce(function(arr, a) { return arr.concat(a) }, []) } the.values = function() { return the.sentences.map(function(s) { return s.values() }).reduce(function(arr, a) { return arr.concat(a) }, []) } the.tags = function() { return the.sentences.map(function(s) { return s.tags() }) } //transform the sentences the.negate = function() { the.sentences = the.sentences.map(function(s) { return s.negate() }) return the } the.to_past = function() { the.sentences = the.sentences.map(function(s) { return s.to_past() }) return the } the.to_present = function() { the.sentences = the.sentences.map(function(s) { return s.to_present() }) return the } the.to_future = function() { the.sentences = the.sentences.map(function(s) { return s.to_future() }) return the } } module.exports = Section; },{}],47:[function(require,module,exports){ // methods that hang on a parsed set of words // accepts parsed tokens var Sentence = function(tokens) { var the = this the.tokens = tokens || []; var capitalise = function(s) { return s.charAt(0).toUpperCase() + s.slice(1); } the.tense = function() { var verbs = the.tokens.filter(function(token) { return token.pos.parent === "verb" }) return verbs.map(function(v) { return v.analysis.tense }) } the.to_past = function() { 
the.tokens = the.tokens.map(function(token) {
if (token.pos.parent === "verb") {
token.text = token.analysis.to_past()
token.normalised = token.text
}
return token
})
return the
}
the.to_present = function() {
the.tokens = the.tokens.map(function(token) {
if (token.pos.parent === "verb") {
token.text = token.analysis.to_present()
token.normalised = token.text
}
return token
})
return the
}
the.to_future = function() {
the.tokens = the.tokens.map(function(token) {
if (token.pos.parent === "verb") {
token.text = token.analysis.to_future()
token.normalised = token.text
}
return token
})
return the
}
the.insert = function(token, i) {
if (token && i !== undefined) { //allow inserting at index 0
the.tokens.splice(i, 0, token);
}
}
//negate makes the sentence mean the opposite thing.
the.negate = function() {
//these are cheap ways to negate the meaning
// ('none' is ambiguous because it could mean (all or some) )
var logic_negate = {
//some logical ones work
"everyone": "no one",
"everybody": "nobody",
"someone": "no one",
"somebody": "nobody",
// everything:"nothing",
"always": "never",
//copulas
"is": "isn't",
"are": "aren't",
"was": "wasn't",
"will": "won't",
//modals, in both directions
"didn't": "did",
"wouldn't": "would",
"couldn't": "could",
"shouldn't": "should",
"can't": "can",
"won't": "will",
"mustn't": "must",
"shan't": "shall",
"shant": "shall",
"did": "didn't",
"would": "wouldn't",
"could": "couldn't",
"should": "shouldn't",
"can": "can't",
"must": "mustn't"
}
//loop through each term..
for (var i = 0; i < the.tokens.length; i++) {
var tok = the.tokens[i]
//turn 'is' into 'isn't', etc - make sure 'is' isn't followed by a 'not', too
if (logic_negate[tok.normalised] && (!the.tokens[i + 1] || the.tokens[i + 1].normalised != "not")) {
tok.text = logic_negate[tok.normalised]
tok.normalised = logic_negate[tok.normalised]
if (tok.capitalised) {
tok.text = capitalise(tok.text)
}
return the
}
// find the first verb..
if (tok.pos.parent == "verb") {
// if the verb is already negative, make it not negative
if (tok.analysis.negative()) {
if (the.tokens[i + 1] && the.tokens[i + 1].normalised == "not") {
the.tokens.splice(i + 1, 1)
}
return the
}
//turn future-tense 'will go' into "won't go"
if (tok.normalised.match(/^will /i)) {
tok.text = tok.text.replace(/^will /i, "won't ")
tok.normalised = tok.text
if (tok.capitalised) {
tok.text = capitalise(tok.text)
}
return the
}
// - INFINITIVE -
// 'i walk' -> "i don't walk"
if (tok.analysis.form == "infinitive") {
tok.text = "don't " + (tok.analysis.conjugate().infinitive || tok.text)
tok.normalised = tok.text.toLowerCase()
return the
}
// - GERUND -
// if the verb is a gerund, 'walking' -> "not walking"
if (tok.analysis.form == "gerund") {
tok.text = "not " + tok.text
tok.normalised = tok.text.toLowerCase()
return the
}
// - PAST -
// if the verb is past-tense, 'he walked' -> "he didn't walk"
if (tok.analysis.tense == "past") {
tok.text = "didn't " + (tok.analysis.conjugate().infinitive || tok.text)
tok.normalised = tok.text.toLowerCase()
return the
}
// - PRESENT -
// if the verb is present-tense, 'he walks' -> "he doesn't walk"
if (tok.analysis.tense == "present") {
tok.text = "doesn't " + (tok.analysis.conjugate().infinitive || tok.text)
tok.normalised = tok.text.toLowerCase()
return the
}
// - FUTURE -
// if the verb is future-tense, 'will go' -> won't go.
easy-peasy if (tok.analysis.tense == "future") { if (tok.normalised == "will") { tok.normalised = "won't" tok.text = "won't" } else { tok.text = tok.text.replace(/^will /i, "won't ") tok.normalised = tok.normalised.replace(/^will /i, "won't ") } if (tok.capitalised) { tok.text = capitalise(tok.text); } return the } return the } } return the } the.entities = function(options) { var spots = [] options = options || {} the.tokens.forEach(function(token) { if (token.pos.parent === "noun" && token.analysis.is_entity()) { spots.push(token) } }) if (options.ignore_gerund) { spots = spots.filter(function(t) { return t.pos.tag !== "VBG" }) } return spots } //noun-entities that look like person names.. the.people = function(){ return the.entities({}).filter(function(o){ return o.analysis.is_person() }) } the.text = function() { return the.tokens.map(function(s) { return s.text }).join(' ') } //sugar 'grab' methods the.verbs = function() { return the.tokens.filter(function(t) { return t.pos.parent == "verb" }) } the.adverbs = function() { return the.tokens.filter(function(t) { return t.pos.parent == "adverb" }) } the.nouns = function() { return the.tokens.filter(function(t) { return t.pos.parent == "noun" }) } the.adjectives = function() { return the.tokens.filter(function(t) { return t.pos.parent == "adjective" }) } the.values = function() { return the.tokens.filter(function(t) { return t.pos.parent == "value" }) } the.tags = function() { return the.tokens.map(function(t) { return t.pos.tag }) } //find the 'it', 'he', 'she', and 'they' of this sentence //these are the words that get 'exported' to be used in other sentences the.referables=function(){ var pronouns={ he:undefined, she:undefined, they:undefined, it:undefined } the.tokens.forEach(function(t){ if(t.pos.parent=="noun" && t.pos.tag!="PRP"){ pronouns[t.analysis.pronoun()]=t } }) return pronouns } return the } module.exports = Sentence; },{}],48:[function(require,module,exports){ //just a wrapper for text -> entities //most of this logic is in ./parents/noun var pos = require("./pos"); var main = function (text, options) { options = options || {} //collect 'entities' from all nouns var sentences = pos(text, options).sentences var arr = sentences.reduce(function (arr, s) { return arr.concat(s.entities(options)) }, []) //for people, remove instances of 'george', and 'bush' after 'george bush'. var ignore = {} arr = arr.filter(function (o) { //add tokens to blacklist if (o.analysis.is_person()) { o.normalised.split(' ').forEach(function (s) { ignore[s] = true }) } if (ignore[o.normalised]) { return false } return true }) return arr } module.exports = main; // console.log(spot("Tony Hawk is cool. Tony eats all day.").map(function(s){return s})) // console.log(spot("Tony eats all day. Tony Hawk is cool.").map(function(s){return s})) // console.log(spot("My Hawk is cool").map(function(s){return s.normalised})) },{"./pos":45}]},{},[1]);
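// a hedged end-to-end sketch - this assumes the browserify build exposes the library as
// a global (eg. `nlp`) via entry module [1], which this excerpt does not show:
// var s = nlp.pos("Tony Hawk walked quickly.").sentences[0]
// console.log(s.tags())           // eg. [ 'NN', 'VB', 'RB' ]
// console.log(s.negate().text())  // eg. "Tony Hawk didn't walk quickly."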