• Jump To … +
    abbreviations.js adjectives.js convertables.js dates.js demonyms.js firstnames.js honourifics.js irregular_nouns.js irregular_verbs.js misc.js multiples.js numbers.js organisations.js phrasal_verbs.js places.js uncountables.js verbs.js fns.js index.js lexicon.js negate.js passive_voice.js contractions.js fancy_lumping.js grammar_rules.js parts_of_speech.js phrasal_verbs.js tagger.js word_rules.js question.js sentence.js statement.js tense.js adjective.js to_adverb.js to_comparative.js to_noun.js to_superlative.js adverb.js to_adjective.js is_acronym.js article.js date.js date_rules.js is_date.js parse_date.js is_plural.js is_uncountable.js noun.js is_organisation.js organisation.js gender.js is_person.js parse_name.js person.js is_place.js place.js pluralize.js pronoun.js singularize.js is_value.js numbers.js to_number.js units.js value.js term.js conjugate.js from_infinitive.js predict_form.js suffix_rules.js to_actor.js to_infinitive.js negate.js verb.js sentence_parser.js text.js
  • word_rules.js

  • ¶
    const tag_mapping = require('./parts_of_speech.js').tag_mapping;
  • ¶

    Regex patterns, each paired with the part-of-speech tag it suggests.

    module.exports = [
      ['.[cts]hy$', 'JJ'],
      ['.[st]ty$', 'JJ'],
      ['.[lnr]ize$', 'VB'],
      ['.[gk]y$', 'JJ'],
      ['.fies$', 'VB'],
      ['.some$', 'JJ'],
      ['.[nrtumcd]al$', 'JJ'],
      ['.que$', 'JJ'],
      ['.[tnl]ary$', 'JJ'],
      ['.[di]est$', 'JJS'],
      ['^(un|de|re)\\-[a-z]..', 'VB'],
      ['.lar$', 'JJ'],
      ['[bszmp]{2}y', 'JJ'],
      ['.zes$', 'VB'],
      ['.[icldtgrv]ent$', 'JJ'],
      ['.[rln]ates$', 'VBZ'],
      ['.[oe]ry$', 'JJ'],
      ['[rdntkdhs]ly$', 'RB'],
      ['.[lsrnpb]ian$', 'JJ'],
      ['.[^aeiou]ial$', 'JJ'],
      ['.[^aeiou]eal$', 'JJ'],
      ['.[vrl]id$', 'JJ'],
      ['.[ilk]er$', 'JJR'],
      ['.ike$', 'JJ'],
      ['.ends$', 'VB'],
      ['.wards$', 'RB'],
      ['.rmy$', 'JJ'],
      ['.rol$', 'NN'],
      ['.tors$', 'NN'],
      ['.azy$', 'JJ'],
      ['.where$', 'RB'],
      ['.ify$', 'VB'],
      ['.bound$', 'JJ'],
      ['.ens$', 'VB'],
      ['.oid$', 'JJ'],
      ['.vice$', 'NN'],
      ['.rough$', 'JJ'],
      ['.mum$', 'JJ'],
      ['.teen(th)?$', 'CD'],
      ['.oses$', 'VB'],
      ['.ishes$', 'VB'],
      ['.ects$', 'VB'],
      ['.tieth$', 'CD'],
      ['.ices$', 'NN'],
      ['.bles$', 'VB'],
      ['.pose$', 'VB'],
      ['.ions$', 'NN'],
      ['.ean$', 'JJ'],
      ['.[ia]sed$', 'JJ'],
      ['.tized$', 'VB'],
      ['.llen$', 'JJ'],
      ['.fore$', 'RB'],
      ['.ances$', 'NN'],
      ['.gate$', 'VB'],
      ['.nes$', 'VB'],
      ['.less$', 'RB'],
      ['.ried$', 'JJ'],
      ['.gone$', 'JJ'],
      ['.made$', 'JJ'],
      ['.[pdltrkvyns]ing$', 'JJ'],
      ['.tions$', 'NN'],
      ['.tures$', 'NN'],
      ['.ous$', 'JJ'],
      ['.ports$', 'NN'],
      ['. so$', 'RB'],
      ['.ints$', 'NN'],
      ['.[gt]led$', 'JJ'],
      ['[aeiou].*ist$', 'JJ'],
      ['.lked$', 'VB'],
      ['.fully$', 'RB'],
      ['.*ould$', 'MD'],
      ['^-?[0-9]+(.[0-9]+)?$', 'CD'],
      ['[a-z]*\\-[a-z]*\\-', 'JJ'],
      ['[a-z]\'s$', 'NNO'],
      ['.\'n$', 'VB'],
      ['.\'re$', 'CP'],
      ['.\'ll$', 'MD'],
      ['.\'t$', 'VB'],
      ['.tches$', 'VB'],
      ['^https?\:?\/\/[a-z0-9]', 'NN'], //the colon is removed in normalisation
      ['^www\.[a-z0-9]', 'NN'],
      ['.ize$', 'VB'],
      ['.[^aeiou]ise$', 'VB'],
      ['.[aeiou]te$', 'VB'],
      ['.ea$', 'NN'],
      ['[aeiou][pns]er$', 'NN'],
      ['.ia$', 'NN'],
      ['.sis$', 'NN'],
      ['.[aeiou]na$', 'NN'],
      ['.[^aeiou]ity$', 'NN'],
      ['.[^aeiou]ium$', 'NN'],
      ['.[^aeiou][ei]al$', 'JJ'],
      ['.ffy$', 'JJ'],
      ['.[^aeiou]ic$', 'JJ'],
      ['.(gg|bb|zz)ly$', 'JJ'],
      ['.[aeiou]my$', 'JJ'],
      ['.[aeiou]ble$', 'JJ'],
      ['.[^aeiou]ful$', 'JJ'],
      ['.[^aeiou]ish$', 'JJ'],
      ['.[^aeiou]ica$', 'NN'],
      ['[aeiou][^aeiou]is$', 'NN'],
      ['[^aeiou]ard$', 'NN'],
      ['[^aeiou]ism$', 'NN'],
      ['.[^aeiou]ity$', 'NN'],
      ['.[^aeiou]ium$', 'NN'],
      ['.[lstrn]us$', 'NN'],
      ['..ic$', 'JJ'],
      ['[aeiou][^aeiou]id$', 'JJ'],
      ['.[^aeiou]ish$', 'JJ'],
      ['.[^aeiou]ive$', 'JJ'],
      ['[ea]{2}zy$', 'JJ'],
    ].map(function(a) {
      return {
        reg: new RegExp(a[0], 'i'),
        pos: tag_mapping[a[1]]
      };
    });