• Jump To … +
    abbreviations.js adjectives.js convertables.js dates.js demonyms.js firstnames.js honourifics.js irregular_nouns.js irregular_verbs.js misc.js multiples.js numbers.js organisations.js phrasal_verbs.js places.js uncountables.js verbs.js fns.js index.js lexicon.js negate.js passive_voice.js contractions.js fancy_lumping.js grammar_rules.js parts_of_speech.js phrasal_verbs.js tagger.js word_rules.js question.js sentence.js statement.js tense.js adjective.js to_adverb.js to_comparative.js to_noun.js to_superlative.js adverb.js to_adjective.js is_acronym.js article.js date.js date_rules.js is_date.js parse_date.js is_plural.js is_uncountable.js noun.js is_organisation.js organisation.js gender.js is_person.js parse_name.js person.js is_place.js place.js pluralize.js pronoun.js singularize.js is_value.js numbers.js to_number.js units.js value.js term.js conjugate.js from_infinitive.js predict_form.js suffix_rules.js to_actor.js to_infinitive.js negate.js verb.js sentence_parser.js text.js
  • fancy_lumping.js

  • ¶

    fancy combining/chunking of terms

    'use strict';
    const pos = require('./parts_of_speech');
    
    //Decide whether three consecutive terms should be merged into one term.
    //a, b, c are term objects; this reads their .text, .normal and .pos
    //properties and calls .is_capital() on a and c.
    //Returns the tag (a string) to give the merged term, or false when any of
    //the three terms is missing or no rule matches.
    const shouldLumpThree = function(a, b, c) {
      if (!a || !b || !c) {
        return false;
      }
      //every condition is computed up-front, while the array is built;
      //the rules are listed in priority order and the first truthy one wins
      const lump_rules = [
        {
          condition: (a.pos.Noun && b.text === '&' && c.pos.Noun), //John & Joe's
          result: 'Person',
        },
        {
          condition: (a.pos.Noun && b.text === 'N' && c.pos.Noun), //John N Joe's
          result: 'Person',
        },
        {
          //a date followed by 'the' and a value is a date, same as the two-term 'June 4' case
          condition: (a.pos.Date && b.normal === 'the' && c.pos.Value), //June the 5th
          result: 'Date',
        },
        {
          condition: (a.is_capital() && b.normal === 'of' && c.is_capital()), //President of Mexico
          result: 'Noun',
        },
        {
          condition: (a.text.match(/^["']/) && !b.text.match(/["']/) && c.text.match(/["']$/)), //three-word quote
          result: 'Noun',
        },
        {
          //the verb check belongs on the third term; b is already pinned to 'have'
          condition: (a.normal === 'will' && b.normal === 'have' && c.pos.Verb), //will have walk
          result: 'FutureTense',
        },
      ];
      for(let i = 0; i < lump_rules.length; i++) {
        if (lump_rules[i].condition) {
          return lump_rules[i].result;
        }
      }
      return false;
    };
    
    //Decide whether two neighbouring terms should be merged into one term.
    //a and b are term objects; this reads their .text, .normal and .pos
    //properties and may call .is_capital() on either.
    //Returns the tag (a string) to give the merged term, or false when either
    //term is missing or no rule matches.
    const shouldLumpTwo = function(a, b) {
      if (!(a && b)) {
        return false;
      }
      //[condition, tag] pairs in priority order; all conditions are computed
      //here, before any of them is inspected
      const candidates = [
        //"John sr."
        [a.pos.Person && b.pos.Honourific || a.pos.Honourific && b.pos.Person, 'Person'],
        //'Dr. John
        [a.pos.Honourific && b.is_capital(), 'Person'],
        //'Person, Capital -> Person'
        [a.pos.Person && b.is_capital(), 'Person'],
        //June 4
        [a.pos.Date && b.pos.Value, 'Date'],
        //5 books
        [a.pos.Value && b.pos.Noun, 'Value'],
        //Canada Inc
        [a.is_capital() && b.pos.Organisation || b.is_capital() && a.pos.Organisation, 'Organisation'],
        //two-word quote
        [a.text.match(/^["']/) && b.text.match(/["']$/), 'Noun'],
        //will walk (perfect)
        [a.normal === 'will' && b.pos.Verb, 'PerfectTense'],
        //will have walked (pluperfect)
        [a.normal.match(/^will ha(ve|d)$/) && b.pos.Verb, 'PluperfectTense'],
      ];
      //first pair whose condition is truthy decides the tag
      const hit = candidates.find(function(pair) {
        return pair[0];
      });
      return hit ? hit[1] : false;
    };
    
    
    //Scan neighbouring terms and merge the pairs/triples accepted by
    //shouldLumpTwo / shouldLumpThree into a single term.
    //Slots of merged-away terms are set to null in the `terms` array itself
    //while scanning; the returned array is a filtered copy without those nulls.
    const fancy_lumping = function(terms) {
      for(let idx = 1; idx < terms.length; idx++) {
        const prev = terms[idx - 1];
        const curr = terms[idx];
        const next = terms[idx + 1];

        //rules for lumping two terms
        const pairTag = shouldLumpTwo(prev, curr);
        if (pairTag !== false) {
          const PairClass = pos.classMapping[pairTag] || pos.Term;
          const merged = new PairClass(prev.text + ' ' + curr.text, pairTag);
          //wraps the freshly built term's own reason (whatever its constructor set, if anything)
          merged.reason = 'lumped(' + merged.reason + ')';
          //the merged term takes the later slot so it becomes `prev` on the next pass
          terms[idx] = merged;
          terms[idx - 1] = null;
          continue;
        }

        //rules for lumping three terms
        if (!next) {
          continue;
        }
        const tripleTag = shouldLumpThree(prev, curr, next);
        if (tripleTag === false) {
          continue;
        }
        const TripleClass = pos.classMapping[tripleTag] || pos.Term;
        const joined = new TripleClass([prev.text, curr.text, next.text].join(' '), tripleTag);
        //here the reason of the middle (pre-merge) term is the one wrapped
        joined.reason = 'lumped(' + curr.reason + ')';
        terms[idx - 1] = joined;
        terms[idx] = null;
        terms[idx + 1] = null;
      }

      //remove killed terms
      return terms.filter(function(survivor) {
        return survivor !== null;
      });
    };
    
    
    module.exports = fancy_lumping;