• Jump To … +
    abbreviations.js adjectives.js convertables.js dates.js demonyms.js firstnames.js honourifics.js irregular_nouns.js irregular_verbs.js misc.js multiples.js numbers.js organisations.js phrasal_verbs.js places.js uncountables.js verbs.js fns.js index.js lexicon.js negate.js passive_voice.js contractions.js fancy_lumping.js grammar_rules.js parts_of_speech.js phrasal_verbs.js tagger.js word_rules.js question.js sentence.js statement.js tense.js adjective.js to_adverb.js to_comparative.js to_noun.js to_superlative.js adverb.js to_adjective.js is_acronym.js article.js date.js date_rules.js is_date.js parse_date.js is_plural.js is_uncountable.js noun.js is_organisation.js organisation.js gender.js is_person.js parse_name.js person.js is_place.js place.js pluralize.js pronoun.js singularize.js is_value.js numbers.js to_number.js units.js value.js term.js conjugate.js from_infinitive.js predict_form.js suffix_rules.js to_actor.js to_infinitive.js negate.js verb.js sentence_parser.js text.js
  • firstnames.js

  • ¶

    Common first names in compressed form, from http://www.ssa.gov/oact/babynames/limits.html and http://www.servicealberta.gov.ab.ca/pdf/vs/2001_Boys.pdf. Not sure what regional/cultural/demographic bias this has — probably a lot. 73% of people are represented in the top 1000 names. Used to reduce redundant named-entities in longer text (don’t spot the same person twice), and to identify gender for coreference resolution.

    'use strict';
    // Lookup table filled in below: first name -> gender code.
    // A plain object rather than an array, because it is only ever indexed by
    // string keys; an array would serialise as `[]` and report length 0.
    const main = {};
  • ¶

    An ad-hoc prefix encoding for names: each key is a shared prefix, and its value is a comma-separated list of suffixes. Decompressing the names takes about 2ms.

    // Prefix-compressed male first names. Each key is a shared prefix and each
    // value is a comma-separated list of suffixes; prefix + suffix is one name.
    // An empty suffix (e.g. the leading ',' in 'fred') stands for the bare
    // prefix itself.
    const male_names = {
      'will': 'iam,ie,ard,is,iams',
      'fred': ',erick,die,rick,dy',
      'marc': 'us,,o,os,el',
      'darr': 'ell,yl,en,el,in',
      'fran': 'k,cis,cisco,klin,kie',
      'terr': 'y,ance,ence,ell',
      'rand': 'y,all,olph,al',
      'brad': 'ley,,ford,y',
      'jeff': 'rey,,ery,ry',
      'john': ',ny,nie,athan',
      'greg': 'ory,,g,orio',
      'mar': 'k,tin,vin,io,shall,ty,lon,lin',
      'car': 'l,los,lton,roll,y,ey',
      'ken': 'neth,,t,ny,dall,drick',
      'har': 'old,ry,vey,ley,lan,rison',
      'ste': 'ven,phen,ve,wart,phan,rling',
      'jer': 'ry,emy,ome,emiah,maine,ald',
      'mic': 'hael,heal,ah,key,hel',
      'dar': 'yl,in,nell,win,ius',
      'dan': 'iel,ny,,e',
      'wil': 'bur,son,bert,fred,fredo',
      'ric': 'hard,ky,ardo,k,key',
      'cli': 'fford,nton,fton,nt,ff',
      'cla': 'rence,ude,yton,rk,y',
      'ben': 'jamin,,nie,ny,ito',
      'rod': 'ney,erick,olfo,ger,',
      'rob': 'ert,erto,bie,',
      'gar': 'y,ry,rett,land',
      'sam': 'uel,,my,mie',
      'and': 'rew,re,y,res',
      'jos': 'eph,e,hua,h',
      'joe': ',l,y,sph',
      'leo': 'nard,n,,nardo',
      'tom': ',my,as,mie',
      'bry': 'an,ant,ce,on',
      'ant': 'hony,onio,oine,on',
      'jac': 'k,ob,kson',
      'cha': 'rles,d,rlie,se',
      'sha': 'wn,ne,un',
      'bre': 'nt,tt,ndan,t',
      'jes': 'se,us,s',
      'al': 'bert,an,len,fred,exander,ex,vin,lan,fredo,berto,ejandro,fonso,ton,,onzo,i,varo',
      'ro': 'nald,ger,y,nnie,land,n,ss,osevelt,gelio,lando,man,cky,yce,scoe,ry',
      'de': 'nnis,rek,an,rrick,lbert,vin,wey,xter,wayne,metrius,nis,smond',
      'ja': 'mes,son,y,red,vier,ke,sper,mal,rrod',
      'el': 'mer,lis,bert,ias,ijah,don,i,ton,liot,liott,vin,wood',
      'ma': 'tthew,nuel,urice,thew,x,tt,lcolm,ck,son',
      'do': 'nald,uglas,n,nnie,ug,minic,yle,mingo,minick',
      'er': 'ic,nest,ik,nesto,ick,vin,nie,win',
      'ra': 'ymond,lph,y,mon,fael,ul,miro,phael',
      'ed': 'ward,win,die,gar,uardo,,mund,mond',
      'co': 'rey,ry,dy,lin,nrad,rnelius',
      'le': 'roy,wis,ster,land,vi',
      'lo': 'uis,nnie,renzo,ren,well,uie,u,gan',
      'da': 'vid,le,ve,mon,llas,mian,mien',
      'jo': 'nathan,n,rge,rdan,nathon,aquin',
      'ru': 'ssell,ben,dolph,dy,fus,ssel,sty',
      'ke': 'vin,ith,lvin,rmit',
      'ar': 'thur,nold,mando,turo,chie,mand',
      're': 'ginald,x,ynaldo,uben,ggie',
      'ge': 'orge,rald,ne,rard,offrey,rardo',
      'la': 'rry,wrence,nce,urence,mar,mont',
      'mo': 'rris,ses,nte,ises,nty',
      'ju': 'an,stin,lio,lian,lius,nior',
      'pe': 'ter,dro,rry,te,rcy',
      'tr': 'avis,oy,evor,ent',
      'he': 'nry,rbert,rman,ctor,ath',
      'no': 'rman,el,ah,lan,rbert',
      'em': 'anuel,il,ilio,mett,manuel',
      'wa': 'lter,yne,rren,llace,de',
      'mi': 'ke,guel,lton,tchell,les',
      'sa': 'lvador,lvatore,ntiago,ul,ntos',
      'ch': 'ristopher,ris,ester,ristian,uck',
      'pa': 'ul,trick,blo,t',
      'st': 'anley,uart,an',
      'hu': 'gh,bert,go,mberto',
      'br': 'ian,uce,andon,ain',
      'vi': 'ctor,ncent,rgil,cente',
      'ca': 'lvin,meron,leb',
      'gu': 'y,illermo,stavo',
      'lu': 'is,ther,ke,cas',
      'gr': 'ant,ady,over,aham',
      'ne': 'il,lson,al,d',
      't': 'homas,imothy,odd,ony,heodore,im,yler,ed,yrone,aylor,erence,immy,oby,eddy,yson',
      's': 'cott,ean,idney,ergio,eth,pencer,herman,ylvester,imon,heldon,cotty,olomon',
      'r': 'yan',
      'n': 'icholas,athan,athaniel,ick,icolas',
      'a': 'dam,aron,drian,ustin,ngelo,braham,mos,bel,gustin,ugust,dolfo',
      'b': 'illy,obby,arry,ernard,ill,ob,yron,lake,ert,oyd,illie,laine,art,uddy,urton',
      'e': 'ugene,arl,verett,nrique,van,arnest,frain,than,steban',
      'h': 'oward,omer,orace,ans,al',
      'p': 'hillip,hilip,reston,hil,ierre',
      'c': 'raig,urtis,lyde,ecil,esar,edric,leveland,urt',
      'j': 'immy,im,immie',
      'g': 'lenn,ordon,len,ilbert,abriel,ilberto',
      'm': 'elvin,yron,erle,urray',
      'k': 'yle,arl,urt,irk,ristopher',
      'o': 'scar,tis,liver,rlando,mar,wen,rville,tto',
      'l': 'loyd,yle,ionel',
      'f': 'loyd,ernando,elix,elipe,orrest,abian,idel',
      'w': 'esley,endell,m,oodrow,inston',
      'd': 'ustin,uane,wayne,wight,rew,ylan',
      'z': 'achary',
      'v': 'ernon,an,ance',
      'i': 'an,van,saac,ra,rving,smael,gnacio,rvin',
      'q': 'uentin,uinton',
      'x': 'avier'
    };
    // Prefix-compressed female first names, in the same layout as the male
    // table: key = shared prefix, value = comma-separated suffixes, and an
    // empty suffix (e.g. the leading ',' in 'rosa') means the bare prefix.
    const female_names = {
      'mari': 'a,e,lyn,an,anne,na,ssa,bel,sa,sol,tza',
      'kris': 'ten,tin,tina,ti,tine,ty,ta,tie',
      'jean': 'ette,ne,nette,nie,ine,nine',
      'chri': 'stine,stina,sty,stie,sta,sti',
      'marg': 'aret,ie,arita,uerite,ret,o',
      'ange': 'la,lica,lina,lia,line',
      'fran': 'ces,cine,cisca',
      'kath': 'leen,erine,y,ryn,arine',
      'sher': 'ry,ri,yl,i,rie',
      'caro': 'l,lyn,line,le,lina',
      'dian': 'e,a,ne,na',
      'jenn': 'ifer,ie,y,a',
      'luci': 'lle,a,nda,le',
      'kell': 'y,i,ey,ie',
      'rosa': ',lie,lind',
      'jani': 'ce,e,s,ne',
      'stac': 'y,ey,ie,i',
      'shel': 'ly,ley,ia',
      'laur': 'a,en,ie,el',
      'trac': 'y,ey,i,ie',
      'jane': 't,,lle,tte',
      'bett': 'y,ie,e,ye',
      'rose': 'mary,marie,tta',
      'joan': ',ne,n,na',
      'mar': 'y,tha,jorie,cia,lene,sha,yann,cella,ta,la,cy,tina',
      'lor': 'i,raine,etta,a,ena,ene,na,ie',
      'sha': 'ron,nnon,ri,wna,nna,na,una',
      'dor': 'othy,is,a,een,thy,othea',
      'cla': 'ra,udia,ire,rice,udette',
      'eli': 'zabeth,sa,sabeth,se,za',
      'kar': 'en,la,a,i,in',
      'tam': 'my,ara,i,mie,ika',
      'ann': 'a,,e,ie,ette',
      'car': 'men,rie,la,a,mela',
      'mel': 'issa,anie,inda',
      'ali': 'ce,cia,son,sha,sa',
      'bri': 'ttany,dget,ttney,dgette',
      'lyn': 'n,da,ne,ette',
      'del': 'ores,la,ia,oris',
      'ter': 'esa,ri,i',
      'son': 'ia,ya,ja,dra',
      'deb': 'orah,ra,bie,ora',
      'jac': 'queline,kie,quelyn,lyn',
      'lat': 'oya,asha,onya,isha',
      'che': 'ryl,lsea,ri,rie',
      'vic': 'toria,ki,kie,ky',
      'sus': 'an,ie,anne,ana',
      'rob': 'erta,yn',
      'est': 'her,elle,ella,er',
      'lea': 'h,,nne,nn',
      'lil': 'lian,lie,a,y',
      'ma': 'ureen,ttie,xine,bel,e,deline,ggie,mie,ble,ndy,ude,yra,nuela,vis,gdalena,tilda',
      'jo': 'yce,sephine,,di,dy,hanna,sefina,sie,celyn,lene,ni,die',
      'be': 'verly,rtha,atrice,rnice,th,ssie,cky,linda,ulah,rnadette,thany,tsy,atriz',
      'ca': 'therine,thy,ssandra,ndace,ndice,mille,itlin,ssie,thleen,llie',
      'le': 'slie,na,ona,ticia,igh,la,nora,ola,sley,ila',
      'el': 'aine,len,eanor,sie,la,ena,oise,vira,sa,va,ma',
      'sa': 'ndra,rah,ra,lly,mantha,brina,ndy,die,llie',
      'mi': 'chelle,ldred,chele,nnie,riam,sty,ndy,randa,llie',
      'co': 'nnie,lleen,nstance,urtney,ra,rinne,nsuelo,rnelia',
      'ju': 'lie,dith,dy,lia,anita,ana,stine',
      'da': 'wn,nielle,rlene,na,isy,rla,phne',
      're': 'becca,nee,na,bekah,ba',
      'al': 'ma,lison,berta,exandra,yssa,ta',
      'ra': 'chel,mona,chael,quel,chelle',
      'an': 'drea,ita,a,gie,toinette,tonia',
      'ge': 'raldine,rtrude,orgia,nevieve,orgina',
      'de': 'nise,anna,siree,na,ana,e',
      'ja': 'smine,na,yne',
      'lu': 'cy,z,la,pe,ella,isa',
      'je': 'ssica,nifer,well,ri',
      'ad': 'a,rienne,die,ele,riana,eline',
      'pa': 'tricia,mela,ula,uline,tsy,m,tty,ulette,tti,trice,trica,ige',
      'ke': 'ndra,rri,isha,ri',
      'mo': 'nica,lly,nique,na,llie',
      'lo': 'uise,is,la',
      'he': 'len,ather,idi,nrietta,lene,lena',
      'me': 'gan,rcedes,redith,ghan,agan',
      'wi': 'lma,lla,nnie',
      'ga': 'il,yle,briela,brielle,le',
      'er': 'in,ica,ika,ma,nestine',
      'ce': 'cilia,lia,celia,leste,cile',
      'ka': 'tie,y,trina,yla,te',
      'ol': 'ga,ivia,lie,a',
      'li': 'nda,sa,ndsay,ndsey,zzie',
      'na': 'ncy,talie,omi,tasha,dine',
      'la': 'verne,na,donna,ra',
      'vi': 'rginia,vian,ola',
      'ha': 'rriet,nnah',
      'pe': 'ggy,arl,nny,tra',
      'br': 'enda,andi,ooke',
      'ki': 'mberly,m,mberley,rsten',
      'au': 'drey,tumn,dra',
      'bo': 'nnie,bbie,nita,bbi',
      'do': 'nna,lores,lly,minique',
      'gl': 'oria,adys,enda,enna',
      'tr': 'icia,ina,isha,udy',
      'ta': 'ra,nya,sha,bitha',
      'ro': 'sie,xanne,chelle,nda',
      'am': 'y,anda,ber,elia',
      'fa': 'ye,nnie,y',
      'ni': 'cole,na,chole,kki',
      've': 'ronica,ra,lma,rna',
      'gr': 'ace,etchen,aciela,acie',
      'b': 'arbara,lanca,arbra,ianca',
      'r': 'uth,ita,honda',
      's': 'hirley,tephanie,ylvia,heila,uzanne,ue,tella,ophia,ilvia,ophie,tefanie,heena,ummer,elma,ocorro,ybil,imone',
      'c': 'ynthia,rystal,indy,harlene,ristina,leo',
      'e': 'velyn,mily,dna,dith,thel,mma,va,ileen,unice,ula,ssie,ffie,tta,ugenia',
      'a': 'shley,pril,gnes,rlene,imee,bigail,ida,bby,ileen',
      't': 'heresa,ina,iffany,helma,onya,oni,herese,onia',
      'i': 'rene,da,rma,sabel,nez,ngrid,va,mogene,sabelle',
      'w': 'anda,endy,hitney',
      'p': 'hyllis,riscilla,olly',
      'n': 'orma,ellie,ora,ettie,ell',
      'f': 'lorence,elicia,lora,reda,ern,rieda',
      'v': 'alerie,anessa',
      'j': 'ill,illian',
      'y': 'vonne,olanda,vette',
      'g': 'ina,wendolyn,wen,oldie',
      'l': 'ydia',
      'm': 'yrtle,yra,uriel,yrna',
      'h': 'ilda',
      'o': 'pal,ra,felia',
      'k': 'rystal',
      'd': 'ixie,ina',
      'u': 'rsula'
    };
    // Unisex names, stored uncompressed. Each one is tagged 'a' in the lookup
    // after the male and female tables have been expanded.
    // ('lee' used to be listed twice; the redundant entry has been dropped.)
    const ambiguous = [
      'casey',
      'jamie',
      'lee',
      'jaime',
      'jessie',
      'morgan',
      'rene',
      'robin',
      'devon',
      'kerry',
      'alexis',
      'guadalupe',
      'blair',
      'kasey',
      'jean',
      'marion',
      'aubrey',
      'shelby',
      'jan',
      'shea',
      'jade',
      'kenyatta',
      'kelsey',
      'shay',
      'lashawn',
      'trinity',
      'regan',
      'jammie',
      'cassidy',
      'cheyenne',
      'reagan',
      'shiloh',
      'marlo',
      'andra',
      'devan',
      'rosario'
    ];
  • ¶

    Add the data into the main object. Males first:

    // Expand the male table: every prefix + suffix pair becomes a key tagged 'm'.
    // `keys` and `l` stay declared with `let` because later statements reassign them.
    let keys = Object.keys(male_names);
    let l = keys.length;
    keys.forEach((prefix) => {
      male_names[prefix].split(',').forEach((suffix) => {
        main[prefix + suffix] = 'm';
      });
    });
  • ¶

    females

    // Expand the female table: every prefix + suffix pair becomes a key tagged
    // 'f', overwriting whatever value that name already had in the lookup.
    keys = Object.keys(female_names);
    l = keys.length;
    keys.forEach((prefix) => {
      female_names[prefix].split(',').forEach((suffix) => {
        main[prefix + suffix] = 'f';
      });
    });
  • ¶

    unisex names

    // Tag every unisex name 'a', replacing any 'm'/'f' value already stored for it.
    l = ambiguous.length;
    for (const name of ambiguous) {
      main[name] = 'a';
    }
  • ¶

    console.log(firstnames['spencer']); console.log(firstnames['jill']); console.log(firstnames['sue']); console.log(firstnames['jan']); console.log(JSON.stringify(Object.keys(firstnames).length, null, 2));

    // Export the flat lookup: first name -> 'm' (male), 'f' (female) or 'a' (unisex).
    module.exports = main;