l0bsterssg

node.js static responsive blog post generator
Log | Files | Refs | README

Lexer.js (11468B)


      1 const Tokenizer = require('./Tokenizer.js');
      2 const { defaults } = require('./defaults.js');
      3 const { block, inline } = require('./rules.js');
      4 
      5 /**
      6  * smartypants text replacement
      7  */
      8 function smartypants(text) {
      9   return text
     10     // em-dashes
     11     .replace(/---/g, '\u2014')
     12     // en-dashes
     13     .replace(/--/g, '\u2013')
     14     // opening singles
     15     .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018')
     16     // closing singles & apostrophes
     17     .replace(/'/g, '\u2019')
     18     // opening doubles
     19     .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c')
     20     // closing doubles
     21     .replace(/"/g, '\u201d')
     22     // ellipses
     23     .replace(/\.{3}/g, '\u2026');
     24 }
     25 
     26 /**
     27  * mangle email addresses
     28  */
     29 function mangle(text) {
     30   let out = '',
     31     i,
     32     ch;
     33 
     34   const l = text.length;
     35   for (i = 0; i < l; i++) {
     36     ch = text.charCodeAt(i);
     37     if (Math.random() > 0.5) {
     38       ch = 'x' + ch.toString(16);
     39     }
     40     out += '&#' + ch + ';';
     41   }
     42 
     43   return out;
     44 }
     45 
     46 /**
     47  * Block Lexer
     48  */
     49 module.exports = class Lexer {
     50   constructor(options) {
     51     this.tokens = [];
     52     this.tokens.links = Object.create(null);
     53     this.options = options || defaults;
     54     this.options.tokenizer = this.options.tokenizer || new Tokenizer();
     55     this.tokenizer = this.options.tokenizer;
     56     this.tokenizer.options = this.options;
     57 
     58     const rules = {
     59       block: block.normal,
     60       inline: inline.normal
     61     };
     62 
     63     if (this.options.pedantic) {
     64       rules.block = block.pedantic;
     65       rules.inline = inline.pedantic;
     66     } else if (this.options.gfm) {
     67       rules.block = block.gfm;
     68       if (this.options.breaks) {
     69         rules.inline = inline.breaks;
     70       } else {
     71         rules.inline = inline.gfm;
     72       }
     73     }
     74     this.tokenizer.rules = rules;
     75   }
     76 
     77   /**
     78    * Expose Rules
     79    */
     80   static get rules() {
     81     return {
     82       block,
     83       inline
     84     };
     85   }
     86 
     87   /**
     88    * Static Lex Method
     89    */
     90   static lex(src, options) {
     91     const lexer = new Lexer(options);
     92     return lexer.lex(src);
     93   }
     94 
     95   /**
     96    * Static Lex Inline Method
     97    */
     98   static lexInline(src, options) {
     99     const lexer = new Lexer(options);
    100     return lexer.inlineTokens(src);
    101   }
    102 
    103   /**
    104    * Preprocessing
    105    */
    106   lex(src) {
    107     src = src
    108       .replace(/\r\n|\r/g, '\n')
    109       .replace(/\t/g, '    ');
    110 
    111     this.blockTokens(src, this.tokens, true);
    112 
    113     this.inline(this.tokens);
    114 
    115     return this.tokens;
    116   }
    117 
    118   /**
    119    * Lexing
    120    */
    121   blockTokens(src, tokens = [], top = true) {
    122     src = src.replace(/^ +$/gm, '');
    123     let token, i, l, lastToken;
    124 
    125     while (src) {
    126       // newline
    127       if (token = this.tokenizer.space(src)) {
    128         src = src.substring(token.raw.length);
    129         if (token.type) {
    130           tokens.push(token);
    131         }
    132         continue;
    133       }
    134 
    135       // code
    136       if (token = this.tokenizer.code(src, tokens)) {
    137         src = src.substring(token.raw.length);
    138         if (token.type) {
    139           tokens.push(token);
    140         } else {
    141           lastToken = tokens[tokens.length - 1];
    142           lastToken.raw += '\n' + token.raw;
    143           lastToken.text += '\n' + token.text;
    144         }
    145         continue;
    146       }
    147 
    148       // fences
    149       if (token = this.tokenizer.fences(src)) {
    150         src = src.substring(token.raw.length);
    151         tokens.push(token);
    152         continue;
    153       }
    154 
    155       // heading
    156       if (token = this.tokenizer.heading(src)) {
    157         src = src.substring(token.raw.length);
    158         tokens.push(token);
    159         continue;
    160       }
    161 
    162       // table no leading pipe (gfm)
    163       if (token = this.tokenizer.nptable(src)) {
    164         src = src.substring(token.raw.length);
    165         tokens.push(token);
    166         continue;
    167       }
    168 
    169       // hr
    170       if (token = this.tokenizer.hr(src)) {
    171         src = src.substring(token.raw.length);
    172         tokens.push(token);
    173         continue;
    174       }
    175 
    176       // blockquote
    177       if (token = this.tokenizer.blockquote(src)) {
    178         src = src.substring(token.raw.length);
    179         token.tokens = this.blockTokens(token.text, [], top);
    180         tokens.push(token);
    181         continue;
    182       }
    183 
    184       // list
    185       if (token = this.tokenizer.list(src)) {
    186         src = src.substring(token.raw.length);
    187         l = token.items.length;
    188         for (i = 0; i < l; i++) {
    189           token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
    190         }
    191         tokens.push(token);
    192         continue;
    193       }
    194 
    195       // html
    196       if (token = this.tokenizer.html(src)) {
    197         src = src.substring(token.raw.length);
    198         tokens.push(token);
    199         continue;
    200       }
    201 
    202       // def
    203       if (top && (token = this.tokenizer.def(src))) {
    204         src = src.substring(token.raw.length);
    205         if (!this.tokens.links[token.tag]) {
    206           this.tokens.links[token.tag] = {
    207             href: token.href,
    208             title: token.title
    209           };
    210         }
    211         continue;
    212       }
    213 
    214       // table (gfm)
    215       if (token = this.tokenizer.table(src)) {
    216         src = src.substring(token.raw.length);
    217         tokens.push(token);
    218         continue;
    219       }
    220 
    221       // lheading
    222       if (token = this.tokenizer.lheading(src)) {
    223         src = src.substring(token.raw.length);
    224         tokens.push(token);
    225         continue;
    226       }
    227 
    228       // top-level paragraph
    229       if (top && (token = this.tokenizer.paragraph(src))) {
    230         src = src.substring(token.raw.length);
    231         tokens.push(token);
    232         continue;
    233       }
    234 
    235       // text
    236       if (token = this.tokenizer.text(src, tokens)) {
    237         src = src.substring(token.raw.length);
    238         if (token.type) {
    239           tokens.push(token);
    240         } else {
    241           lastToken = tokens[tokens.length - 1];
    242           lastToken.raw += '\n' + token.raw;
    243           lastToken.text += '\n' + token.text;
    244         }
    245         continue;
    246       }
    247 
    248       if (src) {
    249         const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
    250         if (this.options.silent) {
    251           console.error(errMsg);
    252           break;
    253         } else {
    254           throw new Error(errMsg);
    255         }
    256       }
    257     }
    258 
    259     return tokens;
    260   }
    261 
    262   inline(tokens) {
    263     let i,
    264       j,
    265       k,
    266       l2,
    267       row,
    268       token;
    269 
    270     const l = tokens.length;
    271     for (i = 0; i < l; i++) {
    272       token = tokens[i];
    273       switch (token.type) {
    274         case 'paragraph':
    275         case 'text':
    276         case 'heading': {
    277           token.tokens = [];
    278           this.inlineTokens(token.text, token.tokens);
    279           break;
    280         }
    281         case 'table': {
    282           token.tokens = {
    283             header: [],
    284             cells: []
    285           };
    286 
    287           // header
    288           l2 = token.header.length;
    289           for (j = 0; j < l2; j++) {
    290             token.tokens.header[j] = [];
    291             this.inlineTokens(token.header[j], token.tokens.header[j]);
    292           }
    293 
    294           // cells
    295           l2 = token.cells.length;
    296           for (j = 0; j < l2; j++) {
    297             row = token.cells[j];
    298             token.tokens.cells[j] = [];
    299             for (k = 0; k < row.length; k++) {
    300               token.tokens.cells[j][k] = [];
    301               this.inlineTokens(row[k], token.tokens.cells[j][k]);
    302             }
    303           }
    304 
    305           break;
    306         }
    307         case 'blockquote': {
    308           this.inline(token.tokens);
    309           break;
    310         }
    311         case 'list': {
    312           l2 = token.items.length;
    313           for (j = 0; j < l2; j++) {
    314             this.inline(token.items[j].tokens);
    315           }
    316           break;
    317         }
    318         default: {
    319           // do nothing
    320         }
    321       }
    322     }
    323 
    324     return tokens;
    325   }
    326 
    327   /**
    328    * Lexing/Compiling
    329    */
    330   inlineTokens(src, tokens = [], inLink = false, inRawBlock = false, prevChar = '') {
    331     let token;
    332 
    333     // String with links masked to avoid interference with em and strong
    334     let maskedSrc = src;
    335     let match;
    336 
    337     // Mask out reflinks
    338     if (this.tokens.links) {
    339       const links = Object.keys(this.tokens.links);
    340       if (links.length > 0) {
    341         while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
    342           if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
    343             maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
    344           }
    345         }
    346       }
    347     }
    348     // Mask out other blocks
    349     while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
    350       maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    351     }
    352 
    353     while (src) {
    354       // escape
    355       if (token = this.tokenizer.escape(src)) {
    356         src = src.substring(token.raw.length);
    357         tokens.push(token);
    358         continue;
    359       }
    360 
    361       // tag
    362       if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
    363         src = src.substring(token.raw.length);
    364         inLink = token.inLink;
    365         inRawBlock = token.inRawBlock;
    366         tokens.push(token);
    367         continue;
    368       }
    369 
    370       // link
    371       if (token = this.tokenizer.link(src)) {
    372         src = src.substring(token.raw.length);
    373         if (token.type === 'link') {
    374           token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
    375         }
    376         tokens.push(token);
    377         continue;
    378       }
    379 
    380       // reflink, nolink
    381       if (token = this.tokenizer.reflink(src, this.tokens.links)) {
    382         src = src.substring(token.raw.length);
    383         if (token.type === 'link') {
    384           token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
    385         }
    386         tokens.push(token);
    387         continue;
    388       }
    389 
    390       // strong
    391       if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) {
    392         src = src.substring(token.raw.length);
    393         token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
    394         tokens.push(token);
    395         continue;
    396       }
    397 
    398       // em
    399       if (token = this.tokenizer.em(src, maskedSrc, prevChar)) {
    400         src = src.substring(token.raw.length);
    401         token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
    402         tokens.push(token);
    403         continue;
    404       }
    405 
    406       // code
    407       if (token = this.tokenizer.codespan(src)) {
    408         src = src.substring(token.raw.length);
    409         tokens.push(token);
    410         continue;
    411       }
    412 
    413       // br
    414       if (token = this.tokenizer.br(src)) {
    415         src = src.substring(token.raw.length);
    416         tokens.push(token);
    417         continue;
    418       }
    419 
    420       // del (gfm)
    421       if (token = this.tokenizer.del(src)) {
    422         src = src.substring(token.raw.length);
    423         token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
    424         tokens.push(token);
    425         continue;
    426       }
    427 
    428       // autolink
    429       if (token = this.tokenizer.autolink(src, mangle)) {
    430         src = src.substring(token.raw.length);
    431         tokens.push(token);
    432         continue;
    433       }
    434 
    435       // url (gfm)
    436       if (!inLink && (token = this.tokenizer.url(src, mangle))) {
    437         src = src.substring(token.raw.length);
    438         tokens.push(token);
    439         continue;
    440       }
    441 
    442       // text
    443       if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
    444         src = src.substring(token.raw.length);
    445         prevChar = token.raw.slice(-1);
    446         tokens.push(token);
    447         continue;
    448       }
    449 
    450       if (src) {
    451         const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
    452         if (this.options.silent) {
    453           console.error(errMsg);
    454           break;
    455         } else {
    456           throw new Error(errMsg);
    457         }
    458       }
    459     }
    460 
    461     return tokens;
    462   }
    463 };