l0bsterssg

node.js static responsive blog post generator
Log | Files | Refs | README

Tokenizer.js (16182B)


      1 const { defaults } = require('./defaults.js');
      2 const {
      3   rtrim,
      4   splitCells,
      5   escape,
      6   findClosingBracket
      7 } = require('./helpers.js');
      8 
      9 function outputLink(cap, link, raw) {
     10   const href = link.href;
     11   const title = link.title ? escape(link.title) : null;
     12   const text = cap[1].replace(/\\([\[\]])/g, '$1');
     13 
     14   if (cap[0].charAt(0) !== '!') {
     15     return {
     16       type: 'link',
     17       raw,
     18       href,
     19       title,
     20       text
     21     };
     22   } else {
     23     return {
     24       type: 'image',
     25       raw,
     26       href,
     27       title,
     28       text: escape(text)
     29     };
     30   }
     31 }
     32 
     33 function indentCodeCompensation(raw, text) {
     34   const matchIndentToCode = raw.match(/^(\s+)(?:```)/);
     35 
     36   if (matchIndentToCode === null) {
     37     return text;
     38   }
     39 
     40   const indentToCode = matchIndentToCode[1];
     41 
     42   return text
     43     .split('\n')
     44     .map(node => {
     45       const matchIndentInNode = node.match(/^\s+/);
     46       if (matchIndentInNode === null) {
     47         return node;
     48       }
     49 
     50       const [indentInNode] = matchIndentInNode;
     51 
     52       if (indentInNode.length >= indentToCode.length) {
     53         return node.slice(indentToCode.length);
     54       }
     55 
     56       return node;
     57     })
     58     .join('\n');
     59 }
     60 
     61 /**
     62  * Tokenizer
     63  */
     64 module.exports = class Tokenizer {
     65   constructor(options) {
     66     this.options = options || defaults;
     67   }
     68 
     69   space(src) {
     70     const cap = this.rules.block.newline.exec(src);
     71     if (cap) {
     72       if (cap[0].length > 1) {
     73         return {
     74           type: 'space',
     75           raw: cap[0]
     76         };
     77       }
     78       return { raw: '\n' };
     79     }
     80   }
     81 
     82   code(src, tokens) {
     83     const cap = this.rules.block.code.exec(src);
     84     if (cap) {
     85       const lastToken = tokens[tokens.length - 1];
     86       // An indented code block cannot interrupt a paragraph.
     87       if (lastToken && lastToken.type === 'paragraph') {
     88         return {
     89           raw: cap[0],
     90           text: cap[0].trimRight()
     91         };
     92       }
     93 
     94       const text = cap[0].replace(/^ {4}/gm, '');
     95       return {
     96         type: 'code',
     97         raw: cap[0],
     98         codeBlockStyle: 'indented',
     99         text: !this.options.pedantic
    100           ? rtrim(text, '\n')
    101           : text
    102       };
    103     }
    104   }
    105 
    106   fences(src) {
    107     const cap = this.rules.block.fences.exec(src);
    108     if (cap) {
    109       const raw = cap[0];
    110       const text = indentCodeCompensation(raw, cap[3] || '');
    111 
    112       return {
    113         type: 'code',
    114         raw,
    115         lang: cap[2] ? cap[2].trim() : cap[2],
    116         text
    117       };
    118     }
    119   }
    120 
    121   heading(src) {
    122     const cap = this.rules.block.heading.exec(src);
    123     if (cap) {
    124       return {
    125         type: 'heading',
    126         raw: cap[0],
    127         depth: cap[1].length,
    128         text: cap[2]
    129       };
    130     }
    131   }
    132 
    133   nptable(src) {
    134     const cap = this.rules.block.nptable.exec(src);
    135     if (cap) {
    136       const item = {
    137         type: 'table',
    138         header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
    139         align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
    140         cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
    141         raw: cap[0]
    142       };
    143 
    144       if (item.header.length === item.align.length) {
    145         let l = item.align.length;
    146         let i;
    147         for (i = 0; i < l; i++) {
    148           if (/^ *-+: *$/.test(item.align[i])) {
    149             item.align[i] = 'right';
    150           } else if (/^ *:-+: *$/.test(item.align[i])) {
    151             item.align[i] = 'center';
    152           } else if (/^ *:-+ *$/.test(item.align[i])) {
    153             item.align[i] = 'left';
    154           } else {
    155             item.align[i] = null;
    156           }
    157         }
    158 
    159         l = item.cells.length;
    160         for (i = 0; i < l; i++) {
    161           item.cells[i] = splitCells(item.cells[i], item.header.length);
    162         }
    163 
    164         return item;
    165       }
    166     }
    167   }
    168 
    169   hr(src) {
    170     const cap = this.rules.block.hr.exec(src);
    171     if (cap) {
    172       return {
    173         type: 'hr',
    174         raw: cap[0]
    175       };
    176     }
    177   }
    178 
    179   blockquote(src) {
    180     const cap = this.rules.block.blockquote.exec(src);
    181     if (cap) {
    182       const text = cap[0].replace(/^ *> ?/gm, '');
    183 
    184       return {
    185         type: 'blockquote',
    186         raw: cap[0],
    187         text
    188       };
    189     }
    190   }
    191 
    192   list(src) {
    193     const cap = this.rules.block.list.exec(src);
    194     if (cap) {
    195       let raw = cap[0];
    196       const bull = cap[2];
    197       const isordered = bull.length > 1;
    198       const isparen = bull[bull.length - 1] === ')';
    199 
    200       const list = {
    201         type: 'list',
    202         raw,
    203         ordered: isordered,
    204         start: isordered ? +bull.slice(0, -1) : '',
    205         loose: false,
    206         items: []
    207       };
    208 
    209       // Get each top-level item.
    210       const itemMatch = cap[0].match(this.rules.block.item);
    211 
    212       let next = false,
    213         item,
    214         space,
    215         b,
    216         addBack,
    217         loose,
    218         istask,
    219         ischecked;
    220 
    221       const l = itemMatch.length;
    222       for (let i = 0; i < l; i++) {
    223         item = itemMatch[i];
    224         raw = item;
    225 
    226         // Remove the list item's bullet
    227         // so it is seen as the next token.
    228         space = item.length;
    229         item = item.replace(/^ *([*+-]|\d+[.)]) */, '');
    230 
    231         // Outdent whatever the
    232         // list item contains. Hacky.
    233         if (~item.indexOf('\n ')) {
    234           space -= item.length;
    235           item = !this.options.pedantic
    236             ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
    237             : item.replace(/^ {1,4}/gm, '');
    238         }
    239 
    240         // Determine whether the next list item belongs here.
    241         // Backpedal if it does not belong in this list.
    242         if (i !== l - 1) {
    243           b = this.rules.block.bullet.exec(itemMatch[i + 1])[0];
    244           if (isordered ? b.length === 1 || (!isparen && b[b.length - 1] === ')')
    245             : (b.length > 1 || (this.options.smartLists && b !== bull))) {
    246             addBack = itemMatch.slice(i + 1).join('\n');
    247             list.raw = list.raw.substring(0, list.raw.length - addBack.length);
    248             i = l - 1;
    249           }
    250         }
    251 
    252         // Determine whether item is loose or not.
    253         // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
    254         // for discount behavior.
    255         loose = next || /\n\n(?!\s*$)/.test(item);
    256         if (i !== l - 1) {
    257           next = item.charAt(item.length - 1) === '\n';
    258           if (!loose) loose = next;
    259         }
    260 
    261         if (loose) {
    262           list.loose = true;
    263         }
    264 
    265         // Check for task list items
    266         istask = /^\[[ xX]\] /.test(item);
    267         ischecked = undefined;
    268         if (istask) {
    269           ischecked = item[1] !== ' ';
    270           item = item.replace(/^\[[ xX]\] +/, '');
    271         }
    272 
    273         list.items.push({
    274           type: 'list_item',
    275           raw,
    276           task: istask,
    277           checked: ischecked,
    278           loose: loose,
    279           text: item
    280         });
    281       }
    282 
    283       return list;
    284     }
    285   }
    286 
    287   html(src) {
    288     const cap = this.rules.block.html.exec(src);
    289     if (cap) {
    290       return {
    291         type: this.options.sanitize
    292           ? 'paragraph'
    293           : 'html',
    294         raw: cap[0],
    295         pre: !this.options.sanitizer
    296           && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
    297         text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
    298       };
    299     }
    300   }
    301 
    302   def(src) {
    303     const cap = this.rules.block.def.exec(src);
    304     if (cap) {
    305       if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
    306       const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
    307       return {
    308         tag,
    309         raw: cap[0],
    310         href: cap[2],
    311         title: cap[3]
    312       };
    313     }
    314   }
    315 
    316   table(src) {
    317     const cap = this.rules.block.table.exec(src);
    318     if (cap) {
    319       const item = {
    320         type: 'table',
    321         header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
    322         align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
    323         cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
    324       };
    325 
    326       if (item.header.length === item.align.length) {
    327         item.raw = cap[0];
    328 
    329         let l = item.align.length;
    330         let i;
    331         for (i = 0; i < l; i++) {
    332           if (/^ *-+: *$/.test(item.align[i])) {
    333             item.align[i] = 'right';
    334           } else if (/^ *:-+: *$/.test(item.align[i])) {
    335             item.align[i] = 'center';
    336           } else if (/^ *:-+ *$/.test(item.align[i])) {
    337             item.align[i] = 'left';
    338           } else {
    339             item.align[i] = null;
    340           }
    341         }
    342 
    343         l = item.cells.length;
    344         for (i = 0; i < l; i++) {
    345           item.cells[i] = splitCells(
    346             item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
    347             item.header.length);
    348         }
    349 
    350         return item;
    351       }
    352     }
    353   }
    354 
    355   lheading(src) {
    356     const cap = this.rules.block.lheading.exec(src);
    357     if (cap) {
    358       return {
    359         type: 'heading',
    360         raw: cap[0],
    361         depth: cap[2].charAt(0) === '=' ? 1 : 2,
    362         text: cap[1]
    363       };
    364     }
    365   }
    366 
    367   paragraph(src) {
    368     const cap = this.rules.block.paragraph.exec(src);
    369     if (cap) {
    370       return {
    371         type: 'paragraph',
    372         raw: cap[0],
    373         text: cap[1].charAt(cap[1].length - 1) === '\n'
    374           ? cap[1].slice(0, -1)
    375           : cap[1]
    376       };
    377     }
    378   }
    379 
    380   text(src, tokens) {
    381     const cap = this.rules.block.text.exec(src);
    382     if (cap) {
    383       const lastToken = tokens[tokens.length - 1];
    384       if (lastToken && lastToken.type === 'text') {
    385         return {
    386           raw: cap[0],
    387           text: cap[0]
    388         };
    389       }
    390 
    391       return {
    392         type: 'text',
    393         raw: cap[0],
    394         text: cap[0]
    395       };
    396     }
    397   }
    398 
    399   escape(src) {
    400     const cap = this.rules.inline.escape.exec(src);
    401     if (cap) {
    402       return {
    403         type: 'escape',
    404         raw: cap[0],
    405         text: escape(cap[1])
    406       };
    407     }
    408   }
    409 
    410   tag(src, inLink, inRawBlock) {
    411     const cap = this.rules.inline.tag.exec(src);
    412     if (cap) {
    413       if (!inLink && /^<a /i.test(cap[0])) {
    414         inLink = true;
    415       } else if (inLink && /^<\/a>/i.test(cap[0])) {
    416         inLink = false;
    417       }
    418       if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
    419         inRawBlock = true;
    420       } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
    421         inRawBlock = false;
    422       }
    423 
    424       return {
    425         type: this.options.sanitize
    426           ? 'text'
    427           : 'html',
    428         raw: cap[0],
    429         inLink,
    430         inRawBlock,
    431         text: this.options.sanitize
    432           ? (this.options.sanitizer
    433             ? this.options.sanitizer(cap[0])
    434             : escape(cap[0]))
    435           : cap[0]
    436       };
    437     }
    438   }
    439 
    440   link(src) {
    441     const cap = this.rules.inline.link.exec(src);
    442     if (cap) {
    443       const lastParenIndex = findClosingBracket(cap[2], '()');
    444       if (lastParenIndex > -1) {
    445         const start = cap[0].indexOf('!') === 0 ? 5 : 4;
    446         const linkLen = start + cap[1].length + lastParenIndex;
    447         cap[2] = cap[2].substring(0, lastParenIndex);
    448         cap[0] = cap[0].substring(0, linkLen).trim();
    449         cap[3] = '';
    450       }
    451       let href = cap[2];
    452       let title = '';
    453       if (this.options.pedantic) {
    454         const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
    455 
    456         if (link) {
    457           href = link[1];
    458           title = link[3];
    459         } else {
    460           title = '';
    461         }
    462       } else {
    463         title = cap[3] ? cap[3].slice(1, -1) : '';
    464       }
    465       href = href.trim().replace(/^<([\s\S]*)>$/, '$1');
    466       const token = outputLink(cap, {
    467         href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
    468         title: title ? title.replace(this.rules.inline._escapes, '$1') : title
    469       }, cap[0]);
    470       return token;
    471     }
    472   }
    473 
    474   reflink(src, links) {
    475     let cap;
    476     if ((cap = this.rules.inline.reflink.exec(src))
    477         || (cap = this.rules.inline.nolink.exec(src))) {
    478       let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
    479       link = links[link.toLowerCase()];
    480       if (!link || !link.href) {
    481         const text = cap[0].charAt(0);
    482         return {
    483           type: 'text',
    484           raw: text,
    485           text
    486         };
    487       }
    488       const token = outputLink(cap, link, cap[0]);
    489       return token;
    490     }
    491   }
    492 
    493   strong(src, maskedSrc, prevChar = '') {
    494     let match = this.rules.inline.strong.start.exec(src);
    495 
    496     if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
    497       maskedSrc = maskedSrc.slice(-1 * src.length);
    498       const endReg = match[0] === '**' ? this.rules.inline.strong.endAst : this.rules.inline.strong.endUnd;
    499 
    500       endReg.lastIndex = 0;
    501 
    502       let cap;
    503       while ((match = endReg.exec(maskedSrc)) != null) {
    504         cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
    505         if (cap) {
    506           return {
    507             type: 'strong',
    508             raw: src.slice(0, cap[0].length),
    509             text: src.slice(2, cap[0].length - 2)
    510           };
    511         }
    512       }
    513     }
    514   }
    515 
    516   em(src, maskedSrc, prevChar = '') {
    517     let match = this.rules.inline.em.start.exec(src);
    518 
    519     if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
    520       maskedSrc = maskedSrc.slice(-1 * src.length);
    521       const endReg = match[0] === '*' ? this.rules.inline.em.endAst : this.rules.inline.em.endUnd;
    522 
    523       endReg.lastIndex = 0;
    524 
    525       let cap;
    526       while ((match = endReg.exec(maskedSrc)) != null) {
    527         cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
    528         if (cap) {
    529           return {
    530             type: 'em',
    531             raw: src.slice(0, cap[0].length),
    532             text: src.slice(1, cap[0].length - 1)
    533           };
    534         }
    535       }
    536     }
    537   }
    538 
    539   codespan(src) {
    540     const cap = this.rules.inline.code.exec(src);
    541     if (cap) {
    542       let text = cap[2].replace(/\n/g, ' ');
    543       const hasNonSpaceChars = /[^ ]/.test(text);
    544       const hasSpaceCharsOnBothEnds = text.startsWith(' ') && text.endsWith(' ');
    545       if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
    546         text = text.substring(1, text.length - 1);
    547       }
    548       text = escape(text, true);
    549       return {
    550         type: 'codespan',
    551         raw: cap[0],
    552         text
    553       };
    554     }
    555   }
    556 
    557   br(src) {
    558     const cap = this.rules.inline.br.exec(src);
    559     if (cap) {
    560       return {
    561         type: 'br',
    562         raw: cap[0]
    563       };
    564     }
    565   }
    566 
    567   del(src) {
    568     const cap = this.rules.inline.del.exec(src);
    569     if (cap) {
    570       return {
    571         type: 'del',
    572         raw: cap[0],
    573         text: cap[1]
    574       };
    575     }
    576   }
    577 
    578   autolink(src, mangle) {
    579     const cap = this.rules.inline.autolink.exec(src);
    580     if (cap) {
    581       let text, href;
    582       if (cap[2] === '@') {
    583         text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
    584         href = 'mailto:' + text;
    585       } else {
    586         text = escape(cap[1]);
    587         href = text;
    588       }
    589 
    590       return {
    591         type: 'link',
    592         raw: cap[0],
    593         text,
    594         href,
    595         tokens: [
    596           {
    597             type: 'text',
    598             raw: text,
    599             text
    600           }
    601         ]
    602       };
    603     }
    604   }
    605 
    606   url(src, mangle) {
    607     let cap;
    608     if (cap = this.rules.inline.url.exec(src)) {
    609       let text, href;
    610       if (cap[2] === '@') {
    611         text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
    612         href = 'mailto:' + text;
    613       } else {
    614         // do extended autolink path validation
    615         let prevCapZero;
    616         do {
    617           prevCapZero = cap[0];
    618           cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
    619         } while (prevCapZero !== cap[0]);
    620         text = escape(cap[0]);
    621         if (cap[1] === 'www.') {
    622           href = 'http://' + text;
    623         } else {
    624           href = text;
    625         }
    626       }
    627       return {
    628         type: 'link',
    629         raw: cap[0],
    630         text,
    631         href,
    632         tokens: [
    633           {
    634             type: 'text',
    635             raw: text,
    636             text
    637           }
    638         ]
    639       };
    640     }
    641   }
    642 
    643   inlineText(src, inRawBlock, smartypants) {
    644     const cap = this.rules.inline.text.exec(src);
    645     if (cap) {
    646       let text;
    647       if (inRawBlock) {
    648         text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
    649       } else {
    650         text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
    651       }
    652       return {
    653         type: 'text',
    654         raw: cap[0],
    655         text
    656       };
    657     }
    658   }
    659 };