l0bsterssg

node.js static responsive blog post generator
Log | Files | Refs | README

rules.js (13077B)


      1 const {
      2   noopTest,
      3   edit,
      4   merge
      5 } = require('./helpers.js');
      6 
      7 /**
      8  * Block-Level Grammar. Bare words inside the patterns (bull, hr, def, label, title, comment, tag, attribute, paragraph) are placeholders that the edit() calls further down replace.
      9  */
     10 const block = {
     11   newline: /^\n+/, // a run of newline characters
     12   code: /^( {4}[^\n]+\n*)+/, // consecutive lines that begin with four spaces
     13   fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/, // group 1: fence run, group 2: rest of the opening line, group 3: body; ends on a line repeating group 1 or at end of input
     14   hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, // three or more -, _ or * (spaces allowed between them) alone on a line
     15   heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/, // group 1: one to six #, group 2: heading text; trailing #'s are dropped
     16   blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
     17   list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/, // group 1: indent, group 2: first marker
     18   html: '^ {0,3}(?:' // optional indentation
     19     + '<(script|pre|style)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
     20     + '|comment[^\\n]*(\\n+|$)' // (2)
     21     + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
     22     + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
     23     + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
     24     + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:\\n{2,}|$)' // (6)
     25     + '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag
     26     + '|</(?!script|pre|style)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag
     27     + ')',
     28   def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, // group 1: label, group 2: destination, group 3: optional title
     29   nptable: noopTest, // stand-in; block.gfm supplies the real pattern
     30   table: noopTest, // stand-in; block.gfm supplies the real pattern
     31   lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/, // group 1: text line, group 2: the = or - underline
     32   // regex template, placeholders will be replaced according to different paragraph
     33   // interruption rules of commonmark and the original markdown spec:
     34   _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html)[^\n]+)*)/,
     35   text: /^[^\n]+/ // everything up to the next newline
     36 };
     37 
     38 block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/; // label text: escaped brackets or non-bracket chars; rejected when empty or whitespace-only
     39 block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; // title wrapped in "...", '...' or (...)
     40 block.def = edit(block.def) // fill the label/title placeholders of the link reference definition pattern
     41   .replace('label', block._label)
     42   .replace('title', block._title)
     43   .getRegex();
     44 
     45 block.bullet = /(?:[*+-]|\d{1,9}[.)])/; // list marker: *, + or -, or one to nine digits followed by . or )
     46 block.item = /^( *)(bull) ?[^\n]*(?:\n(?!\1bull ?)[^\n]*)*/; // marker line plus following lines, stopping before a line that repeats the same indent (group 1) and a marker
     47 block.item = edit(block.item, 'gm') // 'gm' is handed to edit() as its second argument; presumably regex flags (helper not shown here)
     48   .replace(/bull/g, block.bullet)
     49   .getRegex();
     50 
     51 block.list = edit(block.list)
     52   .replace(/bull/g, block.bullet)
     53   .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))') // list ends at newlines followed by a thematic break (lookahead only, the break is not consumed)
     54   .replace('def', '\\n+(?=' + block.def.source + ')') // list ends at newlines followed by a link reference definition (lookahead only)
     55   .getRegex();
     56 
     57 block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' // element names substituted for the `tag` placeholder in the html, paragraph and GFM table rules
     58   + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
     59   + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
     60   + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
     61   + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
     62   + '|track|ul';
     63 block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/; // HTML comment; an unterminated one runs to end of input, and <!--> / <!---> are rejected
     64 block.html = edit(block.html, 'i') // expand the comment/tag/attribute placeholders of the html pattern
     65   .replace('comment', block._comment)
     66   .replace('tag', block._tag)
     67   .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
     68   .getRegex();
     69 
     70 block.paragraph = edit(block._paragraph) // each placeholder becomes a construct that stops the paragraph when it starts the next line
     71   .replace('hr', block.hr)
     72   .replace('heading', ' {0,3}#{1,6} ')
     73   .replace('|lheading', '') // setext headings don't interrupt commonmark paragraphs
     74   .replace('blockquote', ' {0,3}>')
     75   .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
     76   .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // bullet lists, or ordered lists numbered 1, can interrupt
     77   .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)')
     78   .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
     79   .getRegex();
     80 
     81 block.blockquote = edit(block.blockquote) // embeds the finished block.paragraph regex, so it has to be built after it
     82   .replace('paragraph', block.paragraph)
     83   .getRegex();
     84 
     85 /**
     86  * Normal Block Grammar: the rules built above, merged into a new object
     87  */
     88 
     89 block.normal = merge({}, block);
     90 
     91 /**
     92  * GFM Block Grammar: the normal rules plus patterns for the two table forms (nptable, table)
     93  */
     94 
     95 block.gfm = merge({}, block.normal, {
     96   nptable: '^ *([^|\\n ].*\\|.*)\\n' // Header (first character is not a pipe)
     97     + ' {0,3}([-:]+ *\\|[-| :]*)' // Align
     98     + '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)', // Cells
     99   table: '^ *\\|(.+)\\n' // Header (starts with a pipe)
    100     + ' {0,3}\\|?( *[-:]+[-| :]*)' // Align
    101     + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
    102 });
    103 
    104 block.gfm.nptable = edit(block.gfm.nptable) // each placeholder becomes a construct that ends the table body when it starts a row line
    105   .replace('hr', block.hr)
    106   .replace('heading', ' {0,3}#{1,6} ')
    107   .replace('blockquote', ' {0,3}>')
    108   .replace('code', ' {4}[^\\n]')
    109   .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
    110   .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // bullet lists, or ordered lists numbered 1, can interrupt
    111   .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)')
    112   .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
    113   .getRegex();
    114 
    115 block.gfm.table = edit(block.gfm.table) // same substitutions as block.gfm.nptable, applied to the pipe-led table pattern
    116   .replace('hr', block.hr)
    117   .replace('heading', ' {0,3}#{1,6} ')
    118   .replace('blockquote', ' {0,3}>')
    119   .replace('code', ' {4}[^\\n]')
    120   .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
    121   .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // bullet lists, or ordered lists numbered 1, can interrupt
    122   .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)')
    123   .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
    124   .getRegex();
    125 
    126 /**
    127  * Pedantic grammar (John Gruber's original, loose markdown specification): overrides html, def, heading, fences and paragraph
    128  */
    129 
    130 block.pedantic = merge({}, block.normal, {
    131   html: edit(
    132     '^ *(?:comment *(?:\\n|\\s*$)'
    133     + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
    134     + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
    135     .replace('comment', block._comment)
    136     .replace(/tag/g, '(?!(?:' // `tag` here is any word that is not one of the element names listed below
    137       + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
    138       + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
    139       + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
    140     .getRegex(),
    141   def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
    142   heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/, // unlike the base rule, no space is required after the #'s
    143   fences: noopTest, // fences not supported
    144   paragraph: edit(block.normal._paragraph)
    145     .replace('hr', block.hr)
    146     .replace('heading', ' *#{1,6} *[^\n]') // NOTE(review): '\n' here is an actual newline in the string (other rules write '\\n'); inside [^ ] it should match identically - confirm
    147     .replace('lheading', block.lheading) // unlike block.paragraph, setext headings stay in the interruption list
    148     .replace('blockquote', ' {0,3}>')
    149     .replace('|fences', '')
    150     .replace('|list', '')
    151     .replace('|html', '')
    152     .getRegex()
    153 });
    154 
    155 /**
    156  * Inline-Level Grammar. Bare words inside the patterns (scheme, email, comment, attribute, label, href, title, punctuation, overlapSkip, reflink, nolink) are placeholders that the edit() calls further down replace.
    157  */
    158 const inline = {
    159   escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
    160   autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
    161   url: noopTest,
    162   tag: '^comment'
    163     + '|^</[a-zA-Z][\\w:-]*\\s*>' // closing tag
    164     + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
    165     + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
    166     + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
    167     + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
    168   link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
    169   reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
    170   nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
    171   reflinkSearch: 'reflink|nolink(?!\\()',
    172   strong: {
    173     start: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])|__/, // (1) returns if starts w/ punctuation
    174     middle: /^\*\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*\*$|^__(?![\s])((?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?)__$/,
    175     endAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation_\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
    176     endUnd: /[^\s]__(?!_)(?:(?=[punctuation*\s])|$)/ // last char can't be a space, and final _ must precede punct or \s (or endline)
    177   },
    178   em: {
    179     start: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // (1) returns if starts w/ punctuation
    180     middle: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/,
    181     endAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation_\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
    182     endUnd: /[^\s]_(?!_)(?:(?=[punctuation*\s])|$)/ // last char can't be a space, and final _ must precede punct or \s (or endline)
    183   },
    184   code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, // group 1: opening backtick run, group 2: content; closes on the same-length run
    185   br: /^( {2,}|\\)\n(?!\s*$)/, // two or more spaces, or a backslash, before a newline that is not trailing whitespace
    186   del: noopTest,
    187   text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n)))/,
    188   punctuation: /^([\s*punctuation])/
    189 };
    190 
    191 // list of punctuation marks from the commonmark spec,
    192 // without * and _ to work around cases with double emphasis
    193 inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
    194 inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
    195 
    196 // sequences em should skip over [title](link), `code`, <html>
    197 inline._blockSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
    198 inline._overlapSkip = '__[^_]*?__|\\*\\*\\[^\\*\\]*?\\*\\*';
    199 
    200 inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); // block._comment without its end-of-input alternative: an inline comment must be closed by -->
    201 
    202 inline.em.start = edit(inline.em.start) // from here to strong.endUnd: expand the punctuation / overlapSkip placeholders of every em and strong sub-pattern
    203   .replace(/punctuation/g, inline._punctuation)
    204   .getRegex();
    205 
    206 inline.em.middle = edit(inline.em.middle)
    207   .replace(/punctuation/g, inline._punctuation)
    208   .replace(/overlapSkip/g, inline._overlapSkip)
    209   .getRegex();
    210 
    211 inline.em.endAst = edit(inline.em.endAst, 'g') // only the endAst/endUnd patterns pass 'g' to edit()
    212   .replace(/punctuation/g, inline._punctuation)
    213   .getRegex();
    214 
    215 inline.em.endUnd = edit(inline.em.endUnd, 'g')
    216   .replace(/punctuation/g, inline._punctuation)
    217   .getRegex();
    218 
    219 inline.strong.start = edit(inline.strong.start)
    220   .replace(/punctuation/g, inline._punctuation)
    221   .getRegex();
    222 
    223 inline.strong.middle = edit(inline.strong.middle)
    224   .replace(/punctuation/g, inline._punctuation)
    225   .replace(/overlapSkip/g, inline._overlapSkip)
    226   .getRegex();
    227 
    228 inline.strong.endAst = edit(inline.strong.endAst, 'g')
    229   .replace(/punctuation/g, inline._punctuation)
    230   .getRegex();
    231 
    232 inline.strong.endUnd = edit(inline.strong.endUnd, 'g')
    233   .replace(/punctuation/g, inline._punctuation)
    234   .getRegex();
    235 
    236 inline.blockSkip = edit(inline._blockSkip, 'g') // regex built from the _blockSkip source string, with 'g' passed to edit()
    237   .getRegex();
    238 
    239 inline.overlapSkip = edit(inline._overlapSkip, 'g') // regex built from the _overlapSkip source string, with 'g' passed to edit()
    240   .getRegex();
    241 
    242 inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; // same character set as inline.escape, but unanchored and global
    243 
    244 inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; // a letter followed by 1 to 31 letters, digits, +, . or -
    245 inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; // the @ sits in its own capture group
    246 inline.autolink = edit(inline.autolink) // <scheme:...> or <email>
    247   .replace('scheme', inline._scheme)
    248   .replace('email', inline._email)
    249   .getRegex();
    250 
    251 inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; // attribute name with an optional double-quoted, single-quoted or unquoted value
    252 
    253 inline.tag = edit(inline.tag) // expand the comment/attribute placeholders of the inline tag pattern
    254   .replace('comment', inline._comment)
    255   .replace('attribute', inline._attribute)
    256   .getRegex();
    257 
    258 inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; // link text: bracketed groups, backslash escapes, backtick spans or plain chars, matched lazily
    259 inline._href = /<(?:\\[<>]?|[^\s<>\\])*>|[^\s\x00-\x1f]*/; // destination: <...> form, or a bare run without whitespace or control characters
    260 inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; // title wrapped in "...", '...' or (...), with backslash escapes of the closing delimiter
    261 
    262 inline.link = edit(inline.link)
    263   .replace('label', inline._label)
    264   .replace('href', inline._href)
    265   .replace('title', inline._title)
    266   .getRegex();
    267 
    268 inline.reflink = edit(inline.reflink)
    269   .replace('label', inline._label)
    270   .getRegex();
    271 
    272 inline.reflinkSearch = edit(inline.reflinkSearch, 'g') // reflink, or a nolink that is not followed by an opening parenthesis
    273   .replace('reflink', inline.reflink)
    274   .replace('nolink', inline.nolink)
    275   .getRegex();
    276 
    277 /**
    278  * Normal Inline Grammar: the rules built above, merged into a new object
    279  */
    280 
    281 inline.normal = merge({}, inline);
    282 
    283 /**
    284  * Pedantic Inline Grammar: the normal rules with simpler strong, em, link and reflink patterns
    285  */
    286 
    287 inline.pedantic = merge({}, inline.normal, {
    288   strong: {
    289     start: /^__|\*\*/,
    290     middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, // content must start and end with a non-space character
    291     endAst: /\*\*(?!\*)/g,
    292     endUnd: /__(?!_)/g
    293   },
    294   em: {
    295     start: /^_|\*/,
    296     middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/, // content must start and end with a non-space character
    297     endAst: /\*(?!\*)/g,
    298     endUnd: /_(?!_)/g
    299   },
    300   link: edit(/^!?\[(label)\]\((.*?)\)/) // everything between the parentheses is one lazy group; no separate href/title parts
    301     .replace('label', inline._label)
    302     .getRegex(),
    303   reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) // whitespace is allowed between the two bracket pairs
    304     .replace('label', inline._label)
    305     .getRegex()
    306 });
    307 
    308 /**
    309  * GFM Inline Grammar: the normal rules plus bare url/email autolinks, ~strikethrough~ and a text rule that stops before them
    310  */
    311 
    312 inline.gfm = merge({}, inline.normal, {
    313   escape: edit(inline.escape).replace('])', '~|])').getRegex(), // NOTE(review): inline.escape already lists ~ and | in its class, so this looks redundant - confirm
    314   _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
    315   url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, // `email` is a placeholder, replaced with _extended_email below
    316   _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
    317   del: /^~+(?=\S)([\s\S]*?\S)~+/, // one or more ~ on each side; content must start and end with a non-space character
    318   text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*~]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))/
    319 });
    320 
    321 inline.gfm.url = edit(inline.gfm.url, 'i')
    322   .replace('email', inline.gfm._extended_email)
    323   .getRegex();
    324 /**
    325  * GFM + Line Breaks Inline Grammar: the GFM rules with br and text rewritten so the "two or more spaces" requirement before a newline becomes "any number of spaces"
    326  */
    327 
    328 inline.breaks = merge({}, inline.gfm, {
    329   br: edit(inline.br).replace('{2,}', '*').getRegex(), // ' {2,}' in inline.br becomes ' *'
    330   text: edit(inline.gfm.text)
    331     .replace('\\b_', '\\b_| {2,}\\n') // add "spaces then newline" as another point where text stops
    332     .replace(/\{2,\}/g, '*') // then relax every {2,} quantifier, including the one just inserted
    333     .getRegex()
    334 });
    335 
    336 module.exports = { // the variants are properties of these two objects: block.normal/gfm/pedantic and inline.normal/pedantic/gfm/breaks
    337   block,
    338   inline
    339 };