rules.js (13077B)
1 const { 2 noopTest, 3 edit, 4 merge 5 } = require('./helpers.js'); 6 7 /** 8 * Block-Level Grammar 9 */ 10 const block = { 11 newline: /^\n+/, 12 code: /^( {4}[^\n]+\n*)+/, 13 fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/, 14 hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, 15 heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/, 16 blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, 17 list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/, 18 html: '^ {0,3}(?:' // optional indentation 19 + '<(script|pre|style)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1) 20 + '|comment[^\\n]*(\\n+|$)' // (2) 21 + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) 22 + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4) 23 + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5) 24 + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:\\n{2,}|$)' // (6) 25 + '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag 26 + '|</(?!script|pre|style)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag 27 + ')', 28 def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, 29 nptable: noopTest, 30 table: noopTest, 31 lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/, 32 // regex template, placeholders will be replaced according to different paragraph 33 // interruption rules of commonmark and the original markdown spec: 34 _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html)[^\n]+)*)/, 35 text: /^[^\n]+/ 36 }; 37 38 block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/; 39 block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; 40 block.def = edit(block.def) 41 .replace('label', block._label) 42 .replace('title', block._title) 43 .getRegex(); 44 45 block.bullet = /(?:[*+-]|\d{1,9}[.)])/; 46 block.item = /^( *)(bull) ?[^\n]*(?:\n(?!\1bull ?)[^\n]*)*/; 47 block.item = edit(block.item, 'gm') 48 .replace(/bull/g, block.bullet) 49 .getRegex(); 50 51 block.list = edit(block.list) 52 .replace(/bull/g, block.bullet) 53 .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))') 54 .replace('def', '\\n+(?=' + block.def.source + ')') 55 .getRegex(); 56 57 block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' 58 + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' 59 + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' 60 + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' 61 + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' 62 + '|track|ul'; 63 block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/; 64 block.html = edit(block.html, 'i') 65 .replace('comment', block._comment) 66 .replace('tag', block._tag) 67 .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) 68 .getRegex(); 69 70 block.paragraph = edit(block._paragraph) 71 .replace('hr', block.hr) 72 .replace('heading', ' {0,3}#{1,6} ') 73 .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs 74 .replace('blockquote', ' {0,3}>') 75 .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') 76 .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt 77 .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)') 78 .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks 79 .getRegex(); 80 81 block.blockquote = edit(block.blockquote) 82 .replace('paragraph', block.paragraph) 83 .getRegex(); 84 85 /** 86 * Normal Block Grammar 87 */ 88 89 block.normal = merge({}, block); 90 91 /** 92 * GFM Block Grammar 93 */ 94 95 block.gfm = merge({}, block.normal, { 96 nptable: '^ *([^|\\n ].*\\|.*)\\n' // Header 97 + ' {0,3}([-:]+ *\\|[-| :]*)' // Align 98 + '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)', // Cells 99 table: '^ *\\|(.+)\\n' // Header 100 + ' {0,3}\\|?( *[-:]+[-| :]*)' // Align 101 + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells 102 }); 103 104 block.gfm.nptable = edit(block.gfm.nptable) 105 .replace('hr', block.hr) 106 .replace('heading', ' {0,3}#{1,6} ') 107 .replace('blockquote', ' {0,3}>') 108 .replace('code', ' {4}[^\\n]') 109 .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') 110 .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt 111 .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)') 112 .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks 113 .getRegex(); 114 115 block.gfm.table = edit(block.gfm.table) 116 .replace('hr', block.hr) 117 .replace('heading', ' {0,3}#{1,6} ') 118 .replace('blockquote', ' {0,3}>') 119 .replace('code', ' {4}[^\\n]') 120 .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') 121 .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt 122 .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)') 123 .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks 124 .getRegex(); 125 126 /** 127 * Pedantic grammar (original John Gruber's loose markdown specification) 128 */ 129 130 block.pedantic = merge({}, block.normal, { 131 html: edit( 132 '^ *(?:comment *(?:\\n|\\s*$)' 133 + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag 134 + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') 135 .replace('comment', block._comment) 136 .replace(/tag/g, '(?!(?:' 137 + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' 138 + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' 139 + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') 140 .getRegex(), 141 def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, 142 heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/, 143 fences: noopTest, // fences not supported 144 paragraph: edit(block.normal._paragraph) 145 .replace('hr', block.hr) 146 .replace('heading', ' *#{1,6} *[^\n]') 147 .replace('lheading', block.lheading) 148 .replace('blockquote', ' {0,3}>') 149 .replace('|fences', '') 150 .replace('|list', '') 151 .replace('|html', '') 152 .getRegex() 153 }); 154 155 /** 156 * Inline-Level Grammar 157 */ 158 const inline = { 159 escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/, 160 autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, 161 url: noopTest, 162 tag: '^comment' 163 + '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag 164 + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag 165 + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?> 166 + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html> 167 + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section 168 link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/, 169 reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/, 170 nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/, 171 reflinkSearch: 'reflink|nolink(?!\\()', 172 strong: { 173 start: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])|__/, // (1) returns if starts w/ punctuation 174 middle: /^\*\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*\*$|^__(?![\s])((?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?)__$/, 175 endAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation_\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline) 176 endUnd: /[^\s]__(?!_)(?:(?=[punctuation*\s])|$)/ // last char can't be a space, and final _ must preceed punct or \s (or endline) 177 }, 178 em: { 179 start: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // (1) returns if starts w/ punctuation 180 middle: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/, 181 endAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation_\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline) 182 endUnd: /[^\s]_(?!_)(?:(?=[punctuation*\s])|$)/ // last char can't be a space, and final _ must preceed punct or \s (or endline) 183 }, 184 code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, 185 br: /^( {2,}|\\)\n(?!\s*$)/, 186 del: noopTest, 187 text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n)))/, 188 punctuation: /^([\s*punctuation])/ 189 }; 190 191 // list of punctuation marks from common mark spec 192 // without * and _ to workaround cases with double emphasis 193 inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~'; 194 inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex(); 195 196 // sequences em should skip over [title](link), `code`, <html> 197 inline._blockSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>'; 198 inline._overlapSkip = '__[^_]*?__|\\*\\*\\[^\\*\\]*?\\*\\*'; 199 200 inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); 201 202 inline.em.start = edit(inline.em.start) 203 .replace(/punctuation/g, inline._punctuation) 204 .getRegex(); 205 206 inline.em.middle = edit(inline.em.middle) 207 .replace(/punctuation/g, inline._punctuation) 208 .replace(/overlapSkip/g, inline._overlapSkip) 209 .getRegex(); 210 211 inline.em.endAst = edit(inline.em.endAst, 'g') 212 .replace(/punctuation/g, inline._punctuation) 213 .getRegex(); 214 215 inline.em.endUnd = edit(inline.em.endUnd, 'g') 216 .replace(/punctuation/g, inline._punctuation) 217 .getRegex(); 218 219 inline.strong.start = edit(inline.strong.start) 220 .replace(/punctuation/g, inline._punctuation) 221 .getRegex(); 222 223 inline.strong.middle = edit(inline.strong.middle) 224 .replace(/punctuation/g, inline._punctuation) 225 .replace(/overlapSkip/g, inline._overlapSkip) 226 .getRegex(); 227 228 inline.strong.endAst = edit(inline.strong.endAst, 'g') 229 .replace(/punctuation/g, inline._punctuation) 230 .getRegex(); 231 232 inline.strong.endUnd = edit(inline.strong.endUnd, 'g') 233 .replace(/punctuation/g, inline._punctuation) 234 .getRegex(); 235 236 inline.blockSkip = edit(inline._blockSkip, 'g') 237 .getRegex(); 238 239 inline.overlapSkip = edit(inline._overlapSkip, 'g') 240 .getRegex(); 241 242 inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; 243 244 inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; 245 inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; 246 inline.autolink = edit(inline.autolink) 247 .replace('scheme', inline._scheme) 248 .replace('email', inline._email) 249 .getRegex(); 250 251 inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; 252 253 inline.tag = edit(inline.tag) 254 .replace('comment', inline._comment) 255 .replace('attribute', inline._attribute) 256 .getRegex(); 257 258 inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; 259 inline._href = /<(?:\\[<>]?|[^\s<>\\])*>|[^\s\x00-\x1f]*/; 260 inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; 261 262 inline.link = edit(inline.link) 263 .replace('label', inline._label) 264 .replace('href', inline._href) 265 .replace('title', inline._title) 266 .getRegex(); 267 268 inline.reflink = edit(inline.reflink) 269 .replace('label', inline._label) 270 .getRegex(); 271 272 inline.reflinkSearch = edit(inline.reflinkSearch, 'g') 273 .replace('reflink', inline.reflink) 274 .replace('nolink', inline.nolink) 275 .getRegex(); 276 277 /** 278 * Normal Inline Grammar 279 */ 280 281 inline.normal = merge({}, inline); 282 283 /** 284 * Pedantic Inline Grammar 285 */ 286 287 inline.pedantic = merge({}, inline.normal, { 288 strong: { 289 start: /^__|\*\*/, 290 middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, 291 endAst: /\*\*(?!\*)/g, 292 endUnd: /__(?!_)/g 293 }, 294 em: { 295 start: /^_|\*/, 296 middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/, 297 endAst: /\*(?!\*)/g, 298 endUnd: /_(?!_)/g 299 }, 300 link: edit(/^!?\[(label)\]\((.*?)\)/) 301 .replace('label', inline._label) 302 .getRegex(), 303 reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) 304 .replace('label', inline._label) 305 .getRegex() 306 }); 307 308 /** 309 * GFM Inline Grammar 310 */ 311 312 inline.gfm = merge({}, inline.normal, { 313 escape: edit(inline.escape).replace('])', '~|])').getRegex(), 314 _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/, 315 url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, 316 _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/, 317 del: /^~+(?=\S)([\s\S]*?\S)~+/, 318 text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*~]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))/ 319 }); 320 321 inline.gfm.url = edit(inline.gfm.url, 'i') 322 .replace('email', inline.gfm._extended_email) 323 .getRegex(); 324 /** 325 * GFM + Line Breaks Inline Grammar 326 */ 327 328 inline.breaks = merge({}, inline.gfm, { 329 br: edit(inline.br).replace('{2,}', '*').getRegex(), 330 text: edit(inline.gfm.text) 331 .replace('\\b_', '\\b_| {2,}\\n') 332 .replace(/\{2,\}/g, '*') 333 .getRegex() 334 }); 335 336 module.exports = { 337 block, 338 inline 339 };