Lexer.js (11468B)
1 const Tokenizer = require('./Tokenizer.js'); 2 const { defaults } = require('./defaults.js'); 3 const { block, inline } = require('./rules.js'); 4 5 /** 6 * smartypants text replacement 7 */ 8 function smartypants(text) { 9 return text 10 // em-dashes 11 .replace(/---/g, '\u2014') 12 // en-dashes 13 .replace(/--/g, '\u2013') 14 // opening singles 15 .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018') 16 // closing singles & apostrophes 17 .replace(/'/g, '\u2019') 18 // opening doubles 19 .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c') 20 // closing doubles 21 .replace(/"/g, '\u201d') 22 // ellipses 23 .replace(/\.{3}/g, '\u2026'); 24 } 25 26 /** 27 * mangle email addresses 28 */ 29 function mangle(text) { 30 let out = '', 31 i, 32 ch; 33 34 const l = text.length; 35 for (i = 0; i < l; i++) { 36 ch = text.charCodeAt(i); 37 if (Math.random() > 0.5) { 38 ch = 'x' + ch.toString(16); 39 } 40 out += '&#' + ch + ';'; 41 } 42 43 return out; 44 } 45 46 /** 47 * Block Lexer 48 */ 49 module.exports = class Lexer { 50 constructor(options) { 51 this.tokens = []; 52 this.tokens.links = Object.create(null); 53 this.options = options || defaults; 54 this.options.tokenizer = this.options.tokenizer || new Tokenizer(); 55 this.tokenizer = this.options.tokenizer; 56 this.tokenizer.options = this.options; 57 58 const rules = { 59 block: block.normal, 60 inline: inline.normal 61 }; 62 63 if (this.options.pedantic) { 64 rules.block = block.pedantic; 65 rules.inline = inline.pedantic; 66 } else if (this.options.gfm) { 67 rules.block = block.gfm; 68 if (this.options.breaks) { 69 rules.inline = inline.breaks; 70 } else { 71 rules.inline = inline.gfm; 72 } 73 } 74 this.tokenizer.rules = rules; 75 } 76 77 /** 78 * Expose Rules 79 */ 80 static get rules() { 81 return { 82 block, 83 inline 84 }; 85 } 86 87 /** 88 * Static Lex Method 89 */ 90 static lex(src, options) { 91 const lexer = new Lexer(options); 92 return lexer.lex(src); 93 } 94 95 /** 96 * Static Lex Inline Method 97 */ 98 static lexInline(src, options) { 99 const lexer = new Lexer(options); 100 return lexer.inlineTokens(src); 101 } 102 103 /** 104 * Preprocessing 105 */ 106 lex(src) { 107 src = src 108 .replace(/\r\n|\r/g, '\n') 109 .replace(/\t/g, ' '); 110 111 this.blockTokens(src, this.tokens, true); 112 113 this.inline(this.tokens); 114 115 return this.tokens; 116 } 117 118 /** 119 * Lexing 120 */ 121 blockTokens(src, tokens = [], top = true) { 122 src = src.replace(/^ +$/gm, ''); 123 let token, i, l, lastToken; 124 125 while (src) { 126 // newline 127 if (token = this.tokenizer.space(src)) { 128 src = src.substring(token.raw.length); 129 if (token.type) { 130 tokens.push(token); 131 } 132 continue; 133 } 134 135 // code 136 if (token = this.tokenizer.code(src, tokens)) { 137 src = src.substring(token.raw.length); 138 if (token.type) { 139 tokens.push(token); 140 } else { 141 lastToken = tokens[tokens.length - 1]; 142 lastToken.raw += '\n' + token.raw; 143 lastToken.text += '\n' + token.text; 144 } 145 continue; 146 } 147 148 // fences 149 if (token = this.tokenizer.fences(src)) { 150 src = src.substring(token.raw.length); 151 tokens.push(token); 152 continue; 153 } 154 155 // heading 156 if (token = this.tokenizer.heading(src)) { 157 src = src.substring(token.raw.length); 158 tokens.push(token); 159 continue; 160 } 161 162 // table no leading pipe (gfm) 163 if (token = this.tokenizer.nptable(src)) { 164 src = src.substring(token.raw.length); 165 tokens.push(token); 166 continue; 167 } 168 169 // hr 170 if (token = this.tokenizer.hr(src)) { 171 src = src.substring(token.raw.length); 172 tokens.push(token); 173 continue; 174 } 175 176 // blockquote 177 if (token = this.tokenizer.blockquote(src)) { 178 src = src.substring(token.raw.length); 179 token.tokens = this.blockTokens(token.text, [], top); 180 tokens.push(token); 181 continue; 182 } 183 184 // list 185 if (token = this.tokenizer.list(src)) { 186 src = src.substring(token.raw.length); 187 l = token.items.length; 188 for (i = 0; i < l; i++) { 189 token.items[i].tokens = this.blockTokens(token.items[i].text, [], false); 190 } 191 tokens.push(token); 192 continue; 193 } 194 195 // html 196 if (token = this.tokenizer.html(src)) { 197 src = src.substring(token.raw.length); 198 tokens.push(token); 199 continue; 200 } 201 202 // def 203 if (top && (token = this.tokenizer.def(src))) { 204 src = src.substring(token.raw.length); 205 if (!this.tokens.links[token.tag]) { 206 this.tokens.links[token.tag] = { 207 href: token.href, 208 title: token.title 209 }; 210 } 211 continue; 212 } 213 214 // table (gfm) 215 if (token = this.tokenizer.table(src)) { 216 src = src.substring(token.raw.length); 217 tokens.push(token); 218 continue; 219 } 220 221 // lheading 222 if (token = this.tokenizer.lheading(src)) { 223 src = src.substring(token.raw.length); 224 tokens.push(token); 225 continue; 226 } 227 228 // top-level paragraph 229 if (top && (token = this.tokenizer.paragraph(src))) { 230 src = src.substring(token.raw.length); 231 tokens.push(token); 232 continue; 233 } 234 235 // text 236 if (token = this.tokenizer.text(src, tokens)) { 237 src = src.substring(token.raw.length); 238 if (token.type) { 239 tokens.push(token); 240 } else { 241 lastToken = tokens[tokens.length - 1]; 242 lastToken.raw += '\n' + token.raw; 243 lastToken.text += '\n' + token.text; 244 } 245 continue; 246 } 247 248 if (src) { 249 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); 250 if (this.options.silent) { 251 console.error(errMsg); 252 break; 253 } else { 254 throw new Error(errMsg); 255 } 256 } 257 } 258 259 return tokens; 260 } 261 262 inline(tokens) { 263 let i, 264 j, 265 k, 266 l2, 267 row, 268 token; 269 270 const l = tokens.length; 271 for (i = 0; i < l; i++) { 272 token = tokens[i]; 273 switch (token.type) { 274 case 'paragraph': 275 case 'text': 276 case 'heading': { 277 token.tokens = []; 278 this.inlineTokens(token.text, token.tokens); 279 break; 280 } 281 case 'table': { 282 token.tokens = { 283 header: [], 284 cells: [] 285 }; 286 287 // header 288 l2 = token.header.length; 289 for (j = 0; j < l2; j++) { 290 token.tokens.header[j] = []; 291 this.inlineTokens(token.header[j], token.tokens.header[j]); 292 } 293 294 // cells 295 l2 = token.cells.length; 296 for (j = 0; j < l2; j++) { 297 row = token.cells[j]; 298 token.tokens.cells[j] = []; 299 for (k = 0; k < row.length; k++) { 300 token.tokens.cells[j][k] = []; 301 this.inlineTokens(row[k], token.tokens.cells[j][k]); 302 } 303 } 304 305 break; 306 } 307 case 'blockquote': { 308 this.inline(token.tokens); 309 break; 310 } 311 case 'list': { 312 l2 = token.items.length; 313 for (j = 0; j < l2; j++) { 314 this.inline(token.items[j].tokens); 315 } 316 break; 317 } 318 default: { 319 // do nothing 320 } 321 } 322 } 323 324 return tokens; 325 } 326 327 /** 328 * Lexing/Compiling 329 */ 330 inlineTokens(src, tokens = [], inLink = false, inRawBlock = false, prevChar = '') { 331 let token; 332 333 // String with links masked to avoid interference with em and strong 334 let maskedSrc = src; 335 let match; 336 337 // Mask out reflinks 338 if (this.tokens.links) { 339 const links = Object.keys(this.tokens.links); 340 if (links.length > 0) { 341 while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) { 342 if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) { 343 maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex); 344 } 345 } 346 } 347 } 348 // Mask out other blocks 349 while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) { 350 maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex); 351 } 352 353 while (src) { 354 // escape 355 if (token = this.tokenizer.escape(src)) { 356 src = src.substring(token.raw.length); 357 tokens.push(token); 358 continue; 359 } 360 361 // tag 362 if (token = this.tokenizer.tag(src, inLink, inRawBlock)) { 363 src = src.substring(token.raw.length); 364 inLink = token.inLink; 365 inRawBlock = token.inRawBlock; 366 tokens.push(token); 367 continue; 368 } 369 370 // link 371 if (token = this.tokenizer.link(src)) { 372 src = src.substring(token.raw.length); 373 if (token.type === 'link') { 374 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); 375 } 376 tokens.push(token); 377 continue; 378 } 379 380 // reflink, nolink 381 if (token = this.tokenizer.reflink(src, this.tokens.links)) { 382 src = src.substring(token.raw.length); 383 if (token.type === 'link') { 384 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); 385 } 386 tokens.push(token); 387 continue; 388 } 389 390 // strong 391 if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) { 392 src = src.substring(token.raw.length); 393 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); 394 tokens.push(token); 395 continue; 396 } 397 398 // em 399 if (token = this.tokenizer.em(src, maskedSrc, prevChar)) { 400 src = src.substring(token.raw.length); 401 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); 402 tokens.push(token); 403 continue; 404 } 405 406 // code 407 if (token = this.tokenizer.codespan(src)) { 408 src = src.substring(token.raw.length); 409 tokens.push(token); 410 continue; 411 } 412 413 // br 414 if (token = this.tokenizer.br(src)) { 415 src = src.substring(token.raw.length); 416 tokens.push(token); 417 continue; 418 } 419 420 // del (gfm) 421 if (token = this.tokenizer.del(src)) { 422 src = src.substring(token.raw.length); 423 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); 424 tokens.push(token); 425 continue; 426 } 427 428 // autolink 429 if (token = this.tokenizer.autolink(src, mangle)) { 430 src = src.substring(token.raw.length); 431 tokens.push(token); 432 continue; 433 } 434 435 // url (gfm) 436 if (!inLink && (token = this.tokenizer.url(src, mangle))) { 437 src = src.substring(token.raw.length); 438 tokens.push(token); 439 continue; 440 } 441 442 // text 443 if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) { 444 src = src.substring(token.raw.length); 445 prevChar = token.raw.slice(-1); 446 tokens.push(token); 447 continue; 448 } 449 450 if (src) { 451 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); 452 if (this.options.silent) { 453 console.error(errMsg); 454 break; 455 } else { 456 throw new Error(errMsg); 457 } 458 } 459 } 460 461 return tokens; 462 } 463 };