uri.ts (19493B)
/**
 * URI.js
 *
 * @fileoverview An RFC 3986 compliant, scheme extendable URI parsing/validating/resolving library for JavaScript.
 * @author <a href="mailto:gary.court@gmail.com">Gary Court</a>
 * @see http://github.com/garycourt/uri-js
 */

/**
 * Copyright 2011 Gary Court. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list
 *       of conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GARY COURT ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARY COURT OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those of the
 * authors and should not be interpreted as representing official policies, either expressed
 * or implied, of Gary Court.
 */

import URI_PROTOCOL from "./regexps-uri";
import IRI_PROTOCOL from "./regexps-iri";
import punycode from "punycode";
import { toUpperCase, typeOf, assign } from "./util";

/**
 * The individual parts of a URI as produced by `parse` and consumed by `serialize`.
 * `reference` and `error` are filled in by `parse`.
 */
export interface URIComponents {
    scheme?:string;
    userinfo?:string;
    host?:string;
    port?:number|string;
    path?:string;
    query?:string;
    fragment?:string;
    reference?:string;
    error?:string;
}

/**
 * Options accepted by the functions in this module.
 */
export interface URIOptions {
    /** Scheme used to look up a handler in `SCHEMES`; also prefixed to the input when `reference` is "suffix". */
    scheme?:string;
    /** Expected reference type; "suffix" makes `parse` prepend "//" and `serialize` omit the scheme and "//". */
    reference?:string;
    /** When truthy, `resolveComponents` ignores the scheme of the relative reference. */
    tolerant?:boolean;
    /** When truthy, `serialize` does not remove dot segments from the path. */
    absolutePath?:boolean;
    /** Selects the IRI regular expressions. `parse` uses them unless this is exactly `false`; `serialize` only when truthy. */
    iri?:boolean;
    /** When truthy, `parse` does not convert the components to the URI (ASCII) encoding. */
    unicodeSupport?:boolean;
    /** When truthy, the host is treated as a domain name and converted via punycode. */
    domainHost?:boolean;
}

/**
 * A scheme specific extension registered in `SCHEMES`. `parse` is invoked at the end of the
 * generic parsing, `serialize` at the start of the generic serialization.
 */
export interface URISchemeHandler<Components extends URIComponents = URIComponents, Options extends URIOptions = URIOptions, ParentComponents extends URIComponents = URIComponents> {
    scheme:string;
    parse(components:ParentComponents, options:Options):Components;
    serialize(components:Components, options:Options):ParentComponents;
    unicodeSupport?:boolean;
    domainHost?:boolean;
    absolutePath?:boolean;
}

/**
 * The set of regular expressions a protocol definition (URI or IRI) has to provide.
 */
export interface URIRegExps {
    NOT_SCHEME : RegExp,
    NOT_USERINFO : RegExp,
    NOT_HOST : RegExp,
    NOT_PATH : RegExp,
    NOT_PATH_NOSCHEME : RegExp,
    NOT_QUERY : RegExp,
    NOT_FRAGMENT : RegExp,
    ESCAPE : RegExp,
    UNRESERVED : RegExp,
    OTHER_CHARS : RegExp,
    PCT_ENCODED : RegExp,
    IPV4ADDRESS : RegExp,
    IPV6ADDRESS : RegExp,
}

/** Registry of scheme handlers, keyed by scheme name (looked up in lower case). */
export const SCHEMES:{[scheme:string]:URISchemeHandler} = {};

/**
 * Percent-encodes one character as its UTF-8 byte sequence (upper case hex digits).
 *
 * Normally called with a single UTF-16 code unit. When `chr` holds a complete surrogate
 * pair, the pair is encoded as the 4-byte UTF-8 form of the code point it represents
 * instead of dropping the low surrogate. A lone surrogate is still encoded as a 3-byte
 * sequence.
 *
 * @param chr The character to encode; only the first character (or surrogate pair) is read.
 * @returns The percent-encoded form, e.g. "%20" or "%C3%A9".
 */
export function pctEncChar(chr:string):string {
    let c = chr.charCodeAt(0);
    let e:string;

    //combine a complete surrogate pair into the code point it represents
    if (c >= 0xD800 && c <= 0xDBFF && chr.length > 1) {
        const low = chr.charCodeAt(1);
        if (low >= 0xDC00 && low <= 0xDFFF) {
            c = ((c - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
        }
    }

    if (c < 16) e = "%0" + c.toString(16).toUpperCase();
    else if (c < 128) e = "%" + c.toString(16).toUpperCase();
    else if (c < 2048) e = "%" + ((c >> 6) | 192).toString(16).toUpperCase() + "%" + ((c & 63) | 128).toString(16).toUpperCase();
    else if (c >= 65536) e = "%" + ((c >> 18) | 240).toString(16).toUpperCase() + "%" + (((c >> 12) & 63) | 128).toString(16).toUpperCase() + "%" + (((c >> 6) & 63) | 128).toString(16).toUpperCase() + "%" + ((c & 63) | 128).toString(16).toUpperCase();
    //3-byte form; also reached for an empty string (NaN), which keeps the historic result
    else e = "%" + ((c >> 12) | 224).toString(16).toUpperCase() + "%" + (((c >> 6) & 63) | 128).toString(16).toUpperCase() + "%" + ((c & 63) | 128).toString(16).toUpperCase();

    return e;
}

/**
 * Decodes a string made up solely of percent-encoded octets ("%XX%XX...") as UTF-8.
 *
 * Lead bytes select a 1, 2, 3 or 4 byte sequence. A sequence that is truncated, a 4-byte
 * sequence outside U+10000..U+10FFFF, and octets that can not start a sequence
 * (0x80-0xC1, 0xF8-0xFF, or non-hex input) are copied to the output still percent-encoded.
 * Continuation bytes are not validated.
 *
 * @param str The percent-encoded octets to decode.
 * @returns The decoded characters, with undecodable octets left as they were.
 */
export function pctDecChars(str:string):string {
    let newStr = "";
    let i = 0;
    const il = str.length;

    while (i < il) {
        const c = parseInt(str.substr(i + 1, 2), 16);

        if (c < 128) {
            newStr += String.fromCharCode(c);
            i += 3;
        }
        else if (c >= 194 && c < 224) {
            if ((il - i) >= 6) {
                const c2 = parseInt(str.substr(i + 4, 2), 16);
                newStr += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
            } else {
                newStr += str.substr(i, 6);
            }
            i += 6;
        }
        else if (c >= 224 && c < 240) {
            if ((il - i) >= 9) {
                const c2 = parseInt(str.substr(i + 4, 2), 16);
                const c3 = parseInt(str.substr(i + 7, 2), 16);
                newStr += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
            } else {
                newStr += str.substr(i, 9);
            }
            i += 9;
        }
        else if (c >= 240 && c < 248) {
            //4-byte sequence: code point above the BMP, emitted as a surrogate pair
            let decoded = false;
            if ((il - i) >= 12) {
                const c2 = parseInt(str.substr(i + 4, 2), 16);
                const c3 = parseInt(str.substr(i + 7, 2), 16);
                const c4 = parseInt(str.substr(i + 10, 2), 16);
                const cp = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
                if (cp >= 0x10000 && cp <= 0x10FFFF) {
                    const offset = cp - 0x10000;
                    newStr += String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
                    decoded = true;
                }
            }
            if (!decoded) {
                newStr += str.substr(i, 12);
            }
            i += 12;
        }
        else {
            //not a valid lead byte (or not hex at all): keep the octet encoded
            newStr += str.substr(i, 3);
            i += 3;
        }
    }

    return newStr;
}

/**
 * Normalizes the percent-encoding of every defined component in place: percent-encoded
 * unreserved characters are decoded, characters not allowed in the component are
 * percent-encoded, and remaining percent-encodings are upper-cased. Scheme and host are
 * also lower-cased; disallowed characters are removed from the scheme.
 *
 * @param components The components to normalize (mutated).
 * @param protocol The regular expressions (URI or IRI) that define what is allowed.
 * @returns The same `components` object.
 */
function _normalizeComponentEncoding(components:URIComponents, protocol:URIRegExps) {
    //decode a percent-encoded run only when the result contains unreserved characters
    function decodeUnreserved(str:string):string {
        const decStr = pctDecChars(str);
        return (!decStr.match(protocol.UNRESERVED) ? str : decStr);
    }

    if (components.scheme) components.scheme = String(components.scheme).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_SCHEME, "");
    if (components.userinfo !== undefined) components.userinfo = String(components.userinfo).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_USERINFO, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    if (components.host !== undefined) components.host = String(components.host).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_HOST, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    if (components.path !== undefined) components.path = String(components.path).replace(protocol.PCT_ENCODED, decodeUnreserved).replace((components.scheme ? protocol.NOT_PATH : protocol.NOT_PATH_NOSCHEME), pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    if (components.query !== undefined) components.query = String(components.query).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_QUERY, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    if (components.fragment !== undefined) components.fragment = String(components.fragment).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_FRAGMENT, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);

    return components;
}

/**
 * Removes leading "0" characters from `str`; a string consisting only of zeros (or an
 * empty string) becomes "0".
 */
function _stripLeadingZeros(str:string):string {
    return str.replace(/^0*(.*)/, "$1") || "0";
}

/**
 * Strips leading zeros from each dot separated field of an IPv4 address.
 *
 * @param host The host to normalize.
 * @param protocol Supplies the `IPV4ADDRESS` expression; its first capture group is the address.
 * @returns The normalized address, or `host` unchanged when it is not an IPv4 address.
 */
function _normalizeIPv4(host:string, protocol:URIRegExps):string {
    const matches = host.match(protocol.IPV4ADDRESS) || [];
    const [, address] = matches;

    if (address) {
        return address.split(".").map(_stripLeadingZeros).join(".");
    } else {
        return host;
    }
}

/**
 * Normalizes an IPv6 address: lower-cases it, strips leading zeros from each field,
 * expands it to its full field count and then collapses the longest run of two or more
 * zero fields into "::". A zone identifier, when captured, is re-appended after "%".
 *
 * @param host The host to normalize.
 * @param protocol Supplies the `IPV6ADDRESS` expression (capture 1: address, capture 2: zone)
 *                 and the `IPV4ADDRESS` expression used to detect an embedded IPv4 tail.
 * @returns The normalized address (without brackets), or `host` unchanged when it is not an IPv6 address.
 */
function _normalizeIPv6(host:string, protocol:URIRegExps):string {
    const matches = host.match(protocol.IPV6ADDRESS) || [];
    const [, address, zone] = matches;

    if (address) {
        //after the reverse, `last` is the part following "::" (or the whole address when there is no "::")
        const [last, first] = address.toLowerCase().split('::').reverse();
        const firstFields = first ? first.split(":").map(_stripLeadingZeros) : [];
        const lastFields = last.split(":").map(_stripLeadingZeros);
        const isLastFieldIPv4Address = protocol.IPV4ADDRESS.test(lastFields[lastFields.length - 1]);
        //an embedded IPv4 address occupies the space of two 16-bit fields
        const fieldCount = isLastFieldIPv4Address ? 7 : 8;
        const lastFieldsStart = lastFields.length - fieldCount;
        const fields = Array<string>(fieldCount);

        for (let x = 0; x < fieldCount; ++x) {
            fields[x] = firstFields[x] || lastFields[lastFieldsStart + x] || '';
        }

        if (isLastFieldIPv4Address) {
            fields[fieldCount - 1] = _normalizeIPv4(fields[fieldCount - 1], protocol);
        }

        //collect every run of consecutive empty/zero fields
        const allZeroFields = fields.reduce<Array<{index:number,length:number}>>((acc, field, index) => {
            if (!field || field === "0") {
                const lastLongest = acc[acc.length - 1];
                if (lastLongest && lastLongest.index + lastLongest.length === index) {
                    lastLongest.length++;
                } else {
                    acc.push({ index, length : 1 });
                }
            }
            return acc;
        }, []);

        const longestZeroFields = allZeroFields.sort((a, b) => b.length - a.length)[0];

        let newHost:string;
        if (longestZeroFields && longestZeroFields.length > 1) {
            const newFirst = fields.slice(0, longestZeroFields.index) ;
            const newLast = fields.slice(longestZeroFields.index + longestZeroFields.length);
            newHost = newFirst.join(":") + "::" + newLast.join(":");
        } else {
            newHost = fields.join(":");
        }

        if (zone) {
            newHost += "%" + zone;
        }

        return newHost;
    } else {
        return host;
    }
}

/**
 * Splits a URI reference into its parts. Capture groups:
 * 1 scheme, 2 authority, 3 userinfo, 4 host, 5 port, 6 path, 7 query, 8 fragment.
 */
const URI_PARSE = /^(?:([^:\/?#]+):)?(?:\/\/((?:([^\/?#@]*)@)?(\[[^\/?#\]]+\]|[^\/?#:]*)(?:\:(\d*))?))?([^?#]*)(?:\?([^#]*))?(?:#((?:.|\n|\r)*))?/i;

/** Whether the regular expression engine reports an unmatched capture group as `undefined`. */
const NO_MATCH_IS_UNDEFINED = (<RegExpMatchArray>("").match(/(){0}/))[1] === undefined;

/**
 * Parses a URI (or IRI) reference into normalized components.
 *
 * IP hosts are normalized, the reference type is determined and stored in `reference`,
 * and the component encodings are normalized. Unless unicode support is requested through
 * the options or the scheme handler, the components are converted to their URI (ASCII)
 * form, including punycode conversion of domain name hosts. Problems are reported through
 * the `error` property of the result rather than thrown.
 *
 * @param uriString The reference to parse.
 * @param options Parsing options; the IRI expressions are used unless `options.iri === false`.
 * @returns The parsed components.
 */
export function parse(uriString:string, options:URIOptions = {}):URIComponents {
    const components:URIComponents = {};
    const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);

    if (options.reference === "suffix") uriString = (options.scheme ? options.scheme + ":" : "") + "//" + uriString;

    const matches = uriString.match(URI_PARSE);

    if (matches) {
        if (NO_MATCH_IS_UNDEFINED) {
            //store each component
            components.scheme = matches[1];
            components.userinfo = matches[3];
            components.host = matches[4];
            components.port = parseInt(matches[5], 10);
            components.path = matches[6] || "";
            components.query = matches[7];
            components.fragment = matches[8];

            //fix port number
            if (isNaN(components.port)) {
                components.port = matches[5];
            }
        } else {  //IE FIX for improper RegExp matching
            //store each component
            components.scheme = matches[1] || undefined;
            components.userinfo = (uriString.indexOf("@") !== -1 ? matches[3] : undefined);
            components.host = (uriString.indexOf("//") !== -1 ? matches[4] : undefined);
            components.port = parseInt(matches[5], 10);
            components.path = matches[6] || "";
            components.query = (uriString.indexOf("?") !== -1 ? matches[7] : undefined);
            components.fragment = (uriString.indexOf("#") !== -1 ? matches[8] : undefined);

            //fix port number
            if (isNaN(components.port)) {
                //an empty port ("//host:") yields the port capture group, as in the branch above (not the host group)
                components.port = (uriString.match(/\/\/(?:.|\n)*\:(?:\/|\?|\#|$)/) ? matches[5] : undefined);
            }
        }

        if (components.host) {
            //normalize IP hosts
            components.host = _normalizeIPv6(_normalizeIPv4(components.host, protocol), protocol);
        }

        //determine reference type
        if (components.scheme === undefined && components.userinfo === undefined && components.host === undefined && components.port === undefined && !components.path && components.query === undefined) {
            components.reference = "same-document";
        } else if (components.scheme === undefined) {
            components.reference = "relative";
        } else if (components.fragment === undefined) {
            components.reference = "absolute";
        } else {
            components.reference = "uri";
        }

        //check for reference errors
        if (options.reference && options.reference !== "suffix" && options.reference !== components.reference) {
            components.error = components.error || "URI is not a " + options.reference + " reference.";
        }

        //find scheme handler
        const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];

        //check if scheme can't handle IRIs
        if (!options.unicodeSupport && (!schemeHandler || !schemeHandler.unicodeSupport)) {
            //if host component is a domain name
            if (components.host && (options.domainHost || (schemeHandler && schemeHandler.domainHost))) {
                //convert Unicode IDN -> ASCII IDN
                try {
                    components.host = punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase());
                } catch (e) {
                    components.error = components.error || "Host's domain name can not be converted to ASCII via punycode: " + e;
                }
            }
            //convert IRI -> URI
            _normalizeComponentEncoding(components, URI_PROTOCOL);
        } else {
            //normalize encodings
            _normalizeComponentEncoding(components, protocol);
        }

        //perform scheme specific parsing
        if (schemeHandler && schemeHandler.parse) {
            schemeHandler.parse(components, options);
        }
    } else {
        components.error = components.error || "URI can not be parsed.";
    }

    return components;
}

/**
 * Builds the authority ("userinfo@host:port") from the components.
 *
 * IPv6 hosts are normalized, wrapped in brackets and have their zone separator escaped
 * as "%25". The port is only emitted when it is a number.
 *
 * @param components The components to read from.
 * @param options The IRI expressions are used unless `options.iri === false`.
 * @returns The authority, or `undefined` when none of its parts is present.
 */
function _recomposeAuthority(components:URIComponents, options:URIOptions):string|undefined {
    const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);
    const uriTokens:Array<string> = [];

    if (components.userinfo !== undefined) {
        uriTokens.push(components.userinfo);
        uriTokens.push("@");
    }

    if (components.host !== undefined) {
        //normalize IP hosts, add brackets and escape zone separator for IPv6
        uriTokens.push(_normalizeIPv6(_normalizeIPv4(String(components.host), protocol), protocol).replace(protocol.IPV6ADDRESS, (_, $1, $2) => "[" + $1 + ($2 ? "%25" + $2 : "") + "]"));
    }

    if (typeof components.port === "number") {
        uriTokens.push(":");
        uriTokens.push(components.port.toString(10));
    }

    return uriTokens.length ? uriTokens.join("") : undefined;
}

//patterns for the steps of the "remove dot segments" algorithm
const RDS1 = /^\.\.?\//;                  //leading "./" or "../"
const RDS2 = /^\/\.(\/|$)/;               //leading "/./" or a final "/."
const RDS3 = /^\/\.\.(\/|$)/;             //leading "/../" or a final "/.."
const RDS4 = /^\.\.?$/;                   //only "." or ".."
const RDS5 = /^\/?(?:.|\n)*?(?=\/|$)/;    //first path segment, including a leading "/"

/**
 * Removes the "." and ".." segments from a path.
 *
 * @param input The path to process.
 * @returns The path without dot segments.
 * @throws Error when no rule applies to the remaining input.
 */
export function removeDotSegments(input:string):string {
    const output:Array<string> = [];

    while (input.length) {
        if (input.match(RDS1)) {
            input = input.replace(RDS1, "");
        } else if (input.match(RDS2)) {
            input = input.replace(RDS2, "/");
        } else if (input.match(RDS3)) {
            input = input.replace(RDS3, "/");
            //".." also removes the segment emitted before it
            output.pop();
        } else if (input === "." || input === "..") {
            input = "";
        } else {
            const im = input.match(RDS5);
            if (im) {
                const s = im[0];
                input = input.slice(s.length);
                output.push(s);
            } else {
                throw new Error("Unexpected dot segment condition");
            }
        }
    }

    return output.join("");
}

/**
 * Serializes URI components into a string.
 *
 * Note that `components` is modified: the scheme handler's `serialize` is applied to it,
 * a domain name host is converted via punycode, and the component encodings are normalized
 * in place. A punycode failure is recorded in `components.error`.
 *
 * @param components The components to serialize (mutated).
 * @param options Serialization options; the IRI expressions are only used when `options.iri` is truthy.
 * @returns The serialized URI reference.
 */
export function serialize(components:URIComponents, options:URIOptions = {}):string {
    const protocol = (options.iri ? IRI_PROTOCOL : URI_PROTOCOL);
    const uriTokens:Array<string> = [];

    //find scheme handler
    const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];

    //perform scheme specific serialization
    if (schemeHandler && schemeHandler.serialize) schemeHandler.serialize(components, options);

    if (components.host) {
        //if host component is an IPv6 address
        if (protocol.IPV6ADDRESS.test(components.host)) {
            //TODO: normalize IPv6 address as per RFC 5952
        }

        //if host component is a domain name
        else if (options.domainHost || (schemeHandler && schemeHandler.domainHost)) {
            //convert IDN via punycode
            try {
                components.host = (!options.iri ? punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase()) : punycode.toUnicode(components.host));
            } catch (e) {
                components.error = components.error || "Host's domain name can not be converted to " + (!options.iri ? "ASCII" : "Unicode") + " via punycode: " + e;
            }
        }
    }

    //normalize encoding
    _normalizeComponentEncoding(components, protocol);

    if (options.reference !== "suffix" && components.scheme) {
        uriTokens.push(components.scheme);
        uriTokens.push(":");
    }

    const authority = _recomposeAuthority(components, options);
    if (authority !== undefined) {
        if (options.reference !== "suffix") {
            uriTokens.push("//");
        }

        uriTokens.push(authority);

        //a non-empty path following an authority has to start with "/"
        if (components.path && components.path.charAt(0) !== "/") {
            uriTokens.push("/");
        }
    }

    if (components.path !== undefined) {
        let s = components.path;

        if (!options.absolutePath && (!schemeHandler || !schemeHandler.absolutePath)) {
            s = removeDotSegments(s);
        }

        if (authority === undefined) {
            s = s.replace(/^\/\//, "/%2F");  //don't allow the path to start with "//"
        }

        uriTokens.push(s);
    }

    if (components.query !== undefined) {
        uriTokens.push("?");
        uriTokens.push(components.query);
    }

    if (components.fragment !== undefined) {
        uriTokens.push("#");
        uriTokens.push(components.fragment);
    }

    return uriTokens.join("");  //merge tokens into a string
}

/**
 * Resolves a relative reference against a base, working on components.
 *
 * @param base The components of the base URI.
 * @param relative The components of the reference to resolve.
 * @param options Options; `tolerant` makes the scheme of `relative` be ignored.
 * @param skipNormalization When truthy, `base` and `relative` are used as given instead
 *                          of being normalized through `serialize` and `parse` first.
 * @returns The components of the resolved target (a new object).
 */
export function resolveComponents(base:URIComponents, relative:URIComponents, options:URIOptions = {}, skipNormalization?:boolean):URIComponents {
    const target:URIComponents = {};

    //guard before first use so a null passed by untyped callers does not break normalization
    options = options || {};

    if (!skipNormalization) {
        base = parse(serialize(base, options), options);  //normalize base components
        relative = parse(serialize(relative, options), options);  //normalize relative components
    }

    if (!options.tolerant && relative.scheme) {
        target.scheme = relative.scheme;
        //target.authority = relative.authority;
        target.userinfo = relative.userinfo;
        target.host = relative.host;
        target.port = relative.port;
        target.path = removeDotSegments(relative.path || "");
        target.query = relative.query;
    } else {
        if (relative.userinfo !== undefined || relative.host !== undefined || relative.port !== undefined) {
            //target.authority = relative.authority;
            target.userinfo = relative.userinfo;
            target.host = relative.host;
            target.port = relative.port;
            target.path = removeDotSegments(relative.path || "");
            target.query = relative.query;
        } else {
            if (!relative.path) {
                target.path = base.path;
                if (relative.query !== undefined) {
                    target.query = relative.query;
                } else {
                    target.query = base.query;
                }
            } else {
                if (relative.path.charAt(0) === "/") {
                    target.path = removeDotSegments(relative.path);
                } else {
                    //merge the relative path with the base path
                    if ((base.userinfo !== undefined || base.host !== undefined || base.port !== undefined) && !base.path) {
                        target.path = "/" + relative.path;
                    } else if (!base.path) {
                        target.path = relative.path;
                    } else {
                        target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative.path;
                    }
                    target.path = removeDotSegments(target.path);
                }
                target.query = relative.query;
            }
            //target.authority = base.authority;
            target.userinfo = base.userinfo;
            target.host = base.host;
            target.port = base.port;
        }
        target.scheme = base.scheme;
    }

    target.fragment = relative.fragment;

    return target;
}

/**
 * Resolves a relative URI reference against a base URI, both given as strings.
 *
 * The scheme option defaults to 'null' unless `options` supplies one.
 *
 * @param baseURI The base URI.
 * @param relativeURI The reference to resolve.
 * @param options Options passed on to parsing, resolving and serializing.
 * @returns The resolved URI as a string.
 */
export function resolve(baseURI:string, relativeURI:string, options?:URIOptions):string {
    const schemelessOptions = assign({ scheme : 'null' }, options);
    return serialize(resolveComponents(parse(baseURI, schemelessOptions), parse(relativeURI, schemelessOptions), schemelessOptions, true), schemelessOptions);
}

/**
 * Normalizes a URI by passing it through `parse` and `serialize`.
 *
 * @param uri A URI string or a components object.
 * @param options Options passed to `parse` and `serialize`.
 * @returns A normalized string for string input, normalized components for object input;
 *          any other input is returned unchanged.
 */
export function normalize(uri:string, options?:URIOptions):string;
export function normalize(uri:URIComponents, options?:URIOptions):URIComponents;
export function normalize(uri:any, options?:URIOptions):any {
    if (typeof uri === "string") {
        uri = serialize(parse(uri, options), options);
    } else if (typeOf(uri) === "object") {
        uri = parse(serialize(<URIComponents>uri, options), options);
    }

    return uri;
}

/**
 * Compares two URIs after normalizing both to their serialized form.
 *
 * @param uriA A URI string or a components object.
 * @param uriB A URI string or a components object.
 * @param options Options passed to `parse` and `serialize`.
 * @returns `true` when both normalize to the same string.
 */
export function equal(uriA:string, uriB:string, options?: URIOptions):boolean;
export function equal(uriA:URIComponents, uriB:URIComponents, options?:URIOptions):boolean;
export function equal(uriA:any, uriB:any, options?:URIOptions):boolean {
    if (typeof uriA === "string") {
        uriA = serialize(parse(uriA, options), options);
    } else if (typeOf(uriA) === "object") {
        uriA = serialize(<URIComponents>uriA, options);
    }

    if (typeof uriB === "string") {
        uriB = serialize(parse(uriB, options), options);
    } else if (typeOf(uriB) === "object") {
        uriB = serialize(<URIComponents>uriB, options);
    }

    return uriA === uriB;
}

/**
 * Percent-encodes every character of `str` matched by the protocol's `ESCAPE` expression.
 *
 * @param str The string to escape; a falsy value is returned as is.
 * @param options The IRI expression is used when `options.iri` is truthy, the URI one otherwise.
 * @returns The escaped string.
 */
export function escapeComponent(str:string, options?:URIOptions):string {
    return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.ESCAPE : IRI_PROTOCOL.ESCAPE), pctEncChar);
}

/**
 * Decodes the percent-encoded sequences of `str` matched by the protocol's `PCT_ENCODED` expression.
 *
 * @param str The string to unescape; a falsy value is returned as is.
 * @param options The IRI expression is used when `options.iri` is truthy, the URI one otherwise.
 * @returns The unescaped string.
 */
export function unescapeComponent(str:string, options?:URIOptions):string {
    return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.PCT_ENCODED : IRI_PROTOCOL.PCT_ENCODED), pctDecChars);
}