uri.js (20119B)
/**
 * URI.js
 *
 * @fileoverview An RFC 3986 compliant, scheme extendable URI parsing/validating/resolving library for JavaScript.
 * @author <a href="mailto:gary.court@gmail.com">Gary Court</a>
 * @see http://github.com/garycourt/uri-js
 */
/**
 * Copyright 2011 Gary Court. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list
 *       of conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GARY COURT ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARY COURT OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those of the
 * authors and should not be interpreted as representing official policies, either expressed
 * or implied, of Gary Court.
 */
import URI_PROTOCOL from "./regexps-uri";
import IRI_PROTOCOL from "./regexps-iri";
import punycode from "punycode";
import { toUpperCase, typeOf, assign } from "./util";

/**
 * Registry of scheme handlers, keyed by lower-case scheme name.
 * `parse` and `serialize` look handlers up here and, when present, call their
 * `parse` / `serialize` hooks and read their `unicodeSupport`, `domainHost`
 * and `absolutePath` flags.
 */
export const SCHEMES = {};

/**
 * Percent-encodes the first UTF-16 code unit of `chr` as UTF-8 bytes.
 *
 * Only `chr.charCodeAt(0)` is read, so a code unit is encoded as one, two or
 * three bytes; surrogate pairs are not combined here.
 *
 * @param {string} chr - String whose first code unit is encoded.
 * @returns {string} Upper-case percent-encoded representation.
 */
export function pctEncChar(chr) {
    const c = chr.charCodeAt(0);
    let e;
    if (c < 16) {
        // single hex digit: pad to two
        e = "%0" + c.toString(16).toUpperCase();
    }
    else if (c < 128) {
        e = "%" + c.toString(16).toUpperCase();
    }
    else if (c < 2048) {
        // two-byte UTF-8 sequence
        e = "%" + ((c >> 6) | 192).toString(16).toUpperCase() + "%" + ((c & 63) | 128).toString(16).toUpperCase();
    }
    else {
        // three-byte UTF-8 sequence
        e = "%" + ((c >> 12) | 224).toString(16).toUpperCase() + "%" + (((c >> 6) & 63) | 128).toString(16).toUpperCase() + "%" + ((c & 63) | 128).toString(16).toUpperCase();
    }
    return e;
}

/**
 * Decodes a run of percent-encoded octets (`%XX%XX...`) into characters.
 *
 * The input is walked in `%XX` triplets. A lead byte below 128 yields one
 * character; 194..223 consumes two triplets; 224 and above consumes three.
 * Incomplete multi-byte sequences and other lead bytes are copied through
 * unchanged.
 *
 * @param {string} str - String consisting of percent-encoded triplets.
 * @returns {string} The decoded string.
 */
export function pctDecChars(str) {
    let newStr = "";
    let i = 0;
    const il = str.length;
    while (i < il) {
        const c = parseInt(str.substr(i + 1, 2), 16);
        if (c < 128) {
            newStr += String.fromCharCode(c);
            i += 3;
        }
        else if (c >= 194 && c < 224) {
            if ((il - i) >= 6) {
                const c2 = parseInt(str.substr(i + 4, 2), 16);
                newStr += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
            }
            else {
                // truncated two-byte sequence: keep the raw text
                newStr += str.substr(i, 6);
            }
            i += 6;
        }
        else if (c >= 224) {
            if ((il - i) >= 9) {
                const c2 = parseInt(str.substr(i + 4, 2), 16);
                const c3 = parseInt(str.substr(i + 7, 2), 16);
                newStr += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
            }
            else {
                // truncated three-byte sequence: keep the raw text
                newStr += str.substr(i, 9);
            }
            i += 9;
        }
        else {
            // continuation byte or other lead byte (including NaN): pass through
            newStr += str.substr(i, 3);
            i += 3;
        }
    }
    return newStr;
}

/**
 * Normalizes the percent-encoding of every defined component, in place.
 *
 * For each component: percent-encoded sequences that decode to something
 * matching `protocol.UNRESERVED` are decoded, characters matched by the
 * component's `NOT_*` expression are percent-encoded, and remaining
 * percent-encodings are upper-cased. Scheme and host are also lower-cased.
 *
 * @param {Object} components - URI components; mutated and returned.
 * @param {Object} protocol - Object supplying the regular expressions used here
 *     (`PCT_ENCODED`, `UNRESERVED`, `NOT_SCHEME`, `NOT_USERINFO`, `NOT_HOST`,
 *     `NOT_PATH`, `NOT_PATH_NOSCHEME`, `NOT_QUERY`, `NOT_FRAGMENT`).
 * @returns {Object} The same `components` object.
 */
function _normalizeComponentEncoding(components, protocol) {
    function decodeUnreserved(str) {
        const decStr = pctDecChars(str);
        // only keep the decoded form when it is an unreserved character
        return (!decStr.match(protocol.UNRESERVED) ? str : decStr);
    }
    if (components.scheme) {
        components.scheme = String(components.scheme).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_SCHEME, "");
    }
    if (components.userinfo !== undefined) {
        components.userinfo = String(components.userinfo).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_USERINFO, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.host !== undefined) {
        components.host = String(components.host).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_HOST, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.path !== undefined) {
        components.path = String(components.path).replace(protocol.PCT_ENCODED, decodeUnreserved).replace((components.scheme ? protocol.NOT_PATH : protocol.NOT_PATH_NOSCHEME), pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.query !== undefined) {
        components.query = String(components.query).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_QUERY, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.fragment !== undefined) {
        components.fragment = String(components.fragment).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_FRAGMENT, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    return components;
}

/**
 * Removes leading zeros from `str`; an all-zero or empty input yields "0".
 *
 * @param {string} str
 * @returns {string}
 */
function _stripLeadingZeros(str) {
    return str.replace(/^0*(.*)/, "$1") || "0";
}

/**
 * Strips leading zeros from each dotted field of an IPv4 host.
 *
 * @param {string} host - Candidate host string.
 * @param {Object} protocol - Supplies `IPV4ADDRESS`; its first capture group is
 *     used as the address.
 * @returns {string} The normalized address, or `host` unchanged when it does
 *     not match.
 */
function _normalizeIPv4(host, protocol) {
    const matches = host.match(protocol.IPV4ADDRESS) || [];
    const [, address] = matches;
    if (address) {
        return address.split(".").map(_stripLeadingZeros).join(".");
    }
    else {
        return host;
    }
}

/**
 * Normalizes an IPv6 host: lower-cases it, strips leading zeros from each
 * field, collapses the longest run (length > 1) of zero fields into "::",
 * and re-appends the zone identifier, if any, after "%".
 *
 * @param {string} host - Candidate host string.
 * @param {Object} protocol - Supplies `IPV6ADDRESS` (capture 1: address,
 *     capture 2: zone) and `IPV4ADDRESS`.
 * @returns {string} The normalized address, or `host` unchanged when it does
 *     not match.
 */
function _normalizeIPv6(host, protocol) {
    const matches = host.match(protocol.IPV6ADDRESS) || [];
    const [, address, zone] = matches;
    if (address) {
        // after reverse(): `last` is the part following "::" (or the whole
        // address when there is no "::"), `first` the part preceding it
        const [last, first] = address.toLowerCase().split('::').reverse();
        const firstFields = first ? first.split(":").map(_stripLeadingZeros) : [];
        const lastFields = last.split(":").map(_stripLeadingZeros);
        const isLastFieldIPv4Address = protocol.IPV4ADDRESS.test(lastFields[lastFields.length - 1]);
        // an embedded IPv4 tail occupies the space of two 16-bit fields
        const fieldCount = isLastFieldIPv4Address ? 7 : 8;
        const lastFieldsStart = lastFields.length - fieldCount;
        const fields = Array(fieldCount);
        for (let x = 0; x < fieldCount; ++x) {
            fields[x] = firstFields[x] || lastFields[lastFieldsStart + x] || '';
        }
        if (isLastFieldIPv4Address) {
            fields[fieldCount - 1] = _normalizeIPv4(fields[fieldCount - 1], protocol);
        }
        // collect runs of consecutive empty/zero fields as {index, length}
        const allZeroFields = fields.reduce((acc, field, index) => {
            if (!field || field === "0") {
                const lastLongest = acc[acc.length - 1];
                if (lastLongest && lastLongest.index + lastLongest.length === index) {
                    lastLongest.length++;
                }
                else {
                    acc.push({ index, length: 1 });
                }
            }
            return acc;
        }, []);
        const longestZeroFields = allZeroFields.sort((a, b) => b.length - a.length)[0];
        let newHost;
        if (longestZeroFields && longestZeroFields.length > 1) {
            const newFirst = fields.slice(0, longestZeroFields.index);
            const newLast = fields.slice(longestZeroFields.index + longestZeroFields.length);
            newHost = newFirst.join(":") + "::" + newLast.join(":");
        }
        else {
            newHost = fields.join(":");
        }
        if (zone) {
            newHost += "%" + zone;
        }
        return newHost;
    }
    else {
        return host;
    }
}

// Capture groups: 1 scheme, 2 authority, 3 userinfo, 4 host, 5 port,
// 6 path, 7 query, 8 fragment.
const URI_PARSE = /^(?:([^:\/?#]+):)?(?:\/\/((?:([^\/?#@]*)@)?(\[[^\/?#\]]+\]|[^\/?#:]*)(?:\:(\d*))?))?([^?#]*)(?:\?([^#]*))?(?:#((?:.|\n|\r)*))?/i;
// True when the engine reports a non-participating capture group as undefined.
const NO_MATCH_IS_UNDEFINED = ("").match(/(){0}/)[1] === undefined;

/**
 * Parses a URI (or IRI) string into its components.
 *
 * @param {string} uriString - The string to parse.
 * @param {Object} [options]
 * @param {boolean} [options.iri] - Unless exactly `false`, IRI expressions are
 *     used for host normalization.
 * @param {string} [options.reference] - Expected reference type; `"suffix"`
 *     prefixes the input with `scheme:` (if given) and `//` before parsing. Any
 *     other value that differs from the detected type sets `error`.
 * @param {string} [options.scheme] - Scheme used for the handler lookup and
 *     the `"suffix"` prefix.
 * @param {boolean} [options.unicodeSupport] - When falsy and the handler lacks
 *     `unicodeSupport`, components are normalized with the URI expressions.
 * @param {boolean} [options.domainHost] - Treat the host as a domain name and
 *     convert it to ASCII via punycode.
 * @returns {Object} Components: `scheme`, `userinfo`, `host`, `port`, `path`,
 *     `query`, `fragment`, `reference`, and `error` when a problem was found.
 */
export function parse(uriString, options = {}) {
    const components = {};
    const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);
    if (options.reference === "suffix") {
        uriString = (options.scheme ? options.scheme + ":" : "") + "//" + uriString;
    }
    const matches = uriString.match(URI_PARSE);
    if (matches) {
        if (NO_MATCH_IS_UNDEFINED) {
            //store each component
            components.scheme = matches[1];
            components.userinfo = matches[3];
            components.host = matches[4];
            components.port = parseInt(matches[5], 10);
            components.path = matches[6] || "";
            components.query = matches[7];
            components.fragment = matches[8];
            //fix port number
            if (isNaN(components.port)) {
                components.port = matches[5];
            }
        }
        else { //IE FIX for improper RegExp matching
            //store each component
            components.scheme = matches[1] || undefined;
            components.userinfo = (uriString.indexOf("@") !== -1 ? matches[3] : undefined);
            components.host = (uriString.indexOf("//") !== -1 ? matches[4] : undefined);
            components.port = parseInt(matches[5], 10);
            components.path = matches[6] || "";
            components.query = (uriString.indexOf("?") !== -1 ? matches[7] : undefined);
            components.fragment = (uriString.indexOf("#") !== -1 ? matches[8] : undefined);
            //fix port number
            if (isNaN(components.port)) {
                // use the port capture (group 5), matching the branch above;
                // group 4 is the host and must not leak into `port`
                components.port = (uriString.match(/\/\/(?:.|\n)*\:(?:\/|\?|\#|$)/) ? matches[5] : undefined);
            }
        }
        if (components.host) {
            //normalize IP hosts
            components.host = _normalizeIPv6(_normalizeIPv4(components.host, protocol), protocol);
        }
        //determine reference type
        if (components.scheme === undefined && components.userinfo === undefined && components.host === undefined && components.port === undefined && !components.path && components.query === undefined) {
            components.reference = "same-document";
        }
        else if (components.scheme === undefined) {
            components.reference = "relative";
        }
        else if (components.fragment === undefined) {
            components.reference = "absolute";
        }
        else {
            components.reference = "uri";
        }
        //check for reference errors
        if (options.reference && options.reference !== "suffix" && options.reference !== components.reference) {
            components.error = components.error || "URI is not a " + options.reference + " reference.";
        }
        //find scheme handler
        const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];
        //check if scheme can't handle IRIs
        if (!options.unicodeSupport && (!schemeHandler || !schemeHandler.unicodeSupport)) {
            //if host component is a domain name
            if (components.host && (options.domainHost || (schemeHandler && schemeHandler.domainHost))) {
                //convert Unicode IDN -> ASCII IDN
                try {
                    components.host = punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase());
                }
                catch (e) {
                    components.error = components.error || "Host's domain name can not be converted to ASCII via punycode: " + e;
                }
            }
            //convert IRI -> URI
            _normalizeComponentEncoding(components, URI_PROTOCOL);
        }
        else {
            //normalize encodings
            _normalizeComponentEncoding(components, protocol);
        }
        //perform scheme specific parsing
        if (schemeHandler && schemeHandler.parse) {
            schemeHandler.parse(components, options);
        }
    }
    else {
        components.error = components.error || "URI can not be parsed.";
    }
    return components;
}

/**
 * Builds the authority (`userinfo@host:port`) from components.
 *
 * The port is emitted only when it is a number. IPv6 hosts are wrapped in
 * brackets and their zone separator is written as "%25".
 *
 * @param {Object} components - URI components.
 * @param {Object} options - Unless `options.iri` is exactly `false`, IRI
 *     expressions are used for host matching.
 * @returns {string|undefined} The authority, or `undefined` when none of
 *     userinfo, host or a numeric port is present.
 */
function _recomposeAuthority(components, options) {
    const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);
    const uriTokens = [];
    if (components.userinfo !== undefined) {
        uriTokens.push(components.userinfo);
        uriTokens.push("@");
    }
    if (components.host !== undefined) {
        //normalize IP hosts, add brackets and escape zone separator for IPv6
        uriTokens.push(_normalizeIPv6(_normalizeIPv4(String(components.host), protocol), protocol).replace(protocol.IPV6ADDRESS, (_, $1, $2) => "[" + $1 + ($2 ? "%25" + $2 : "") + "]"));
    }
    if (typeof components.port === "number") {
        uriTokens.push(":");
        uriTokens.push(components.port.toString(10));
    }
    return uriTokens.length ? uriTokens.join("") : undefined;
}

// Patterns used by removeDotSegments.
const RDS1 = /^\.\.?\//; // leading "./" or "../"
const RDS2 = /^\/\.(\/|$)/; // leading "/./" or "/."
const RDS3 = /^\/\.\.(\/|$)/; // leading "/../" or "/.."
const RDS4 = /^\.\.?$/; // exactly "." or ".." (the function compares literally instead)
const RDS5 = /^\/?(?:.|\n)*?(?=\/|$)/; // first path segment, with its leading "/"

/**
 * Removes "." and ".." segments from a path by repeatedly consuming a prefix
 * of `input` and moving complete segments to an output buffer.
 *
 * @param {string} input - The path to process.
 * @returns {string} The path with dot segments removed.
 * @throws {Error} If no rule matches the remaining input.
 */
export function removeDotSegments(input) {
    const output = [];
    while (input.length) {
        if (input.match(RDS1)) {
            input = input.replace(RDS1, "");
        }
        else if (input.match(RDS2)) {
            input = input.replace(RDS2, "/");
        }
        else if (input.match(RDS3)) {
            input = input.replace(RDS3, "/");
            // ".." also discards the previously emitted segment
            output.pop();
        }
        else if (input === "." || input === "..") {
            input = "";
        }
        else {
            const im = input.match(RDS5);
            if (im) {
                const s = im[0];
                input = input.slice(s.length);
                output.push(s);
            }
            else {
                throw new Error("Unexpected dot segment condition");
            }
        }
    }
    return output.join("");
}

/**
 * Serializes URI components into a string.
 *
 * Note: `components` is modified in place (scheme-handler hook, host
 * conversion, encoding normalization, and `error` on punycode failure).
 *
 * @param {Object} components - URI components.
 * @param {Object} [options]
 * @param {boolean} [options.iri] - When truthy, IRI expressions are used and
 *     domain hosts are converted to Unicode instead of ASCII.
 * @param {string} [options.scheme] - Scheme used for the handler lookup.
 * @param {string} [options.reference] - `"suffix"` omits the scheme and "//".
 * @param {boolean} [options.domainHost] - Treat the host as a domain name.
 * @param {boolean} [options.absolutePath] - When truthy, dot segments are not
 *     removed from the path.
 * @returns {string} The serialized URI.
 */
export function serialize(components, options = {}) {
    const protocol = (options.iri ? IRI_PROTOCOL : URI_PROTOCOL);
    const uriTokens = [];
    //find scheme handler
    const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];
    //perform scheme specific serialization
    if (schemeHandler && schemeHandler.serialize) {
        schemeHandler.serialize(components, options);
    }
    if (components.host) {
        //if host component is an IPv6 address
        if (protocol.IPV6ADDRESS.test(components.host)) {
            //TODO: normalize IPv6 address as per RFC 5952
        }
        //if host component is a domain name
        else if (options.domainHost || (schemeHandler && schemeHandler.domainHost)) {
            //convert IDN via punycode
            try {
                components.host = (!options.iri ? punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase()) : punycode.toUnicode(components.host));
            }
            catch (e) {
                components.error = components.error || "Host's domain name can not be converted to " + (!options.iri ? "ASCII" : "Unicode") + " via punycode: " + e;
            }
        }
    }
    //normalize encoding
    _normalizeComponentEncoding(components, protocol);
    if (options.reference !== "suffix" && components.scheme) {
        uriTokens.push(components.scheme);
        uriTokens.push(":");
    }
    const authority = _recomposeAuthority(components, options);
    if (authority !== undefined) {
        if (options.reference !== "suffix") {
            uriTokens.push("//");
        }
        uriTokens.push(authority);
        // a non-empty path following an authority must start with "/"
        if (components.path && components.path.charAt(0) !== "/") {
            uriTokens.push("/");
        }
    }
    if (components.path !== undefined) {
        let s = components.path;
        if (!options.absolutePath && (!schemeHandler || !schemeHandler.absolutePath)) {
            s = removeDotSegments(s);
        }
        if (authority === undefined) {
            s = s.replace(/^\/\//, "/%2F"); //don't allow the path to start with "//"
        }
        uriTokens.push(s);
    }
    if (components.query !== undefined) {
        uriTokens.push("?");
        uriTokens.push(components.query);
    }
    if (components.fragment !== undefined) {
        uriTokens.push("#");
        uriTokens.push(components.fragment);
    }
    return uriTokens.join(""); //merge tokens into a string
}

/**
 * Resolves `relative` against `base`, both given as component objects.
 *
 * @param {Object} base - Base URI components.
 * @param {Object} relative - Relative reference components.
 * @param {Object} [options] - Passed to `parse`/`serialize` when normalizing;
 *     `options.tolerant` makes a scheme on `relative` be ignored. `null` is
 *     treated like an omitted argument.
 * @param {boolean} [skipNormalization] - When truthy, `base` and `relative`
 *     are used as given instead of being round-tripped through
 *     `serialize`/`parse` first.
 * @returns {Object} The resolved target components.
 */
export function resolveComponents(base, relative, options = {}, skipNormalization) {
    // Guard before first use so an explicit null does not reach parse/serialize.
    const opts = options || {};
    const target = {};
    if (!skipNormalization) {
        base = parse(serialize(base, opts), opts); //normalize base components
        relative = parse(serialize(relative, opts), opts); //normalize relative components
    }
    if (!opts.tolerant && relative.scheme) {
        target.scheme = relative.scheme;
        //target.authority = relative.authority;
        target.userinfo = relative.userinfo;
        target.host = relative.host;
        target.port = relative.port;
        target.path = removeDotSegments(relative.path || "");
        target.query = relative.query;
    }
    else {
        if (relative.userinfo !== undefined || relative.host !== undefined || relative.port !== undefined) {
            //target.authority = relative.authority;
            target.userinfo = relative.userinfo;
            target.host = relative.host;
            target.port = relative.port;
            target.path = removeDotSegments(relative.path || "");
            target.query = relative.query;
        }
        else {
            if (!relative.path) {
                target.path = base.path;
                if (relative.query !== undefined) {
                    target.query = relative.query;
                }
                else {
                    target.query = base.query;
                }
            }
            else {
                if (relative.path.charAt(0) === "/") {
                    target.path = removeDotSegments(relative.path);
                }
                else {
                    if ((base.userinfo !== undefined || base.host !== undefined || base.port !== undefined) && !base.path) {
                        target.path = "/" + relative.path;
                    }
                    else if (!base.path) {
                        target.path = relative.path;
                    }
                    else {
                        // merge: base path up to and including its last "/"
                        target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative.path;
                    }
                    target.path = removeDotSegments(target.path);
                }
                target.query = relative.query;
            }
            //target.authority = base.authority;
            target.userinfo = base.userinfo;
            target.host = base.host;
            target.port = base.port;
        }
        target.scheme = base.scheme;
    }
    target.fragment = relative.fragment;
    return target;
}

/**
 * Resolves a relative URI string against a base URI string.
 *
 * Both inputs are parsed with `scheme` set to the string 'null' unless
 * `options` supplies its own `scheme`.
 *
 * @param {string} baseURI
 * @param {string} relativeURI
 * @param {Object} [options]
 * @returns {string} The resolved URI.
 */
export function resolve(baseURI, relativeURI, options) {
    const schemelessOptions = assign({ scheme: 'null' }, options);
    return serialize(resolveComponents(parse(baseURI, schemelessOptions), parse(relativeURI, schemelessOptions), schemelessOptions, true), schemelessOptions);
}

/**
 * Normalizes a URI by round-tripping it through `parse` and `serialize`.
 *
 * @param {string|Object} uri - A URI string or a components object.
 * @param {Object} [options] - Passed to `parse` and `serialize`.
 * @returns {string|Object} A string for string input, a components object for
 *     object input; any other value is returned unchanged.
 */
export function normalize(uri, options) {
    if (typeof uri === "string") {
        uri = serialize(parse(uri, options), options);
    }
    else if (typeOf(uri) === "object") {
        uri = parse(serialize(uri, options), options);
    }
    return uri;
}

/**
 * Compares two URIs by their serialized forms.
 *
 * Strings are parsed and re-serialized; objects are serialized (which
 * modifies them in place); other values are compared as given.
 *
 * @param {string|Object} uriA
 * @param {string|Object} uriB
 * @param {Object} [options] - Passed to `parse` and `serialize`.
 * @returns {boolean} Whether the two serialized forms are strictly equal.
 */
export function equal(uriA, uriB, options) {
    if (typeof uriA === "string") {
        uriA = serialize(parse(uriA, options), options);
    }
    else if (typeOf(uriA) === "object") {
        uriA = serialize(uriA, options);
    }
    if (typeof uriB === "string") {
        uriB = serialize(parse(uriB, options), options);
    }
    else if (typeOf(uriB) === "object") {
        uriB = serialize(uriB, options);
    }
    return uriA === uriB;
}

/**
 * Percent-encodes every character of `str` matched by the protocol's `ESCAPE`
 * expression.
 *
 * @param {*} str - Value to escape; falsy values are returned unchanged.
 * @param {Object} [options] - `options.iri` truthy selects the IRI expression.
 * @returns {*} The escaped string, or the falsy input.
 */
export function escapeComponent(str, options) {
    return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.ESCAPE : IRI_PROTOCOL.ESCAPE), pctEncChar);
}

/**
 * Decodes every percent-encoded sequence of `str` matched by the protocol's
 * `PCT_ENCODED` expression.
 *
 * @param {*} str - Value to unescape; falsy values are returned unchanged.
 * @param {Object} [options] - `options.iri` truthy selects the IRI expression.
 * @returns {*} The unescaped string, or the falsy input.
 */
export function unescapeComponent(str, options) {
    return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.PCT_ENCODED : IRI_PROTOCOL.PCT_ENCODED), pctDecChars);
}
//# sourceMappingURL=uri.js.map