twitst4tz

twitter statistics web application
Log | Files | Refs | README | LICENSE

punycode.es6.js (12630B)


      1 'use strict';
      2 
      3 /** Highest positive signed 32-bit float value */
      4 const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1
      5 
      6 /** Bootstring parameters */
      7 const base = 36;
      8 const tMin = 1;
      9 const tMax = 26;
     10 const skew = 38;
     11 const damp = 700;
     12 const initialBias = 72;
     13 const initialN = 128; // 0x80
     14 const delimiter = '-'; // '\x2D'
     15 
     16 /** Regular expressions */
     17 const regexPunycode = /^xn--/;
     18 const regexNonASCII = /[^\0-\x7E]/; // non-ASCII chars
     19 const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators
     20 
     21 /** Error messages */
     22 const errors = {
     23 	'overflow': 'Overflow: input needs wider integers to process',
     24 	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
     25 	'invalid-input': 'Invalid input'
     26 };
     27 
     28 /** Convenience shortcuts */
     29 const baseMinusTMin = base - tMin;
     30 const floor = Math.floor;
     31 const stringFromCharCode = String.fromCharCode;
     32 
     33 /*--------------------------------------------------------------------------*/
     34 
     35 /**
     36  * A generic error utility function.
     37  * @private
     38  * @param {String} type The error type.
     39  * @returns {Error} Throws a `RangeError` with the applicable error message.
     40  */
     41 function error(type) {
     42 	throw new RangeError(errors[type]);
     43 }
     44 
     45 /**
     46  * A generic `Array#map` utility function.
     47  * @private
     48  * @param {Array} array The array to iterate over.
     49  * @param {Function} callback The function that gets called for every array
     50  * item.
     51  * @returns {Array} A new array of values returned by the callback function.
     52  */
     53 function map(array, fn) {
     54 	const result = [];
     55 	let length = array.length;
     56 	while (length--) {
     57 		result[length] = fn(array[length]);
     58 	}
     59 	return result;
     60 }
     61 
     62 /**
     63  * A simple `Array#map`-like wrapper to work with domain name strings or email
     64  * addresses.
     65  * @private
     66  * @param {String} domain The domain name or email address.
     67  * @param {Function} callback The function that gets called for every
     68  * character.
     69  * @returns {Array} A new string of characters returned by the callback
     70  * function.
     71  */
     72 function mapDomain(string, fn) {
     73 	const parts = string.split('@');
     74 	let result = '';
     75 	if (parts.length > 1) {
     76 		// In email addresses, only the domain name should be punycoded. Leave
     77 		// the local part (i.e. everything up to `@`) intact.
     78 		result = parts[0] + '@';
     79 		string = parts[1];
     80 	}
     81 	// Avoid `split(regex)` for IE8 compatibility. See #17.
     82 	string = string.replace(regexSeparators, '\x2E');
     83 	const labels = string.split('.');
     84 	const encoded = map(labels, fn).join('.');
     85 	return result + encoded;
     86 }
     87 
     88 /**
     89  * Creates an array containing the numeric code points of each Unicode
     90  * character in the string. While JavaScript uses UCS-2 internally,
     91  * this function will convert a pair of surrogate halves (each of which
     92  * UCS-2 exposes as separate characters) into a single code point,
     93  * matching UTF-16.
     94  * @see `punycode.ucs2.encode`
     95  * @see <https://mathiasbynens.be/notes/javascript-encoding>
     96  * @memberOf punycode.ucs2
     97  * @name decode
     98  * @param {String} string The Unicode input string (UCS-2).
     99  * @returns {Array} The new array of code points.
    100  */
    101 function ucs2decode(string) {
    102 	const output = [];
    103 	let counter = 0;
    104 	const length = string.length;
    105 	while (counter < length) {
    106 		const value = string.charCodeAt(counter++);
    107 		if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
    108 			// It's a high surrogate, and there is a next character.
    109 			const extra = string.charCodeAt(counter++);
    110 			if ((extra & 0xFC00) == 0xDC00) { // Low surrogate.
    111 				output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
    112 			} else {
    113 				// It's an unmatched surrogate; only append this code unit, in case the
    114 				// next code unit is the high surrogate of a surrogate pair.
    115 				output.push(value);
    116 				counter--;
    117 			}
    118 		} else {
    119 			output.push(value);
    120 		}
    121 	}
    122 	return output;
    123 }
    124 
    125 /**
    126  * Creates a string based on an array of numeric code points.
    127  * @see `punycode.ucs2.decode`
    128  * @memberOf punycode.ucs2
    129  * @name encode
    130  * @param {Array} codePoints The array of numeric code points.
    131  * @returns {String} The new Unicode string (UCS-2).
    132  */
    133 const ucs2encode = array => String.fromCodePoint(...array);
    134 
    135 /**
    136  * Converts a basic code point into a digit/integer.
    137  * @see `digitToBasic()`
    138  * @private
    139  * @param {Number} codePoint The basic numeric code point value.
    140  * @returns {Number} The numeric value of a basic code point (for use in
    141  * representing integers) in the range `0` to `base - 1`, or `base` if
    142  * the code point does not represent a value.
    143  */
    144 const basicToDigit = function(codePoint) {
    145 	if (codePoint - 0x30 < 0x0A) {
    146 		return codePoint - 0x16;
    147 	}
    148 	if (codePoint - 0x41 < 0x1A) {
    149 		return codePoint - 0x41;
    150 	}
    151 	if (codePoint - 0x61 < 0x1A) {
    152 		return codePoint - 0x61;
    153 	}
    154 	return base;
    155 };
    156 
    157 /**
    158  * Converts a digit/integer into a basic code point.
    159  * @see `basicToDigit()`
    160  * @private
    161  * @param {Number} digit The numeric value of a basic code point.
    162  * @returns {Number} The basic code point whose value (when used for
    163  * representing integers) is `digit`, which needs to be in the range
    164  * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
    165  * used; else, the lowercase form is used. The behavior is undefined
    166  * if `flag` is non-zero and `digit` has no uppercase form.
    167  */
    168 const digitToBasic = function(digit, flag) {
    169 	//  0..25 map to ASCII a..z or A..Z
    170 	// 26..35 map to ASCII 0..9
    171 	return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);
    172 };
    173 
    174 /**
    175  * Bias adaptation function as per section 3.4 of RFC 3492.
    176  * https://tools.ietf.org/html/rfc3492#section-3.4
    177  * @private
    178  */
    179 const adapt = function(delta, numPoints, firstTime) {
    180 	let k = 0;
    181 	delta = firstTime ? floor(delta / damp) : delta >> 1;
    182 	delta += floor(delta / numPoints);
    183 	for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
    184 		delta = floor(delta / baseMinusTMin);
    185 	}
    186 	return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
    187 };
    188 
    189 /**
    190  * Converts a Punycode string of ASCII-only symbols to a string of Unicode
    191  * symbols.
    192  * @memberOf punycode
    193  * @param {String} input The Punycode string of ASCII-only symbols.
    194  * @returns {String} The resulting string of Unicode symbols.
    195  */
    196 const decode = function(input) {
    197 	// Don't use UCS-2.
    198 	const output = [];
    199 	const inputLength = input.length;
    200 	let i = 0;
    201 	let n = initialN;
    202 	let bias = initialBias;
    203 
    204 	// Handle the basic code points: let `basic` be the number of input code
    205 	// points before the last delimiter, or `0` if there is none, then copy
    206 	// the first basic code points to the output.
    207 
    208 	let basic = input.lastIndexOf(delimiter);
    209 	if (basic < 0) {
    210 		basic = 0;
    211 	}
    212 
    213 	for (let j = 0; j < basic; ++j) {
    214 		// if it's not a basic code point
    215 		if (input.charCodeAt(j) >= 0x80) {
    216 			error('not-basic');
    217 		}
    218 		output.push(input.charCodeAt(j));
    219 	}
    220 
    221 	// Main decoding loop: start just after the last delimiter if any basic code
    222 	// points were copied; start at the beginning otherwise.
    223 
    224 	for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {
    225 
    226 		// `index` is the index of the next character to be consumed.
    227 		// Decode a generalized variable-length integer into `delta`,
    228 		// which gets added to `i`. The overflow checking is easier
    229 		// if we increase `i` as we go, then subtract off its starting
    230 		// value at the end to obtain `delta`.
    231 		let oldi = i;
    232 		for (let w = 1, k = base; /* no condition */; k += base) {
    233 
    234 			if (index >= inputLength) {
    235 				error('invalid-input');
    236 			}
    237 
    238 			const digit = basicToDigit(input.charCodeAt(index++));
    239 
    240 			if (digit >= base || digit > floor((maxInt - i) / w)) {
    241 				error('overflow');
    242 			}
    243 
    244 			i += digit * w;
    245 			const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
    246 
    247 			if (digit < t) {
    248 				break;
    249 			}
    250 
    251 			const baseMinusT = base - t;
    252 			if (w > floor(maxInt / baseMinusT)) {
    253 				error('overflow');
    254 			}
    255 
    256 			w *= baseMinusT;
    257 
    258 		}
    259 
    260 		const out = output.length + 1;
    261 		bias = adapt(i - oldi, out, oldi == 0);
    262 
    263 		// `i` was supposed to wrap around from `out` to `0`,
    264 		// incrementing `n` each time, so we'll fix that now:
    265 		if (floor(i / out) > maxInt - n) {
    266 			error('overflow');
    267 		}
    268 
    269 		n += floor(i / out);
    270 		i %= out;
    271 
    272 		// Insert `n` at position `i` of the output.
    273 		output.splice(i++, 0, n);
    274 
    275 	}
    276 
    277 	return String.fromCodePoint(...output);
    278 };
    279 
    280 /**
    281  * Converts a string of Unicode symbols (e.g. a domain name label) to a
    282  * Punycode string of ASCII-only symbols.
    283  * @memberOf punycode
    284  * @param {String} input The string of Unicode symbols.
    285  * @returns {String} The resulting Punycode string of ASCII-only symbols.
    286  */
    287 const encode = function(input) {
    288 	const output = [];
    289 
    290 	// Convert the input in UCS-2 to an array of Unicode code points.
    291 	input = ucs2decode(input);
    292 
    293 	// Cache the length.
    294 	let inputLength = input.length;
    295 
    296 	// Initialize the state.
    297 	let n = initialN;
    298 	let delta = 0;
    299 	let bias = initialBias;
    300 
    301 	// Handle the basic code points.
    302 	for (const currentValue of input) {
    303 		if (currentValue < 0x80) {
    304 			output.push(stringFromCharCode(currentValue));
    305 		}
    306 	}
    307 
    308 	let basicLength = output.length;
    309 	let handledCPCount = basicLength;
    310 
    311 	// `handledCPCount` is the number of code points that have been handled;
    312 	// `basicLength` is the number of basic code points.
    313 
    314 	// Finish the basic string with a delimiter unless it's empty.
    315 	if (basicLength) {
    316 		output.push(delimiter);
    317 	}
    318 
    319 	// Main encoding loop:
    320 	while (handledCPCount < inputLength) {
    321 
    322 		// All non-basic code points < n have been handled already. Find the next
    323 		// larger one:
    324 		let m = maxInt;
    325 		for (const currentValue of input) {
    326 			if (currentValue >= n && currentValue < m) {
    327 				m = currentValue;
    328 			}
    329 		}
    330 
    331 		// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
    332 		// but guard against overflow.
    333 		const handledCPCountPlusOne = handledCPCount + 1;
    334 		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
    335 			error('overflow');
    336 		}
    337 
    338 		delta += (m - n) * handledCPCountPlusOne;
    339 		n = m;
    340 
    341 		for (const currentValue of input) {
    342 			if (currentValue < n && ++delta > maxInt) {
    343 				error('overflow');
    344 			}
    345 			if (currentValue == n) {
    346 				// Represent delta as a generalized variable-length integer.
    347 				let q = delta;
    348 				for (let k = base; /* no condition */; k += base) {
    349 					const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
    350 					if (q < t) {
    351 						break;
    352 					}
    353 					const qMinusT = q - t;
    354 					const baseMinusT = base - t;
    355 					output.push(
    356 						stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
    357 					);
    358 					q = floor(qMinusT / baseMinusT);
    359 				}
    360 
    361 				output.push(stringFromCharCode(digitToBasic(q, 0)));
    362 				bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
    363 				delta = 0;
    364 				++handledCPCount;
    365 			}
    366 		}
    367 
    368 		++delta;
    369 		++n;
    370 
    371 	}
    372 	return output.join('');
    373 };
    374 
    375 /**
    376  * Converts a Punycode string representing a domain name or an email address
    377  * to Unicode. Only the Punycoded parts of the input will be converted, i.e.
    378  * it doesn't matter if you call it on a string that has already been
    379  * converted to Unicode.
    380  * @memberOf punycode
    381  * @param {String} input The Punycoded domain name or email address to
    382  * convert to Unicode.
    383  * @returns {String} The Unicode representation of the given Punycode
    384  * string.
    385  */
    386 const toUnicode = function(input) {
    387 	return mapDomain(input, function(string) {
    388 		return regexPunycode.test(string)
    389 			? decode(string.slice(4).toLowerCase())
    390 			: string;
    391 	});
    392 };
    393 
    394 /**
    395  * Converts a Unicode string representing a domain name or an email address to
    396  * Punycode. Only the non-ASCII parts of the domain name will be converted,
    397  * i.e. it doesn't matter if you call it with a domain that's already in
    398  * ASCII.
    399  * @memberOf punycode
    400  * @param {String} input The domain name or email address to convert, as a
    401  * Unicode string.
    402  * @returns {String} The Punycode representation of the given domain name or
    403  * email address.
    404  */
    405 const toASCII = function(input) {
    406 	return mapDomain(input, function(string) {
    407 		return regexNonASCII.test(string)
    408 			? 'xn--' + encode(string)
    409 			: string;
    410 	});
    411 };
    412 
    413 /*--------------------------------------------------------------------------*/
    414 
    415 /** Define the public API */
    416 const punycode = {
    417 	/**
    418 	 * A string representing the current Punycode.js version number.
    419 	 * @memberOf punycode
    420 	 * @type String
    421 	 */
    422 	'version': '2.1.0',
    423 	/**
    424 	 * An object of methods to convert from JavaScript's internal character
    425 	 * representation (UCS-2) to Unicode code points, and back.
    426 	 * @see <https://mathiasbynens.be/notes/javascript-encoding>
    427 	 * @memberOf punycode
    428 	 * @type Object
    429 	 */
    430 	'ucs2': {
    431 		'decode': ucs2decode,
    432 		'encode': ucs2encode
    433 	},
    434 	'decode': decode,
    435 	'encode': encode,
    436 	'toASCII': toASCII,
    437 	'toUnicode': toUnicode
    438 };
    439 
    440 export { ucs2decode, ucs2encode, decode, encode, toASCII, toUnicode };
    441 export default punycode;