utf8.js - twitst4tz - twitter statistics web application

utf8.js (5145B)
      1 /*! https://mths.be/utf8js v2.1.2 by @mathias */
      2 
// Cached reference to String.fromCharCode, used by the encoding helpers in this file.
var stringFromCharCode = String.fromCharCode;
      4 
      5 // Taken from https://mths.be/punycode
      6 function ucs2decode(string) {
      7 	var output = [];
      8 	var counter = 0;
      9 	var length = string.length;
     10 	var value;
     11 	var extra;
     12 	while (counter < length) {
     13 		value = string.charCodeAt(counter++);
     14 		if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
     15 			// high surrogate, and there is a next character
     16 			extra = string.charCodeAt(counter++);
     17 			if ((extra & 0xFC00) == 0xDC00) { // low surrogate
     18 				output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
     19 			} else {
     20 				// unmatched surrogate; only append this code unit, in case the next
     21 				// code unit is the high surrogate of a surrogate pair
     22 				output.push(value);
     23 				counter--;
     24 			}
     25 		} else {
     26 			output.push(value);
     27 		}
     28 	}
     29 	return output;
     30 }
     31 
     32 // Taken from https://mths.be/punycode
     33 function ucs2encode(array) {
     34 	var length = array.length;
     35 	var index = -1;
     36 	var value;
     37 	var output = '';
     38 	while (++index < length) {
     39 		value = array[index];
     40 		if (value > 0xFFFF) {
     41 			value -= 0x10000;
     42 			output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
     43 			value = 0xDC00 | value & 0x3FF;
     44 		}
     45 		output += stringFromCharCode(value);
     46 	}
     47 	return output;
     48 }
     49 
     50 function checkScalarValue(codePoint, strict) {
     51 	if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
     52 		if (strict) {
     53 			throw Error(
     54 				'Lone surrogate U+' + codePoint.toString(16).toUpperCase() +
     55 				' is not a scalar value'
     56 			);
     57 		}
     58 		return false;
     59 	}
     60 	return true;
     61 }
     62 /*--------------------------------------------------------------------------*/
     63 
     64 function createByte(codePoint, shift) {
     65 	return stringFromCharCode(((codePoint >> shift) & 0x3F) | 0x80);
     66 }
     67 
     68 function encodeCodePoint(codePoint, strict) {
     69 	if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence
     70 		return stringFromCharCode(codePoint);
     71 	}
     72 	var symbol = '';
     73 	if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence
     74 		symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0);
     75 	}
     76 	else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence
     77 		if (!checkScalarValue(codePoint, strict)) {
     78 			codePoint = 0xFFFD;
     79 		}
     80 		symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0);
     81 		symbol += createByte(codePoint, 6);
     82 	}
     83 	else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence
     84 		symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0);
     85 		symbol += createByte(codePoint, 12);
     86 		symbol += createByte(codePoint, 6);
     87 	}
     88 	symbol += stringFromCharCode((codePoint & 0x3F) | 0x80);
     89 	return symbol;
     90 }
     91 
     92 function utf8encode(string, opts) {
     93 	opts = opts || {};
     94 	var strict = false !== opts.strict;
     95 
     96 	var codePoints = ucs2decode(string);
     97 	var length = codePoints.length;
     98 	var index = -1;
     99 	var codePoint;
    100 	var byteString = '';
    101 	while (++index < length) {
    102 		codePoint = codePoints[index];
    103 		byteString += encodeCodePoint(codePoint, strict);
    104 	}
    105 	return byteString;
    106 }
    107 
    108 /*--------------------------------------------------------------------------*/
    109 
    110 function readContinuationByte() {
    111 	if (byteIndex >= byteCount) {
    112 		throw Error('Invalid byte index');
    113 	}
    114 
    115 	var continuationByte = byteArray[byteIndex] & 0xFF;
    116 	byteIndex++;
    117 
    118 	if ((continuationByte & 0xC0) == 0x80) {
    119 		return continuationByte & 0x3F;
    120 	}
    121 
    122 	// If we end up here, it’s not a continuation byte
    123 	throw Error('Invalid continuation byte');
    124 }
    125 
    126 function decodeSymbol(strict) {
    127 	var byte1;
    128 	var byte2;
    129 	var byte3;
    130 	var byte4;
    131 	var codePoint;
    132 
    133 	if (byteIndex > byteCount) {
    134 		throw Error('Invalid byte index');
    135 	}
    136 
    137 	if (byteIndex == byteCount) {
    138 		return false;
    139 	}
    140 
    141 	// Read first byte
    142 	byte1 = byteArray[byteIndex] & 0xFF;
    143 	byteIndex++;
    144 
    145 	// 1-byte sequence (no continuation bytes)
    146 	if ((byte1 & 0x80) == 0) {
    147 		return byte1;
    148 	}
    149 
    150 	// 2-byte sequence
    151 	if ((byte1 & 0xE0) == 0xC0) {
    152 		byte2 = readContinuationByte();
    153 		codePoint = ((byte1 & 0x1F) << 6) | byte2;
    154 		if (codePoint >= 0x80) {
    155 			return codePoint;
    156 		} else {
    157 			throw Error('Invalid continuation byte');
    158 		}
    159 	}
    160 
    161 	// 3-byte sequence (may include unpaired surrogates)
    162 	if ((byte1 & 0xF0) == 0xE0) {
    163 		byte2 = readContinuationByte();
    164 		byte3 = readContinuationByte();
    165 		codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
    166 		if (codePoint >= 0x0800) {
    167 			return checkScalarValue(codePoint, strict) ? codePoint : 0xFFFD;
    168 		} else {
    169 			throw Error('Invalid continuation byte');
    170 		}
    171 	}
    172 
    173 	// 4-byte sequence
    174 	if ((byte1 & 0xF8) == 0xF0) {
    175 		byte2 = readContinuationByte();
    176 		byte3 = readContinuationByte();
    177 		byte4 = readContinuationByte();
    178 		codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) |
    179 			(byte3 << 0x06) | byte4;
    180 		if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
    181 			return codePoint;
    182 		}
    183 	}
    184 
    185 	throw Error('Invalid UTF-8 detected');
    186 }
    187 
// Decoder state shared by utf8decode, decodeSymbol and readContinuationByte.
// utf8decode resets all three at the start of every call.
var byteArray; // code units of the byte string currently being decoded
var byteCount; // length of byteArray
var byteIndex; // position of the next unread entry in byteArray
    191 function utf8decode(byteString, opts) {
    192 	opts = opts || {};
    193 	var strict = false !== opts.strict;
    194 
    195 	byteArray = ucs2decode(byteString);
    196 	byteCount = byteArray.length;
    197 	byteIndex = 0;
    198 	var codePoints = [];
    199 	var tmp;
    200 	while ((tmp = decodeSymbol(strict)) !== false) {
    201 		codePoints.push(tmp);
    202 	}
    203 	return ucs2encode(codePoints);
    204 }
    205 
// Public API (CommonJS): the library version string plus the encode and
// decode entry points defined in this file.
module.exports = {
	version: '2.1.2',
	encode: utf8encode,
	decode: utf8decode
};
	twitst4tz twitter statistics web application
	Log \| Files \| Refs \| README \| LICENSE