internal.js (6115B)
1 "use strict"; 2 var Buffer = require("safer-buffer").Buffer; 3 4 // Export Node.js internal encodings. 5 6 module.exports = { 7 // Encodings 8 utf8: { type: "_internal", bomAware: true}, 9 cesu8: { type: "_internal", bomAware: true}, 10 unicode11utf8: "utf8", 11 12 ucs2: { type: "_internal", bomAware: true}, 13 utf16le: "ucs2", 14 15 binary: { type: "_internal" }, 16 base64: { type: "_internal" }, 17 hex: { type: "_internal" }, 18 19 // Codec. 20 _internal: InternalCodec, 21 }; 22 23 //------------------------------------------------------------------------------ 24 25 function InternalCodec(codecOptions, iconv) { 26 this.enc = codecOptions.encodingName; 27 this.bomAware = codecOptions.bomAware; 28 29 if (this.enc === "base64") 30 this.encoder = InternalEncoderBase64; 31 else if (this.enc === "cesu8") { 32 this.enc = "utf8"; // Use utf8 for decoding. 33 this.encoder = InternalEncoderCesu8; 34 35 // Add decoder for versions of Node not supporting CESU-8 36 if (Buffer.from('eda0bdedb2a9', 'hex').toString() !== '💩') { 37 this.decoder = InternalDecoderCesu8; 38 this.defaultCharUnicode = iconv.defaultCharUnicode; 39 } 40 } 41 } 42 43 InternalCodec.prototype.encoder = InternalEncoder; 44 InternalCodec.prototype.decoder = InternalDecoder; 45 46 //------------------------------------------------------------------------------ 47 48 // We use node.js internal decoder. Its signature is the same as ours. 49 var StringDecoder = require('string_decoder').StringDecoder; 50 51 if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method. 52 StringDecoder.prototype.end = function() {}; 53 54 55 function InternalDecoder(options, codec) { 56 StringDecoder.call(this, codec.enc); 57 } 58 59 InternalDecoder.prototype = StringDecoder.prototype; 60 61 62 //------------------------------------------------------------------------------ 63 // Encoder is mostly trivial 64 65 function InternalEncoder(options, codec) { 66 this.enc = codec.enc; 67 } 68 69 InternalEncoder.prototype.write = function(str) { 70 return Buffer.from(str, this.enc); 71 } 72 73 InternalEncoder.prototype.end = function() { 74 } 75 76 77 //------------------------------------------------------------------------------ 78 // Except base64 encoder, which must keep its state. 79 80 function InternalEncoderBase64(options, codec) { 81 this.prevStr = ''; 82 } 83 84 InternalEncoderBase64.prototype.write = function(str) { 85 str = this.prevStr + str; 86 var completeQuads = str.length - (str.length % 4); 87 this.prevStr = str.slice(completeQuads); 88 str = str.slice(0, completeQuads); 89 90 return Buffer.from(str, "base64"); 91 } 92 93 InternalEncoderBase64.prototype.end = function() { 94 return Buffer.from(this.prevStr, "base64"); 95 } 96 97 98 //------------------------------------------------------------------------------ 99 // CESU-8 encoder is also special. 100 101 function InternalEncoderCesu8(options, codec) { 102 } 103 104 InternalEncoderCesu8.prototype.write = function(str) { 105 var buf = Buffer.alloc(str.length * 3), bufIdx = 0; 106 for (var i = 0; i < str.length; i++) { 107 var charCode = str.charCodeAt(i); 108 // Naive implementation, but it works because CESU-8 is especially easy 109 // to convert from UTF-16 (which all JS strings are encoded in). 110 if (charCode < 0x80) 111 buf[bufIdx++] = charCode; 112 else if (charCode < 0x800) { 113 buf[bufIdx++] = 0xC0 + (charCode >>> 6); 114 buf[bufIdx++] = 0x80 + (charCode & 0x3f); 115 } 116 else { // charCode will always be < 0x10000 in javascript. 117 buf[bufIdx++] = 0xE0 + (charCode >>> 12); 118 buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f); 119 buf[bufIdx++] = 0x80 + (charCode & 0x3f); 120 } 121 } 122 return buf.slice(0, bufIdx); 123 } 124 125 InternalEncoderCesu8.prototype.end = function() { 126 } 127 128 //------------------------------------------------------------------------------ 129 // CESU-8 decoder is not implemented in Node v4.0+ 130 131 function InternalDecoderCesu8(options, codec) { 132 this.acc = 0; 133 this.contBytes = 0; 134 this.accBytes = 0; 135 this.defaultCharUnicode = codec.defaultCharUnicode; 136 } 137 138 InternalDecoderCesu8.prototype.write = function(buf) { 139 var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes, 140 res = ''; 141 for (var i = 0; i < buf.length; i++) { 142 var curByte = buf[i]; 143 if ((curByte & 0xC0) !== 0x80) { // Leading byte 144 if (contBytes > 0) { // Previous code is invalid 145 res += this.defaultCharUnicode; 146 contBytes = 0; 147 } 148 149 if (curByte < 0x80) { // Single-byte code 150 res += String.fromCharCode(curByte); 151 } else if (curByte < 0xE0) { // Two-byte code 152 acc = curByte & 0x1F; 153 contBytes = 1; accBytes = 1; 154 } else if (curByte < 0xF0) { // Three-byte code 155 acc = curByte & 0x0F; 156 contBytes = 2; accBytes = 1; 157 } else { // Four or more are not supported for CESU-8. 158 res += this.defaultCharUnicode; 159 } 160 } else { // Continuation byte 161 if (contBytes > 0) { // We're waiting for it. 162 acc = (acc << 6) | (curByte & 0x3f); 163 contBytes--; accBytes++; 164 if (contBytes === 0) { 165 // Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80) 166 if (accBytes === 2 && acc < 0x80 && acc > 0) 167 res += this.defaultCharUnicode; 168 else if (accBytes === 3 && acc < 0x800) 169 res += this.defaultCharUnicode; 170 else 171 // Actually add character. 172 res += String.fromCharCode(acc); 173 } 174 } else { // Unexpected continuation byte 175 res += this.defaultCharUnicode; 176 } 177 } 178 } 179 this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes; 180 return res; 181 } 182 183 InternalDecoderCesu8.prototype.end = function() { 184 var res = 0; 185 if (this.contBytes > 0) 186 res += this.defaultCharUnicode; 187 return res; 188 }