utf7.js (9215B)
"use strict";
var Buffer = require("safer-buffer").Buffer;

// UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
// See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3

exports.utf7 = Utf7Codec;
exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7

/**
 * UTF-7 codec descriptor.
 *
 * @param {Object} codecOptions - Codec options; not read by this codec.
 * @param {Object} iconv - Object whose encode()/decode() methods are used by
 *     the encoder and decoder for the "utf16-be" and "ascii" conversions.
 */
function Utf7Codec(codecOptions, iconv) {
    this.iconv = iconv;
}

Utf7Codec.prototype.encoder = Utf7Encoder;
Utf7Codec.prototype.decoder = Utf7Decoder;
Utf7Codec.prototype.bomAware = true;


// -- Encoding

// Matches runs of characters that are NOT written directly and therefore
// have to go through the "+<base64>-" form.
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;

/**
 * UTF-7 encoder.
 *
 * @param {Object} options - Encoder options; not read here.
 * @param {Utf7Codec} codec - Codec instance supplying `iconv`.
 */
function Utf7Encoder(options, codec) {
    this.iconv = codec.iconv;
}

/**
 * Encodes a string chunk to UTF-7.
 *
 * Naive implementation: every run of non-direct chars is encoded on its own
 * as "+<base64 of UTF-16BE>-"; a lone "+" is encoded as "+-".
 *
 * @param {string} str - Text to encode.
 * @returns {Buffer} Encoded bytes.
 */
Utf7Encoder.prototype.write = function(str) {
    var iconv = this.iconv;
    var encoded = str.replace(nonDirectChars, function(chunk) {
        if (chunk === '+')
            return "+-";

        // Base64 padding is stripped; the closing "-" delimits the run.
        var b64 = iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, '');
        return "+" + b64 + "-";
    });
    return Buffer.from(encoded);
};

/**
 * Nothing is buffered between write() calls, so there is nothing to flush.
 */
Utf7Encoder.prototype.end = function() {
};


// -- Decoding

/**
 * Streaming UTF-7 decoder.
 *
 * @param {Object} options - Decoder options; not read here.
 * @param {Utf7Codec} codec - Codec instance supplying `iconv`.
 */
function Utf7Decoder(options, codec) {
    this.iconv = codec.iconv;
    this.inBase64 = false;   // True while inside a "+..." base64 run.
    this.base64Accum = '';   // Base64 chars carried over to the next chunk.
    this.sawBase64 = false;  // True once the current run has contained any base64 char.
}

// Lookup table: base64Chars[byte] is true when the byte is a base64 alphabet char.
var base64Regex = /[A-Za-z0-9\/+]/;
var base64Chars = [];
for (var i = 0; i < 256; i++)
    base64Chars[i] = base64Regex.test(String.fromCharCode(i));

var plusChar = '+'.charCodeAt(0),
    minusChar = '-'.charCodeAt(0),
    andChar = '&'.charCodeAt(0);

/**
 * Decodes one chunk of UTF-7 bytes.
 *
 * The decoder is more involved than the encoder because a base64 run may be
 * split across chunks; undecodable leftovers are kept in `base64Accum`.
 *
 * @param {Buffer} buf - Next chunk of encoded bytes.
 * @returns {string} Text decoded so far from this chunk.
 */
Utf7Decoder.prototype.write = function(buf) {
    var res = "", lastI = 0,
        inBase64 = this.inBase64,
        base64Accum = this.base64Accum,
        sawBase64 = this.sawBase64;

    for (var i = 0; i < buf.length; i++) {
        if (!inBase64) { // We're in direct mode.
            // Write direct chars until '+'
            if (buf[i] === plusChar) {
                res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
                lastI = i + 1;
                inBase64 = true;
                sawBase64 = false;
            }
        } else if (!base64Chars[buf[i]]) { // Base64 ended.
            // "+-" -> "+", but only when '-' directly follows the '+'.
            // `i === lastI` alone is not enough: at the start of a chunk it is
            // also true when the run's base64 chars arrived in earlier chunks.
            if (i === lastI && buf[i] === minusChar && !sawBase64) {
                res += "+";
            } else {
                var b64str = base64Accum + buf.slice(lastI, i).toString();
                res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
            }

            if (buf[i] !== minusChar) // Minus is absorbed after base64.
                i--;                  // Anything else is re-read in direct mode.

            lastI = i + 1;
            inBase64 = false;
            base64Accum = '';
            sawBase64 = false;
        }
    }

    if (!inBase64) {
        res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
    } else {
        var pending = base64Accum + buf.slice(lastI).toString();
        if (lastI < buf.length)
            sawBase64 = true; // Everything from lastI on is base64 data of the open run.

        var canBeDecoded = pending.length - (pending.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
        base64Accum = pending.slice(canBeDecoded); // The rest will be decoded in future.

        res += this.iconv.decode(Buffer.from(pending.slice(0, canBeDecoded), 'base64'), "utf16-be");
    }

    this.inBase64 = inBase64;
    this.base64Accum = base64Accum;
    this.sawBase64 = sawBase64;

    return res;
};

/**
 * Flushes base64 chars still waiting in the accumulator and resets the state.
 *
 * @returns {string} Text decoded from the leftover base64, or "" if none.
 */
Utf7Decoder.prototype.end = function() {
    var res = "";
    if (this.inBase64 && this.base64Accum.length > 0)
        res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");

    this.inBase64 = false;
    this.base64Accum = '';
    this.sawBase64 = false;
    return res;
};


// UTF-7-IMAP codec.
// RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
// Differences:
//  * Base64 part is started by "&" instead of "+"
//  * Direct characters are 0x20-0x7E, except "&" (0x26)
//  * In Base64, "," is used instead of "/"
//  * Base64 must not be used to represent direct characters.
//  * No implicit shift back from Base64 (should always end with '-')
//  * String must end in non-shifted position.
//  * "-&" while in base64 is not allowed.


exports.utf7imap = Utf7IMAPCodec;

/**
 * UTF-7-IMAP (RFC 3501 modified UTF-7) codec descriptor.
 *
 * @param {Object} codecOptions - Codec options; not read by this codec.
 * @param {Object} iconv - Object whose decode() method is used by the decoder
 *     for the "utf16-be" and "ascii" conversions.
 */
function Utf7IMAPCodec(codecOptions, iconv) {
    this.iconv = iconv;
}

Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
Utf7IMAPCodec.prototype.bomAware = true;


// -- Encoding

/**
 * Streaming UTF-7-IMAP encoder.
 *
 * @param {Object} options - Encoder options; not read here.
 * @param {Utf7IMAPCodec} codec - Codec instance supplying `iconv`.
 */
function Utf7IMAPEncoder(options, codec) {
    this.iconv = codec.iconv;
    this.inBase64 = false;              // True while an "&..." run is open.
    this.base64Accum = Buffer.alloc(6); // 6 bytes = 3 UTF-16 units = 8 base64 chars, no padding.
    this.base64AccumIdx = 0;            // Number of bytes currently held in base64Accum.
}

// Converts bytes to modified base64: "," instead of "/", no "=" padding.
function toIMAPBase64(bytes) {
    return bytes.toString('base64').replace(/\//g, ',').replace(/=+$/, '');
}

/**
 * Encodes a string chunk to UTF-7-IMAP.
 *
 * Chars in 0x20-0x7E are written directly ("&" as "&-"); all other UTF-16
 * code units are collected big-endian and emitted as "&<modified base64>-".
 * An open base64 run is carried over to the next write() / end().
 *
 * @param {string} str - Text to encode.
 * @returns {Buffer} Encoded bytes for this chunk.
 */
Utf7IMAPEncoder.prototype.write = function(str) {
    var inBase64 = this.inBase64,
        base64Accum = this.base64Accum,
        base64AccumIdx = this.base64AccumIdx,
        buf = Buffer.alloc(str.length * 5 + 10), bufIdx = 0;

    for (var i = 0; i < str.length; i++) {
        var uChar = str.charCodeAt(i);
        if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
            if (inBase64) {
                if (base64AccumIdx > 0) {
                    bufIdx += buf.write(toIMAPBase64(base64Accum.slice(0, base64AccumIdx)), bufIdx);
                    base64AccumIdx = 0;
                }

                buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
                inBase64 = false;
            }

            buf[bufIdx++] = uChar; // Write direct character

            if (uChar === andChar) // Ampersand -> '&-'
                buf[bufIdx++] = minusChar;

        } else { // Non-direct character
            if (!inBase64) {
                buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
                inBase64 = true;
            }

            // Store the code unit big-endian.
            base64Accum[base64AccumIdx++] = uChar >> 8;
            base64Accum[base64AccumIdx++] = uChar & 0xFF;

            // A full accumulator encodes to exactly 8 chars with no padding.
            if (base64AccumIdx === base64Accum.length) {
                bufIdx += buf.write(toIMAPBase64(base64Accum), bufIdx);
                base64AccumIdx = 0;
            }
        }
    }

    this.inBase64 = inBase64;
    this.base64AccumIdx = base64AccumIdx;

    return buf.slice(0, bufIdx);
};

/**
 * Closes an open base64 run: flushes accumulated bytes and writes the final '-'.
 *
 * @returns {Buffer} Trailing bytes; empty when no run is open.
 */
Utf7IMAPEncoder.prototype.end = function() {
    var buf = Buffer.alloc(10), bufIdx = 0;
    if (this.inBase64) {
        if (this.base64AccumIdx > 0) {
            bufIdx += buf.write(toIMAPBase64(this.base64Accum.slice(0, this.base64AccumIdx)), bufIdx);
            this.base64AccumIdx = 0;
        }

        buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
        this.inBase64 = false;
    }

    return buf.slice(0, bufIdx);
};


// -- Decoding

/**
 * Streaming UTF-7-IMAP decoder.
 *
 * @param {Object} options - Decoder options; not read here.
 * @param {Utf7IMAPCodec} codec - Codec instance supplying `iconv`.
 */
function Utf7IMAPDecoder(options, codec) {
    this.iconv = codec.iconv;
    this.inBase64 = false;   // True while inside an "&..." base64 run.
    this.base64Accum = '';   // Base64 chars carried over to the next chunk.
    this.sawBase64 = false;  // True once the current run has contained any base64 char.
}

// Same table as for plain UTF-7, with ',' additionally accepted.
var base64IMAPChars = base64Chars.slice();
base64IMAPChars[','.charCodeAt(0)] = true;

/**
 * Decodes one chunk of UTF-7-IMAP bytes.
 *
 * The decoder is more involved as it must handle chunks in a stream.
 * It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
 *
 * @param {Buffer} buf - Next chunk of encoded bytes.
 * @returns {string} Text decoded so far from this chunk.
 */
Utf7IMAPDecoder.prototype.write = function(buf) {
    var res = "", lastI = 0,
        inBase64 = this.inBase64,
        base64Accum = this.base64Accum,
        sawBase64 = this.sawBase64;

    for (var i = 0; i < buf.length; i++) {
        if (!inBase64) { // We're in direct mode.
            // Write direct chars until '&'
            if (buf[i] === andChar) {
                res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
                lastI = i + 1;
                inBase64 = true;
                sawBase64 = false;
            }
        } else if (!base64IMAPChars[buf[i]]) { // Base64 ended.
            // "&-" -> "&", but only when '-' directly follows the '&'.
            // `i === lastI` alone is not enough: at the start of a chunk it is
            // also true when the run's base64 chars arrived in earlier chunks.
            if (i === lastI && buf[i] === minusChar && !sawBase64) {
                res += "&";
            } else {
                var b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
                res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
            }

            if (buf[i] !== minusChar) // Minus may be absorbed after base64.
                i--;                  // Anything else is re-read in direct mode.

            lastI = i + 1;
            inBase64 = false;
            base64Accum = '';
            sawBase64 = false;
        }
    }

    if (!inBase64) {
        res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
    } else {
        var pending = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');
        if (lastI < buf.length)
            sawBase64 = true; // Everything from lastI on is base64 data of the open run.

        var canBeDecoded = pending.length - (pending.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
        base64Accum = pending.slice(canBeDecoded); // The rest will be decoded in future.

        res += this.iconv.decode(Buffer.from(pending.slice(0, canBeDecoded), 'base64'), "utf16-be");
    }

    this.inBase64 = inBase64;
    this.base64Accum = base64Accum;
    this.sawBase64 = sawBase64;

    return res;
};

/**
 * Flushes base64 chars still waiting in the accumulator and resets the state.
 *
 * @returns {string} Text decoded from the leftover base64, or "" if none.
 */
Utf7IMAPDecoder.prototype.end = function() {
    var res = "";
    if (this.inBase64 && this.base64Accum.length > 0)
        res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");

    this.inBase64 = false;
    this.base64Accum = '';
    this.sawBase64 = false;
    return res;
};