twitst4tz

twitter statistics web application
Log | Files | Refs | README | LICENSE

utf7.js (9215B)


      1 "use strict";
      2 var Buffer = require("safer-buffer").Buffer;
      3 
      4 // UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
      5 // See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
      6 
      7 exports.utf7 = Utf7Codec;
      8 exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
      9 function Utf7Codec(codecOptions, iconv) {
     10     this.iconv = iconv;
     11 };
     12 
     13 Utf7Codec.prototype.encoder = Utf7Encoder;
     14 Utf7Codec.prototype.decoder = Utf7Decoder;
     15 Utf7Codec.prototype.bomAware = true;
     16 
     17 
     18 // -- Encoding
     19 
     20 var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;
     21 
     22 function Utf7Encoder(options, codec) {
     23     this.iconv = codec.iconv;
     24 }
     25 
     26 Utf7Encoder.prototype.write = function(str) {
     27     // Naive implementation.
     28     // Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
     29     return Buffer.from(str.replace(nonDirectChars, function(chunk) {
     30         return "+" + (chunk === '+' ? '' : 
     31             this.iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, '')) 
     32             + "-";
     33     }.bind(this)));
     34 }
     35 
     36 Utf7Encoder.prototype.end = function() {
     37 }
     38 
     39 
     40 // -- Decoding
     41 
     42 function Utf7Decoder(options, codec) {
     43     this.iconv = codec.iconv;
     44     this.inBase64 = false;
     45     this.base64Accum = '';
     46 }
     47 
     48 var base64Regex = /[A-Za-z0-9\/+]/;
     49 var base64Chars = [];
     50 for (var i = 0; i < 256; i++)
     51     base64Chars[i] = base64Regex.test(String.fromCharCode(i));
     52 
     53 var plusChar = '+'.charCodeAt(0), 
     54     minusChar = '-'.charCodeAt(0),
     55     andChar = '&'.charCodeAt(0);
     56 
     57 Utf7Decoder.prototype.write = function(buf) {
     58     var res = "", lastI = 0,
     59         inBase64 = this.inBase64,
     60         base64Accum = this.base64Accum;
     61 
     62     // The decoder is more involved as we must handle chunks in stream.
     63 
     64     for (var i = 0; i < buf.length; i++) {
     65         if (!inBase64) { // We're in direct mode.
     66             // Write direct chars until '+'
     67             if (buf[i] == plusChar) {
     68                 res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
     69                 lastI = i+1;
     70                 inBase64 = true;
     71             }
     72         } else { // We decode base64.
     73             if (!base64Chars[buf[i]]) { // Base64 ended.
     74                 if (i == lastI && buf[i] == minusChar) {// "+-" -> "+"
     75                     res += "+";
     76                 } else {
     77                     var b64str = base64Accum + buf.slice(lastI, i).toString();
     78                     res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
     79                 }
     80 
     81                 if (buf[i] != minusChar) // Minus is absorbed after base64.
     82                     i--;
     83 
     84                 lastI = i+1;
     85                 inBase64 = false;
     86                 base64Accum = '';
     87             }
     88         }
     89     }
     90 
     91     if (!inBase64) {
     92         res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
     93     } else {
     94         var b64str = base64Accum + buf.slice(lastI).toString();
     95 
     96         var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
     97         base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
     98         b64str = b64str.slice(0, canBeDecoded);
     99 
    100         res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
    101     }
    102 
    103     this.inBase64 = inBase64;
    104     this.base64Accum = base64Accum;
    105 
    106     return res;
    107 }
    108 
    109 Utf7Decoder.prototype.end = function() {
    110     var res = "";
    111     if (this.inBase64 && this.base64Accum.length > 0)
    112         res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
    113 
    114     this.inBase64 = false;
    115     this.base64Accum = '';
    116     return res;
    117 }
    118 
    119 
    120 // UTF-7-IMAP codec.
    121 // RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
    122 // Differences:
    123 //  * Base64 part is started by "&" instead of "+"
    124 //  * Direct characters are 0x20-0x7E, except "&" (0x26)
    125 //  * In Base64, "," is used instead of "/"
    126 //  * Base64 must not be used to represent direct characters.
    127 //  * No implicit shift back from Base64 (should always end with '-')
    128 //  * String must end in non-shifted position.
    129 //  * "-&" while in base64 is not allowed.
    130 
    131 
    132 exports.utf7imap = Utf7IMAPCodec;
    133 function Utf7IMAPCodec(codecOptions, iconv) {
    134     this.iconv = iconv;
    135 };
    136 
    137 Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
    138 Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
    139 Utf7IMAPCodec.prototype.bomAware = true;
    140 
    141 
    142 // -- Encoding
    143 
    144 function Utf7IMAPEncoder(options, codec) {
    145     this.iconv = codec.iconv;
    146     this.inBase64 = false;
    147     this.base64Accum = Buffer.alloc(6);
    148     this.base64AccumIdx = 0;
    149 }
    150 
    151 Utf7IMAPEncoder.prototype.write = function(str) {
    152     var inBase64 = this.inBase64,
    153         base64Accum = this.base64Accum,
    154         base64AccumIdx = this.base64AccumIdx,
    155         buf = Buffer.alloc(str.length*5 + 10), bufIdx = 0;
    156 
    157     for (var i = 0; i < str.length; i++) {
    158         var uChar = str.charCodeAt(i);
    159         if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
    160             if (inBase64) {
    161                 if (base64AccumIdx > 0) {
    162                     bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
    163                     base64AccumIdx = 0;
    164                 }
    165 
    166                 buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
    167                 inBase64 = false;
    168             }
    169 
    170             if (!inBase64) {
    171                 buf[bufIdx++] = uChar; // Write direct character
    172 
    173                 if (uChar === andChar)  // Ampersand -> '&-'
    174                     buf[bufIdx++] = minusChar;
    175             }
    176 
    177         } else { // Non-direct character
    178             if (!inBase64) {
    179                 buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
    180                 inBase64 = true;
    181             }
    182             if (inBase64) {
    183                 base64Accum[base64AccumIdx++] = uChar >> 8;
    184                 base64Accum[base64AccumIdx++] = uChar & 0xFF;
    185 
    186                 if (base64AccumIdx == base64Accum.length) {
    187                     bufIdx += buf.write(base64Accum.toString('base64').replace(/\//g, ','), bufIdx);
    188                     base64AccumIdx = 0;
    189                 }
    190             }
    191         }
    192     }
    193 
    194     this.inBase64 = inBase64;
    195     this.base64AccumIdx = base64AccumIdx;
    196 
    197     return buf.slice(0, bufIdx);
    198 }
    199 
    200 Utf7IMAPEncoder.prototype.end = function() {
    201     var buf = Buffer.alloc(10), bufIdx = 0;
    202     if (this.inBase64) {
    203         if (this.base64AccumIdx > 0) {
    204             bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
    205             this.base64AccumIdx = 0;
    206         }
    207 
    208         buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
    209         this.inBase64 = false;
    210     }
    211 
    212     return buf.slice(0, bufIdx);
    213 }
    214 
    215 
    216 // -- Decoding
    217 
    218 function Utf7IMAPDecoder(options, codec) {
    219     this.iconv = codec.iconv;
    220     this.inBase64 = false;
    221     this.base64Accum = '';
    222 }
    223 
    224 var base64IMAPChars = base64Chars.slice();
    225 base64IMAPChars[','.charCodeAt(0)] = true;
    226 
    227 Utf7IMAPDecoder.prototype.write = function(buf) {
    228     var res = "", lastI = 0,
    229         inBase64 = this.inBase64,
    230         base64Accum = this.base64Accum;
    231 
    232     // The decoder is more involved as we must handle chunks in stream.
    233     // It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
    234 
    235     for (var i = 0; i < buf.length; i++) {
    236         if (!inBase64) { // We're in direct mode.
    237             // Write direct chars until '&'
    238             if (buf[i] == andChar) {
    239                 res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
    240                 lastI = i+1;
    241                 inBase64 = true;
    242             }
    243         } else { // We decode base64.
    244             if (!base64IMAPChars[buf[i]]) { // Base64 ended.
    245                 if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
    246                     res += "&";
    247                 } else {
    248                     var b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
    249                     res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
    250                 }
    251 
    252                 if (buf[i] != minusChar) // Minus may be absorbed after base64.
    253                     i--;
    254 
    255                 lastI = i+1;
    256                 inBase64 = false;
    257                 base64Accum = '';
    258             }
    259         }
    260     }
    261 
    262     if (!inBase64) {
    263         res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
    264     } else {
    265         var b64str = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');
    266 
    267         var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
    268         base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
    269         b64str = b64str.slice(0, canBeDecoded);
    270 
    271         res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
    272     }
    273 
    274     this.inBase64 = inBase64;
    275     this.base64Accum = base64Accum;
    276 
    277     return res;
    278 }
    279 
    280 Utf7IMAPDecoder.prototype.end = function() {
    281     var res = "";
    282     if (this.inBase64 && this.base64Accum.length > 0)
    283         res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
    284 
    285     this.inBase64 = false;
    286     this.base64Accum = '';
    287     return res;
    288 }
    289 
    290