twitst4tz

twitter statistics web application
Log | Files | Refs | README | LICENSE

index.js (6036B)


      1 /*eslint no-var:0, prefer-arrow-callback: 0, object-shorthand: 0 */
      2 'use strict';
      3 
      4 
      5 var Punycode = require('punycode');
      6 
      7 
      8 var internals = {};
      9 
     10 
     11 //
     12 // Read rules from file.
     13 //
     14 internals.rules = require('./data/rules.json').map(function (rule) {
     15 
     16   return {
     17     rule: rule,
     18     suffix: rule.replace(/^(\*\.|\!)/, ''),
     19     punySuffix: -1,
     20     wildcard: rule.charAt(0) === '*',
     21     exception: rule.charAt(0) === '!'
     22   };
     23 });
     24 
     25 
     26 //
     27 // Check is given string ends with `suffix`.
     28 //
     29 internals.endsWith = function (str, suffix) {
     30 
     31   return str.indexOf(suffix, str.length - suffix.length) !== -1;
     32 };
     33 
     34 
     35 //
     36 // Find rule for a given domain.
     37 //
     38 internals.findRule = function (domain) {
     39 
     40   var punyDomain = Punycode.toASCII(domain);
     41   return internals.rules.reduce(function (memo, rule) {
     42 
     43     if (rule.punySuffix === -1){
     44       rule.punySuffix = Punycode.toASCII(rule.suffix);
     45     }
     46     if (!internals.endsWith(punyDomain, '.' + rule.punySuffix) && punyDomain !== rule.punySuffix) {
     47       return memo;
     48     }
     49     // This has been commented out as it never seems to run. This is because
     50     // sub tlds always appear after their parents and we never find a shorter
     51     // match.
     52     //if (memo) {
     53     //  var memoSuffix = Punycode.toASCII(memo.suffix);
     54     //  if (memoSuffix.length >= punySuffix.length) {
     55     //    return memo;
     56     //  }
     57     //}
     58     return rule;
     59   }, null);
     60 };
     61 
     62 
     63 //
     64 // Error codes and messages.
     65 //
     66 exports.errorCodes = {
     67   DOMAIN_TOO_SHORT: 'Domain name too short.',
     68   DOMAIN_TOO_LONG: 'Domain name too long. It should be no more than 255 chars.',
     69   LABEL_STARTS_WITH_DASH: 'Domain name label can not start with a dash.',
     70   LABEL_ENDS_WITH_DASH: 'Domain name label can not end with a dash.',
     71   LABEL_TOO_LONG: 'Domain name label should be at most 63 chars long.',
     72   LABEL_TOO_SHORT: 'Domain name label should be at least 1 character long.',
     73   LABEL_INVALID_CHARS: 'Domain name label can only contain alphanumeric characters or dashes.'
     74 };
     75 
     76 
     77 //
     78 // Validate domain name and throw if not valid.
     79 //
     80 // From wikipedia:
     81 //
     82 // Hostnames are composed of series of labels concatenated with dots, as are all
     83 // domain names. Each label must be between 1 and 63 characters long, and the
     84 // entire hostname (including the delimiting dots) has a maximum of 255 chars.
     85 //
     86 // Allowed chars:
     87 //
     88 // * `a-z`
     89 // * `0-9`
     90 // * `-` but not as a starting or ending character
     91 // * `.` as a separator for the textual portions of a domain name
     92 //
     93 // * http://en.wikipedia.org/wiki/Domain_name
     94 // * http://en.wikipedia.org/wiki/Hostname
     95 //
     96 internals.validate = function (input) {
     97 
     98   // Before we can validate we need to take care of IDNs with unicode chars.
     99   var ascii = Punycode.toASCII(input);
    100 
    101   if (ascii.length < 1) {
    102     return 'DOMAIN_TOO_SHORT';
    103   }
    104   if (ascii.length > 255) {
    105     return 'DOMAIN_TOO_LONG';
    106   }
    107 
    108   // Check each part's length and allowed chars.
    109   var labels = ascii.split('.');
    110   var label;
    111 
    112   for (var i = 0; i < labels.length; ++i) {
    113     label = labels[i];
    114     if (!label.length) {
    115       return 'LABEL_TOO_SHORT';
    116     }
    117     if (label.length > 63) {
    118       return 'LABEL_TOO_LONG';
    119     }
    120     if (label.charAt(0) === '-') {
    121       return 'LABEL_STARTS_WITH_DASH';
    122     }
    123     if (label.charAt(label.length - 1) === '-') {
    124       return 'LABEL_ENDS_WITH_DASH';
    125     }
    126     if (!/^[a-z0-9\-]+$/.test(label)) {
    127       return 'LABEL_INVALID_CHARS';
    128     }
    129   }
    130 };
    131 
    132 
    133 //
    134 // Public API
    135 //
    136 
    137 
    138 //
    139 // Parse domain.
    140 //
    141 exports.parse = function (input) {
    142 
    143   if (typeof input !== 'string') {
    144     throw new TypeError('Domain name must be a string.');
    145   }
    146 
    147   // Force domain to lowercase.
    148   var domain = input.slice(0).toLowerCase();
    149 
    150   // Handle FQDN.
    151   // TODO: Simply remove trailing dot?
    152   if (domain.charAt(domain.length - 1) === '.') {
    153     domain = domain.slice(0, domain.length - 1);
    154   }
    155 
    156   // Validate and sanitise input.
    157   var error = internals.validate(domain);
    158   if (error) {
    159     return {
    160       input: input,
    161       error: {
    162         message: exports.errorCodes[error],
    163         code: error
    164       }
    165     };
    166   }
    167 
    168   var parsed = {
    169     input: input,
    170     tld: null,
    171     sld: null,
    172     domain: null,
    173     subdomain: null,
    174     listed: false
    175   };
    176 
    177   var domainParts = domain.split('.');
    178 
    179   // Non-Internet TLD
    180   if (domainParts[domainParts.length - 1] === 'local') {
    181     return parsed;
    182   }
    183 
    184   var handlePunycode = function () {
    185 
    186     if (!/xn--/.test(domain)) {
    187       return parsed;
    188     }
    189     if (parsed.domain) {
    190       parsed.domain = Punycode.toASCII(parsed.domain);
    191     }
    192     if (parsed.subdomain) {
    193       parsed.subdomain = Punycode.toASCII(parsed.subdomain);
    194     }
    195     return parsed;
    196   };
    197 
    198   var rule = internals.findRule(domain);
    199 
    200   // Unlisted tld.
    201   if (!rule) {
    202     if (domainParts.length < 2) {
    203       return parsed;
    204     }
    205     parsed.tld = domainParts.pop();
    206     parsed.sld = domainParts.pop();
    207     parsed.domain = [parsed.sld, parsed.tld].join('.');
    208     if (domainParts.length) {
    209       parsed.subdomain = domainParts.pop();
    210     }
    211     return handlePunycode();
    212   }
    213 
    214   // At this point we know the public suffix is listed.
    215   parsed.listed = true;
    216 
    217   var tldParts = rule.suffix.split('.');
    218   var privateParts = domainParts.slice(0, domainParts.length - tldParts.length);
    219 
    220   if (rule.exception) {
    221     privateParts.push(tldParts.shift());
    222   }
    223 
    224   parsed.tld = tldParts.join('.');
    225 
    226   if (!privateParts.length) {
    227     return handlePunycode();
    228   }
    229 
    230   if (rule.wildcard) {
    231     tldParts.unshift(privateParts.pop());
    232     parsed.tld = tldParts.join('.');
    233   }
    234 
    235   if (!privateParts.length) {
    236     return handlePunycode();
    237   }
    238 
    239   parsed.sld = privateParts.pop();
    240   parsed.domain = [parsed.sld,  parsed.tld].join('.');
    241 
    242   if (privateParts.length) {
    243     parsed.subdomain = privateParts.join('.');
    244   }
    245 
    246   return handlePunycode();
    247 };
    248 
    249 
    250 //
    251 // Get domain.
    252 //
    253 exports.get = function (domain) {
    254 
    255   if (!domain) {
    256     return null;
    257   }
    258   return exports.parse(domain).domain || null;
    259 };
    260 
    261 
    262 //
    263 // Check whether domain belongs to a known public suffix.
    264 //
    265 exports.isValid = function (domain) {
    266 
    267   var parsed = exports.parse(domain);
    268   return Boolean(parsed.domain && parsed.listed);
    269 };