| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- // fetches and converts maxmind lite databases
- 'use strict';
- if(!process.env.npm_package_config_update){
- return;
- }
- var user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.36 Safari/537.36';
- var fs = require('fs');
- var https = require('https');
- var path = require('path');
- var url = require('url');
- var zlib = require('zlib');
- fs.existsSync = fs.existsSync || path.existsSync;
- var async = require('async');
- var colors = require('colors');
- var glob = require('glob');
- var iconv = require('iconv-lite');
- var lazy = require('lazy');
- var rimraf = require('rimraf').sync;
- var unzip = require('unzip');
- var utils = require('../lib/utils');
- var dataPath = path.join(__dirname, '..', 'data');
- var tmpPath = path.join(__dirname, '..', 'tmp');
- var databases = [
- {
- type: 'country',
- url: 'https://geolite.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip',
- src: 'GeoIPCountryWhois.csv',
- dest: 'geoip-country.dat'
- },
- {
- type: 'country',
- url: 'https://geolite.maxmind.com/download/geoip/database/GeoIPv6.csv.gz',
- src: 'GeoIPv6.csv',
- dest: 'geoip-country6.dat'
- },
- {
- type: 'city-extended',
- url: 'https://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip',
- src: [
- 'GeoLiteCity-Blocks.csv',
- 'GeoLiteCity-Location.csv'
- ],
- dest: [
- 'geoip-city.dat',
- 'geoip-city-names.dat'
- ]
- },
- {
- type: 'city',
- url: 'https://geolite.maxmind.com/download/geoip/database/GeoLiteCityv6-beta/GeoLiteCityv6.csv.gz',
- src: 'GeoLiteCityv6.csv',
- dest: 'geoip-city6.dat'
- }
- ];
- function mkdir(name) {
- var dir = path.dirname(name);
- if (!fs.existsSync(dir)) {
- fs.mkdirSync(dir);
- }
- }
- // Ref: http://stackoverflow.com/questions/8493195/how-can-i-parse-a-csv-string-with-javascript
- // Return array of string values, or NULL if CSV string not well formed.
- function CSVtoArray(text) {
- var re_valid = /^\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*(?:,\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*)*$/;
- var re_value = /(?!\s*$)\s*(?:'([^'\\]*(?:\\[\S\s][^'\\]*)*)'|"([^"\\]*(?:\\[\S\s][^"\\]*)*)"|([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*))\s*(?:,|$)/g;
- // Return NULL if input string is not well formed CSV string.
- if (!re_valid.test(text)) return null;
- var a = []; // Initialize array to receive values.
- text.replace(re_value, // "Walk" the string using replace with callback.
- function(m0, m1, m2, m3) {
- // Remove backslash from \' in single quoted values.
- if (m1 !== undefined) a.push(m1.replace(/\\'/g, "'"));
- // Remove backslash from \" in double quoted values.
- else if (m2 !== undefined) a.push(m2.replace(/\\"/g, '"'));
- else if (m3 !== undefined) a.push(m3);
- return ''; // Return empty string.
- });
- // Handle special case of empty last value.
- if (/,\s*$/.test(text)) a.push('');
- return a;
- }
- function fetch(database, cb) {
- var downloadUrl = database.url;
- var fileName = downloadUrl.split('/').pop();
- var gzip = path.extname(fileName) === '.gz';
- if (gzip) {
- fileName = fileName.replace('.gz', '');
- }
- var tmpFile = path.join(tmpPath, fileName);
- if (fs.existsSync(tmpFile)) {
- return cb(null, tmpFile, fileName, database);
- }
- console.log('Fetching ', downloadUrl);
- function getOptions() {
- if (process.env.http_proxy) {
- var options = url.parse(process.env.http_proxy);
- options.path = downloadUrl;
- options.headers = {
- Host: url.parse(downloadUrl).host
- };
- return options;
- } else {
- var options = url.parse(downloadUrl);
- options.headers = {
- 'Host': url.parse(downloadUrl).host,
- 'User-Agent': user_agent
- };
- return options;
- }
- }
- function onResponse(response) {
- var status = response.statusCode;
- if (status !== 200) {
- console.log('ERROR'.red + ': HTTP Request Failed [%d %s]', status, https.STATUS_CODES[status]);
- client.abort();
- process.exit();
- }
- var tmpFilePipe;
- var tmpFileStream = fs.createWriteStream(tmpFile);
- if (gzip) {
- tmpFilePipe = response.pipe(zlib.createGunzip()).pipe(tmpFileStream);
- } else {
- tmpFilePipe = response.pipe(tmpFileStream);
- }
- tmpFilePipe.on('close', function() {
- console.log(' DONE'.green);
- cb(null, tmpFile, fileName, database);
- });
- }
- mkdir(tmpFile);
- var client = https.get(getOptions(), onResponse);
- process.stdout.write('Retrieving ' + fileName + ' ...');
- }
- function extract(tmpFile, tmpFileName, database, cb) {
- if (path.extname(tmpFileName) !== '.zip') {
- cb(null, database);
- } else {
- process.stdout.write('Extracting ' + tmpFileName + ' ...');
- fs.createReadStream(tmpFile)
- .pipe(unzip.Parse())
- .on('entry', function(entry) {
- var fileName = path.basename(entry.path);
- var type = entry.type; // 'Directory' or 'File'
- if (type.toLowerCase() === 'file' && path.extname(fileName) === '.csv') {
- entry.pipe(fs.createWriteStream(path.join(tmpPath, fileName)));
- } else {
- entry.autodrain();
- }
- })
- .on('finish', function() {
- cb(null, database);
- });
- }
- }
- function processCountryData(src, dest, cb) {
- var lines=0;
- function processLine(line) {
- var fields = CSVtoArray(line);
- if (!fields || fields.length < 6) {
- console.log("weird line: %s::", line);
- return;
- }
- lines++;
- var sip;
- var eip;
- var cc = fields[4].replace(/"/g, '');
- var b;
- var bsz;
- var i;
- if (fields[0].match(/:/)) {
- // IPv6
- bsz = 34;
- sip = utils.aton6(fields[0]);
- eip = utils.aton6(fields[1]);
- b = new Buffer(bsz);
- for (i = 0; i < sip.length; i++) {
- b.writeUInt32BE(sip[i], i * 4);
- }
- for (i = 0; i < eip.length; i++) {
- b.writeUInt32BE(eip[i], 16 + (i * 4));
- }
- } else {
- // IPv4
- bsz = 10;
- sip = parseInt(fields[2].replace(/"/g, ''), 10);
- eip = parseInt(fields[3].replace(/"/g, ''), 10);
- b = new Buffer(bsz);
- b.fill(0);
- b.writeUInt32BE(sip, 0);
- b.writeUInt32BE(eip, 4);
- }
- b.write(cc, bsz - 2);
- fs.writeSync(datFile, b, 0, bsz, null);
- if(Date.now() - tstart > 5000) {
- tstart = Date.now();
- process.stdout.write('\nStill working (' + lines + ') ...');
- }
- }
- var dataFile = path.join(dataPath, dest);
- var tmpDataFile = path.join(tmpPath, src);
- rimraf(dataFile);
- mkdir(dataFile);
- process.stdout.write('Processing Data (may take a moment) ...');
- var tstart = Date.now();
- var datFile = fs.openSync(dataFile, "w");
- lazy(fs.createReadStream(tmpDataFile))
- .lines
- .map(function(byteArray) {
- return iconv.decode(byteArray, 'latin1');
- })
- .skip(1)
- .map(processLine)
- .on('pipe', function() {
- console.log(' DONE'.green);
- cb();
- });
- }
- function processCityData(src, dest, cb) {
- var lines = 0;
- function processLine(line) {
- if (line.match(/^Copyright/) || !line.match(/\d/)) {
- return;
- }
- var fields = CSVtoArray(line);
- var sip;
- var eip;
- var locId;
- var b;
- var bsz;
- var i;
- lines++;
- if (fields[0].match(/:/)) {
- // IPv6
- var offset = 0;
- var cc = fields[4];
- var city = fields[6];
- var lat = Math.round(parseFloat(fields[7]) * 10000);
- var lon = Math.round(parseFloat(fields[8]) * 10000);
- var rg = fields[5];
- bsz = 58;
- sip = utils.aton6(fields[0]);
- eip = utils.aton6(fields[1]);
- b = new Buffer(bsz);
- b.fill(0);
- for (i = 0; i < sip.length; i++) {
- b.writeUInt32BE(sip[i], offset);
- offset += 4;
- }
- for (i = 0; i < eip.length; i++) {
- b.writeUInt32BE(eip[i], offset);
- offset += 4;
- }
- b.write(cc, offset);
- b.write(rg, offset + 2);
- b.writeInt32BE(lat, offset + 4);
- b.writeInt32BE(lon, offset + 8);
- b.write(city, offset + 12);
- } else {
- // IPv4
- bsz = 12;
- sip = parseInt(fields[0], 10);
- eip = parseInt(fields[1], 10);
- locId = parseInt(fields[2], 10);
- b = new Buffer(bsz);
- b.fill(0);
- b.writeUInt32BE(sip>>>0, 0);
- b.writeUInt32BE(eip>>>0, 4);
- b.writeUInt32BE(locId>>>0, 8);
- }
- fs.writeSync(datFile, b, 0, b.length, null);
- if(Date.now() - tstart > 5000) {
- tstart = Date.now();
- process.stdout.write('\nStill working (' + lines + ') ...');
- }
- }
- var dataFile = path.join(dataPath, dest);
- var tmpDataFile = path.join(tmpPath, src);
- rimraf(dataFile);
- process.stdout.write('Processing Data (may take a moment) ...');
- var tstart = Date.now();
- var datFile = fs.openSync(dataFile, "w");
- lazy(fs.createReadStream(tmpDataFile))
- .lines
- .map(function(byteArray) {
- return iconv.decode(byteArray, 'latin1');
- })
- .skip(1)
- .map(processLine)
- .on('pipe', cb);
- }
- function processCityDataNames(src, dest, cb) {
- var locId = null;
- function processLine(line, i, a) {
- if (line.match(/^Copyright/) || !line.match(/\d/)) {
- return;
- }
- var b;
- var sz = 64;
- var fields = CSVtoArray(line);
- if (locId === null)
- locId = parseInt(fields[0]);
- else {
- if (parseInt(fields[0]) - 1 > locId) {
- b = new Buffer(sz);
- b.fill(0);
- fs.writeSync(datFile, b, 0, b.length, null);
- }
- locId = parseInt(fields[0]);
- }
- var cc = fields[1];
- var rg = fields[2];
- var city = fields[3];
- var lat = Math.round(parseFloat(fields[5]) * 10000);
- var lon = Math.round(parseFloat(fields[6]) * 10000);
- var metro = parseInt(fields[7]);
- b = new Buffer(sz);
- b.fill(0);
- b.write(cc, 0);
- b.write(rg, 2);
- b.writeInt32BE(lat, 4);
- b.writeInt32BE(lon, 8);
- if(metro){
- b.writeInt32BE(metro, 12);
- }
- b.write(city, 16);
- fs.writeSync(datFile, b, 0, b.length, null);
- }
- var dataFile = path.join(dataPath, dest);
- var tmpDataFile = path.join(tmpPath, src);
- rimraf(dataFile);
- var datFile = fs.openSync(dataFile, "w");
- lazy(fs.createReadStream(tmpDataFile))
- .lines
- .map(function(byteArray) {
- return iconv.decode(byteArray, 'latin1');
- })
- .skip(1)
- .map(processLine)
- .on('pipe', cb);
- }
- function processData(database, cb) {
- var type = database.type;
- var src = database.src;
- var dest = database.dest;
- if (type === 'country') {
- processCountryData(src, dest, cb);
- } else if (type === 'city-extended') {
- processCityData(src[0], dest[0], function() {
- processCityDataNames(src[1], dest[1], function() {
- console.log(' DONE'.green);
- cb();
- });
- });
- } else {
- processCityData(src, dest, function() {
- console.log(' DONE'.green);
- cb();
- });
- }
- }
- rimraf(tmpPath);
- mkdir(tmpPath);
- async.eachSeries(databases, function(database, nextDatabase) {
- async.seq(fetch, extract, processData)(database, nextDatabase);
- }, function(err) {
- if (err) {
- console.log('Failed to Update Databases from MaxMind.'.red);
- process.exit(1);
- } else {
- console.log('Successfully Updated Databases from MaxMind.'.green);
- if (process.argv[2]=='debug') console.log('Notice: temporary files are not deleted for debug purposes.'.bold.yellow);
- else rimraf(tmpPath);
- process.exit(0);
- }
- });
|