updatedb.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. // fetches and converts maxmind lite databases
  2. 'use strict';
  3. if(!process.env.npm_package_config_update){
  4. return;
  5. }
  6. var user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.36 Safari/537.36';
  7. var fs = require('fs');
  8. var https = require('https');
  9. var path = require('path');
  10. var url = require('url');
  11. var zlib = require('zlib');
  12. fs.existsSync = fs.existsSync || path.existsSync;
  13. var async = require('async');
  14. var colors = require('colors');
  15. var glob = require('glob');
  16. var iconv = require('iconv-lite');
  17. var lazy = require('lazy');
  18. var rimraf = require('rimraf').sync;
  19. var unzip = require('unzip');
  20. var utils = require('../lib/utils');
  21. var dataPath = path.join(__dirname, '..', 'data');
  22. var tmpPath = path.join(__dirname, '..', 'tmp');
  // Download manifest, processed in order by the driver at the bottom of this file.
  //   type - selects the conversion routine in processData()
  //   url  - archive to download (.zip or .gz)
  //   src  - CSV file name(s) expected in the tmp directory after download/extraction
  //   dest - output file name(s) written to the data directory
  // The 'city-extended' entry carries two-element src/dest arrays:
  // index 0 is handled by processCityData, index 1 by processCityDataNames.
  var databases = [
    {
      type: 'country',
      url: 'https://geolite.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip',
      src: 'GeoIPCountryWhois.csv',
      dest: 'geoip-country.dat'
    },
    {
      type: 'country',
      url: 'https://geolite.maxmind.com/download/geoip/database/GeoIPv6.csv.gz',
      src: 'GeoIPv6.csv',
      dest: 'geoip-country6.dat'
    },
    {
      type: 'city-extended',
      url: 'https://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip',
      src: ['GeoLiteCity-Blocks.csv', 'GeoLiteCity-Location.csv'],
      dest: ['geoip-city.dat', 'geoip-city-names.dat']
    },
    {
      type: 'city',
      url: 'https://geolite.maxmind.com/download/geoip/database/GeoLiteCityv6-beta/GeoLiteCityv6.csv.gz',
      src: 'GeoLiteCityv6.csv',
      dest: 'geoip-city6.dat'
    }
  ];
  // Ensures the directory that would CONTAIN `name` exists.
  // Note: it creates path.dirname(name), not `name` itself, and only one level
  // (fs.mkdirSync is called without any recursive option).
  function mkdir(name) {
    var parentDir = path.dirname(name);
    if (fs.existsSync(parentDir)) {
      return;
    }
    fs.mkdirSync(parentDir);
  }
  // Splits a single line of CSV text into its values.
  // Ref: http://stackoverflow.com/questions/8493195/how-can-i-parse-a-csv-string-with-javascript
  // Values may be single-quoted, double-quoted or bare; surrounding whitespace is dropped.
  // Returns an array of strings, or null when the text is not well-formed CSV.
  function CSVtoArray(text) {
    var re_valid = /^\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*(?:,\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*)*$/;
    var re_value = /(?!\s*$)\s*(?:'([^'\\]*(?:\\[\S\s][^'\\]*)*)'|"([^"\\]*(?:\\[\S\s][^"\\]*)*)"|([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*))\s*(?:,|$)/g;

    // Reject anything that is not a well-formed CSV line up front.
    if (!re_valid.test(text)) {
      return null;
    }

    var values = [];
    var match;
    // Step through the text one value at a time.
    while ((match = re_value.exec(text)) !== null) {
      if (match[0].length === 0) {
        // Never spin on an empty match.
        re_value.lastIndex++;
      }
      var singleQuoted = match[1];
      var doubleQuoted = match[2];
      var bare = match[3];
      if (singleQuoted !== undefined) {
        // Unescape \' inside single-quoted values.
        values.push(singleQuoted.replace(/\\'/g, "'"));
      } else if (doubleQuoted !== undefined) {
        // Unescape \" inside double-quoted values.
        values.push(doubleQuoted.replace(/\\"/g, '"'));
      } else if (bare !== undefined) {
        values.push(bare);
      }
    }

    // A trailing comma means the last value is empty.
    if (/,\s*$/.test(text)) {
      values.push('');
    }
    return values;
  }
  // Downloads database.url into the tmp directory, gunzipping '.gz' downloads on the fly.
  //
  //   database - manifest entry; only database.url is read here
  //   cb       - cb(err) on failure, cb(null, tmpFile, fileName, database) on success
  //
  // If the target file already exists in the tmp directory the download is skipped.
  // Request, decompression and file-write errors, as well as non-200 responses, are
  // reported through cb(err) so the caller decides how to fail.
  function fetch(database, cb) {
    var downloadUrl = database.url;
    var fileName = downloadUrl.split('/').pop();
    var gzip = path.extname(fileName) === '.gz';
    if (gzip) {
      // The stored copy is already decompressed, so drop the suffix (anchored to the end).
      fileName = fileName.replace(/\.gz$/, '');
    }
    var tmpFile = path.join(tmpPath, fileName);
    if (fs.existsSync(tmpFile)) {
      return cb(null, tmpFile, fileName, database);
    }
    console.log('Fetching ', downloadUrl);

    var finished = false;
    // Calls cb exactly once, whichever of the success/error paths gets there first.
    function finish(err) {
      if (finished) {
        return;
      }
      finished = true;
      if (err) {
        return cb(err);
      }
      cb(null, tmpFile, fileName, database);
    }

    function getOptions() {
      var target = url.parse(downloadUrl);
      var options;
      if (process.env.http_proxy) {
        // Through a proxy: connect to the proxy and request the absolute URL.
        // NOTE(review): the proxy URL is handed to https.get as-is; confirm this
        // works for plain-http proxies on the Node versions you support.
        options = url.parse(process.env.http_proxy);
        options.path = downloadUrl;
      } else {
        options = target;
      }
      options.headers = {
        'Host': target.host,
        'User-Agent': user_agent
      };
      return options;
    }

    function onResponse(response) {
      var status = response.statusCode;
      if (status !== 200) {
        // The status-text table is exported by 'http'; 'https' has no STATUS_CODES.
        var statusText = require('http').STATUS_CODES[status];
        console.log('ERROR'.red + ': HTTP Request Failed [%d %s]', status, statusText);
        client.abort();
        return finish(new Error('HTTP request for ' + downloadUrl + ' failed with status ' + status));
      }
      var tmpFileStream = fs.createWriteStream(tmpFile);
      var body = response;
      if (gzip) {
        var gunzip = zlib.createGunzip();
        gunzip.on('error', finish);
        body = response.pipe(gunzip);
      }
      tmpFileStream.on('error', finish);
      tmpFileStream.on('close', function() {
        if (finished) {
          // An error was already reported; do not claim success.
          return;
        }
        console.log(' DONE'.green);
        finish();
      });
      body.pipe(tmpFileStream);
    }

    mkdir(tmpFile);
    var client = https.get(getOptions(), onResponse);
    client.on('error', finish);
    process.stdout.write('Retrieving ' + fileName + ' ...');
  }
  // Unpacks the .csv entries of a downloaded zip archive into the tmp directory.
  //
  //   tmpFile     - path of the downloaded file
  //   tmpFileName - its base name; anything not ending in '.zip' is passed through untouched
  //   database    - manifest entry, handed on to the next step
  //   cb          - cb(err) on failure, cb(null, database) on success
  //
  // Entries are flattened: only the base name of each entry path is used.
  // cb is not called until the parser has finished AND every extracted file has
  // been closed, so the next step never reads a partially written CSV.
  function extract(tmpFile, tmpFileName, database, cb) {
    if (path.extname(tmpFileName) !== '.zip') {
      return cb(null, database);
    }
    process.stdout.write('Extracting ' + tmpFileName + ' ...');

    var pendingWrites = 0;
    var parserDone = false;
    var finished = false;

    // Calls cb exactly once.
    function finish(err) {
      if (finished) {
        return;
      }
      finished = true;
      if (err) {
        return cb(err);
      }
      cb(null, database);
    }

    function finishWhenIdle() {
      if (parserDone && pendingWrites === 0) {
        finish();
      }
    }

    fs.createReadStream(tmpFile)
      .on('error', finish)
      .pipe(unzip.Parse())
      .on('error', finish)
      .on('entry', function(entry) {
        var fileName = path.basename(entry.path);
        var type = entry.type; // 'Directory' or 'File'
        if (type.toLowerCase() === 'file' && path.extname(fileName) === '.csv') {
          pendingWrites++;
          entry.pipe(fs.createWriteStream(path.join(tmpPath, fileName)))
            .on('error', finish)
            .on('close', function() {
              pendingWrites--;
              finishWhenIdle();
            });
        } else {
          // Skip everything else without buffering it.
          entry.autodrain();
        }
      })
      .on('finish', function() {
        parserDone = true;
        finishWhenIdle();
      });
  }
  // Converts a country CSV from the tmp directory into a fixed-width binary file
  // in the data directory.
  //
  //   src  - CSV file name inside tmpPath
  //   dest - output file name inside dataPath
  //   cb   - called with no arguments once the whole file has been processed
  //
  // Record layout (big-endian):
  //   IPv4: start (4 bytes) | end (4 bytes) | country code (2 bytes)  = 10 bytes
  //   IPv6: start (16 bytes) | end (16 bytes) | country code (2 bytes) = 34 bytes
  //         (assumes utils.aton6 yields four 32-bit words - TODO confirm)
  // The first line of the CSV is skipped; unparseable or short lines are logged and skipped.
  function processCountryData(src, dest, cb) {
    var lines = 0;

    function processLine(line) {
      var fields = CSVtoArray(line);
      if (!fields || fields.length < 6) {
        console.log("weird line: %s::", line);
        return;
      }
      lines++;
      var sip;
      var eip;
      var cc = fields[4].replace(/"/g, '');
      var b;
      var bsz;
      var i;
      if (fields[0].match(/:/)) {
        // IPv6: addresses come from the textual columns.
        bsz = 34;
        sip = utils.aton6(fields[0]);
        eip = utils.aton6(fields[1]);
        b = new Buffer(bsz);
        // new Buffer(n) is uninitialised; zero it so no stray memory reaches the file.
        b.fill(0);
        for (i = 0; i < sip.length; i++) {
          b.writeUInt32BE(sip[i], i * 4);
        }
        for (i = 0; i < eip.length; i++) {
          b.writeUInt32BE(eip[i], 16 + (i * 4));
        }
      } else {
        // IPv4: the numeric range columns are used directly.
        bsz = 10;
        sip = parseInt(fields[2].replace(/"/g, ''), 10);
        eip = parseInt(fields[3].replace(/"/g, ''), 10);
        b = new Buffer(bsz);
        b.fill(0);
        b.writeUInt32BE(sip, 0);
        b.writeUInt32BE(eip, 4);
      }
      // Country code occupies the last two bytes of the record.
      b.write(cc, bsz - 2);
      fs.writeSync(datFile, b, 0, bsz, null);
      if (Date.now() - tstart > 5000) {
        // Progress heartbeat, at most once every five seconds.
        tstart = Date.now();
        process.stdout.write('\nStill working (' + lines + ') ...');
      }
    }

    var dataFile = path.join(dataPath, dest);
    var tmpDataFile = path.join(tmpPath, src);
    rimraf(dataFile);
    mkdir(dataFile);
    process.stdout.write('Processing Data (may take a moment) ...');
    var tstart = Date.now();
    var datFile = fs.openSync(dataFile, "w");
    lazy(fs.createReadStream(tmpDataFile))
      .lines
      .map(function(byteArray) {
        return iconv.decode(byteArray, 'latin1');
      })
      .skip(1)
      .map(processLine)
      .on('pipe', function() {
        // Input exhausted: release the descriptor before reporting completion.
        fs.closeSync(datFile);
        console.log(' DONE'.green);
        cb();
      });
  }
  // Converts a city "blocks" CSV from the tmp directory into a fixed-width binary
  // file in the data directory.
  //
  //   src  - CSV file name inside tmpPath
  //   dest - output file name inside dataPath
  //   cb   - called with no arguments once the whole file has been processed
  //
  // Record layout (big-endian):
  //   IPv4: start (4) | end (4) | location id (4)                      = 12 bytes
  //   IPv6: start (16) | end (16) | cc (2) | region (2) | lat (4) |
  //         lon (4) | city (remaining 14 bytes)                        = 58 bytes
  //         (assumes utils.aton6 yields four 32-bit words - TODO confirm)
  // Latitude/longitude are stored as round(degrees * 10000).
  // The first line, copyright lines, digit-free lines and unparseable lines are skipped.
  function processCityData(src, dest, cb) {
    var lines = 0;

    function processLine(line) {
      if (line.match(/^Copyright/) || !line.match(/\d/)) {
        return;
      }
      var fields = CSVtoArray(line);
      if (!fields || fields.length < 3) {
        // Same handling as the country converter: report and move on instead of crashing.
        console.log("weird line: %s::", line);
        return;
      }
      var sip;
      var eip;
      var locId;
      var b;
      var bsz;
      var i;
      lines++;
      if (fields[0].match(/:/)) {
        // IPv6: location details are stored inline in each record.
        var offset = 0;
        var cc = fields[4];
        var city = fields[6];
        var lat = Math.round(parseFloat(fields[7]) * 10000);
        var lon = Math.round(parseFloat(fields[8]) * 10000);
        var rg = fields[5];
        bsz = 58;
        sip = utils.aton6(fields[0]);
        eip = utils.aton6(fields[1]);
        b = new Buffer(bsz);
        b.fill(0);
        for (i = 0; i < sip.length; i++) {
          b.writeUInt32BE(sip[i], offset);
          offset += 4;
        }
        for (i = 0; i < eip.length; i++) {
          b.writeUInt32BE(eip[i], offset);
          offset += 4;
        }
        b.write(cc, offset);
        b.write(rg, offset + 2);
        b.writeInt32BE(lat, offset + 4);
        b.writeInt32BE(lon, offset + 8);
        b.write(city, offset + 12);
      } else {
        // IPv4: only the range and the location id are stored here.
        bsz = 12;
        sip = parseInt(fields[0], 10);
        eip = parseInt(fields[1], 10);
        locId = parseInt(fields[2], 10);
        b = new Buffer(bsz);
        b.fill(0);
        // >>> 0 coerces to an unsigned 32-bit value before writing.
        b.writeUInt32BE(sip >>> 0, 0);
        b.writeUInt32BE(eip >>> 0, 4);
        b.writeUInt32BE(locId >>> 0, 8);
      }
      fs.writeSync(datFile, b, 0, b.length, null);
      if (Date.now() - tstart > 5000) {
        // Progress heartbeat, at most once every five seconds.
        tstart = Date.now();
        process.stdout.write('\nStill working (' + lines + ') ...');
      }
    }

    var dataFile = path.join(dataPath, dest);
    var tmpDataFile = path.join(tmpPath, src);
    rimraf(dataFile);
    // Do not rely on an earlier conversion having created the data directory.
    mkdir(dataFile);
    process.stdout.write('Processing Data (may take a moment) ...');
    var tstart = Date.now();
    var datFile = fs.openSync(dataFile, "w");
    lazy(fs.createReadStream(tmpDataFile))
      .lines
      .map(function(byteArray) {
        return iconv.decode(byteArray, 'latin1');
      })
      .skip(1)
      .map(processLine)
      .on('pipe', function() {
        // Input exhausted: release the descriptor before reporting completion.
        fs.closeSync(datFile);
        cb();
      });
  }
  // Converts a city "location" CSV from the tmp directory into a file of
  // fixed 64-byte records in the data directory.
  //
  //   src  - CSV file name inside tmpPath
  //   dest - output file name inside dataPath
  //   cb   - called with no arguments once the whole file has been processed
  //
  // Record layout (big-endian, zero padded):
  //   cc (offset 0) | region (2) | lat (4) | lon (8) | metro (12, only when non-zero) | city (16..63)
  // Latitude/longitude are stored as round(degrees * 10000).
  // The first line, copyright lines, digit-free lines and unparseable lines are skipped.
  function processCityDataNames(src, dest, cb) {
    var sz = 64;
    var locId = null;

    function processLine(line) {
      if (line.match(/^Copyright/) || !line.match(/\d/)) {
        return;
      }
      var b;
      var fields = CSVtoArray(line);
      if (!fields || fields.length < 4) {
        // Report and skip instead of crashing on fields[...] below.
        console.log("weird line: %s::", line);
        return;
      }
      var currentId = parseInt(fields[0], 10);
      if (locId !== null && currentId - 1 > locId) {
        // Location ids skipped ahead: emit one blank record.
        // NOTE(review): a single filler is written regardless of how large the
        // gap is - confirm against the reader whether one per missing id is needed.
        b = new Buffer(sz);
        b.fill(0);
        fs.writeSync(datFile, b, 0, b.length, null);
      }
      locId = currentId;

      var cc = fields[1];
      var rg = fields[2];
      var city = fields[3];
      var lat = Math.round(parseFloat(fields[5]) * 10000);
      var lon = Math.round(parseFloat(fields[6]) * 10000);
      var metro = parseInt(fields[7], 10);
      b = new Buffer(sz);
      b.fill(0);
      b.write(cc, 0);
      b.write(rg, 2);
      b.writeInt32BE(lat, 4);
      b.writeInt32BE(lon, 8);
      if (metro) {
        // Missing (NaN) or zero metro codes leave the field zeroed.
        b.writeInt32BE(metro, 12);
      }
      b.write(city, 16);
      fs.writeSync(datFile, b, 0, b.length, null);
    }

    var dataFile = path.join(dataPath, dest);
    var tmpDataFile = path.join(tmpPath, src);
    rimraf(dataFile);
    // Do not rely on an earlier conversion having created the data directory.
    mkdir(dataFile);
    var datFile = fs.openSync(dataFile, "w");
    lazy(fs.createReadStream(tmpDataFile))
      .lines
      .map(function(byteArray) {
        return iconv.decode(byteArray, 'latin1');
      })
      .skip(1)
      .map(processLine)
      .on('pipe', function() {
        // Input exhausted: release the descriptor before reporting completion.
        fs.closeSync(datFile);
        cb();
      });
  }
  // Routes a manifest entry to the converter matching its `type`.
  //
  //   database - manifest entry providing type, src and dest
  //   cb       - completion callback
  //
  // 'country' hands cb straight to processCountryData (which prints its own
  // DONE marker); 'city-extended' converts src[0]/dest[0] and then src[1]/dest[1];
  // every other type is treated as a single city file.
  function processData(database, cb) {
    var srcFiles = database.src;
    var destFiles = database.dest;

    // Shared tail of the city conversions: print the marker, then continue.
    function reportDone() {
      console.log(' DONE'.green);
      cb();
    }

    switch (database.type) {
      case 'country':
        processCountryData(srcFiles, destFiles, cb);
        break;
      case 'city-extended':
        processCityData(srcFiles[0], destFiles[0], function() {
          processCityDataNames(srcFiles[1], destFiles[1], reportDone);
        });
        break;
      default:
        processCityData(srcFiles, destFiles, reportDone);
    }
  }
  // Entry point: start from an empty tmp directory, then fetch, extract and
  // convert each database in manifest order, stopping at the first error.
  rimraf(tmpPath);
  // mkdir() creates the PARENT of its argument; tmpPath itself is created
  // later, when fetch() calls mkdir() on a file inside it.
  mkdir(tmpPath);
  async.eachSeries(databases, function(database, nextDatabase) {
    async.seq(fetch, extract, processData)(database, nextDatabase);
  }, function(err) {
    if (err) {
      console.log('Failed to Update Databases from MaxMind.'.red);
      // Surface the cause instead of discarding it.
      console.error(String(err && err.message ? err.message : err));
      process.exit(1);
    } else {
      console.log('Successfully Updated Databases from MaxMind.'.green);
      if (process.argv[2] === 'debug') {
        console.log('Notice: temporary files are not deleted for debug purposes.'.bold.yellow);
      } else {
        rimraf(tmpPath);
      }
      process.exit(0);
    }
  });