* Resolve most of the security vulnerabilities

* Add eslint

* Replace createBlockBlobFromText with createWriteStreamToBlockBlob

* Remove memory cache for upsert, get, and etag
This commit is contained in:
Gene Hazan 2019-06-13 15:09:21 -07:00 коммит произвёл GitHub
Родитель b37be19929
Коммит 3dfcc087e1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 2455 добавлений и 4222 удалений

Просмотреть файл

@ -13,7 +13,7 @@ const CrawlerService = require('./lib/crawlerService');
const Q = require('q');
const QueueSet = require('./providers/queuing/queueSet');
const redlock = require('redlock');
const RefreshingConfig = require('refreshing-config');
const RefreshingConfig = require('@microsoft/refreshing-config');
const RefreshingConfigRedis = require('refreshing-config-redis');
let logger = null;

6604
package-lock.json сгенерированный

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -25,46 +25,45 @@
"url": "https://github.com/microsoft/ghcrawler"
},
"dependencies": {
"@microsoft/refreshing-config": "^0.1.3",
"amqp10": "noodlefrenzy/node-amqp10#issue295",
"amqplib": "^0.5.1",
"async": "^2.6.0",
"azure-sb": "^0.10.6",
"azure-storage": "^1.3.2",
"azure-sb": "^0.11.0",
"azure-storage": "^2.10.3",
"body-parser": "^1.15.2",
"connect-redis": "^3.1.0",
"debug": "^2.6.1",
"express": "^4.14.0",
"connect-redis": "^3.4.1",
"debug": "^4.1.1",
"express": "^4.17.1",
"express-init": "^1.1.0",
"express-joi": "^0.3.1",
"extend": "3.0.2",
"ghrequestor": "^0.1.6",
"ghrequestor": "^0.1.7",
"htmlencode": "0.0.4",
"ip": "^1.1.4",
"memory-cache": "^0.1.6",
"ip": "^1.1.5",
"memory-cache": "^0.2.0",
"mkdirp": "^0.5.1",
"moment": "^2.22.2",
"moment": "^2.24.0",
"mongodb": "2.2.11",
"morgan": "^1.7.0",
"node-uuid": "^1.4.7",
"painless-config": "^0.1.0",
"morgan": "^1.9.1",
"node-uuid": "^1.4.8",
"painless-config": "^0.1.1",
"parse-link-header": "^0.4.1",
"promise-retry": "1.1.1",
"q": "1.4.1",
"q": "1.5.1",
"qlimit": "^0.1.1",
"redis": "2.6.3",
"redis-metrics": "^0.4.1",
"redis": "2.8.0",
"redis-metrics": "^1.3.1",
"redis-rate-limiter": "github:jeffmcaffer/redis-rate-limiter",
"redlock": "2.0.1",
"refreshing-config": "^0.1.2",
"refreshing-config-redis": "^0.1.0",
"tmp": "0.0.33"
},
"devDependencies": {
"chai": "^3.5.0",
"grunt": "^1.0.1",
"grunt-mocha-test": "^0.13.2",
"chai": "^4.2.0",
"eslint": "^5.16.0",
"istanbul": "^0.4.5",
"mocha": "^3.1.2",
"sinon": "^1.17.6"
"mocha": "^6.1.4",
"sinon": "^2.4.1"
}
}

Просмотреть файл

@ -5,6 +5,7 @@ const async = require('async');
const azure = require('azure-storage');
const memoryCache = require('memory-cache');
const Q = require('q');
const { Readable } = require('stream');
const URL = require('url');
class AzureStorageDocStore {
@ -33,7 +34,6 @@ class AzureStorageDocStore {
upsert(document) {
const deferred = Q.defer();
const blobName = this._getBlobNameFromDocument(document);
const text = JSON.stringify(document);
const blobMetadata = {
version: document._metadata.version,
etag: document._metadata.etag,
@ -47,23 +47,22 @@ class AzureStorageDocStore {
blobMetadata.extra = JSON.stringify(document._metadata.extra);
}
const options = { metadata: blobMetadata, contentSettings: { contentType: 'application/json' } };
this.service.createBlockBlobFromText(this.name, blobName, text, options, (error, result, response) => {
if (error) {
const dataStream = new Readable();
dataStream.push(JSON.stringify(document));
dataStream.push(null);
dataStream
.pipe(this.service.createWriteStreamToBlockBlob(this.name, blobName, options))
.on('error', (error) => {
return deferred.reject(error);
}
memoryCache.put(document._metadata.url, { etag: document._metadata.etag, document: document }, this.options.ttl);
deferred.resolve(blobName);
});
})
.on('finish', () => {
deferred.resolve(blobName);
});
return deferred.promise;
}
// TODO: Consistency on whether key is a URL or URN
get(type, key) {
const cached = memoryCache.get(key);
if (cached) {
return Q(cached.document);
}
const deferred = Q.defer();
const blobName = this._getBlobNameFromKey(type, key);
this.service.getBlobToText(this.name, blobName, (error, text, blob, response) => {
@ -71,7 +70,6 @@ class AzureStorageDocStore {
return deferred.reject(error);
}
const result = JSON.parse(text);
memoryCache.put(key, { etag: result._metadata.etag, document: result }, this.options.ttl);
deferred.resolve(result);
});
return deferred.promise;
@ -79,11 +77,6 @@ class AzureStorageDocStore {
// TODO: Consistency on whether key is a URL or URN
etag(type, key) {
const cached = memoryCache.get(key);
if (cached) {
return Q(cached.etag);
}
const deferred = Q.defer();
const blobName = this._getBlobNameFromKey(type, key);
this.service.getBlobMetadata(this.name, blobName, (error, blob, response) => {
@ -100,7 +93,6 @@ class AzureStorageDocStore {
const deferred = Q.defer();
async.doWhilst(
callback => {
var started = new Date().getTime();
this.service.listBlobsSegmented(this.name, continuationToken, { include: azure.BlobUtilities.BlobListingDetails.METADATA, location: azure.StorageUtilities.LocationMode.PRIMARY_THEN_SECONDARY }, function (err, result, response) {
// metricsClient.trackDependency(url.parse(blobService.host.primaryHost).hostname, 'listBlobsSegmented', (new Date().getTime() - started), !err, "Http", { 'Container name': 'download', 'Continuation token present': result == null ? false : (result.continuationToken != null), 'Blob count': result == null ? 0 : result.entries.length });