ghcrawler/providers/storage/googleCloudStorage.js

211 строки
5.3 KiB
JavaScript
Исходник Постоянная ссылка Обычный вид История

// Copyright (c) Google LLC. All rights reserved.
// Licensed under the MIT License.
const Storage = require('@google-cloud/storage');
const memoryCache = require('memory-cache');
const Q = require('q');
/**
 * Build the flat custom-metadata record stored alongside a document blob.
 *
 * Values are copied from `document._metadata`. The self link becomes `urn`,
 * and the two timestamps are written under lower-cased keys (`fetchedat`,
 * `processedat`). When `_metadata.extra` is truthy it is serialized to a JSON
 * string; otherwise no `extra` key is emitted.
 *
 * @param {object} document - Document carrying a `_metadata` object that has
 *   a `links.self.href` entry.
 * @returns {object} Flat metadata record.
 */
function getMetadataFromDocument(document) {
  const {
    version,
    etag,
    type,
    url,
    links,
    fetchedAt,
    processedAt,
    extra
  } = document._metadata;

  const record = {
    version,
    etag,
    type,
    url,
    urn: links.self.href,
    fetchedat: fetchedAt,
    processedat: processedAt
  };

  if (!extra) {
    return record;
  }
  record.extra = JSON.stringify(extra);
  return record;
}
/**
 * Read a blob's custom metadata and map it back to document-style fields.
 *
 * The first element of the resolved `getMetadata()` value is used, and only
 * its nested `metadata` object (the custom metadata written on upload) is
 * consulted. The lower-cased `fetchedat` / `processedat` keys are restored to
 * `fetchedAt` / `processedAt`, and a truthy `extra` string is parsed as JSON.
 *
 * @param {object} file - Object exposing a promise-returning `getMetadata()`.
 * @returns {Promise<object>} Resolves with the reconstructed metadata.
 */
function getMetadataFromCloudFile(file) {
  const toDocumentMetadata = (data) => {
    // Only the nested custom metadata is of interest here.
    const { metadata: custom } = data[0];
    const {
      version,
      etag,
      type,
      url,
      urn,
      fetchedat,
      processedat,
      extra
    } = custom;

    let parsedExtra;
    if (extra) {
      parsedExtra = JSON.parse(extra);
    }

    return {
      version,
      etag,
      type,
      url,
      urn,
      fetchedAt: fetchedat,
      processedAt: processedat,
      extra: parsedExtra
    };
  };

  return file.getMetadata().then(toDocumentMetadata);
}
/**
 * Derive the blob name for a document keyed by its URL.
 *
 * Keys that do not start with `http:` or `https:` are treated as
 * ready-made blob names and returned untouched. Otherwise the blob name is
 * `<type><path>.json`, where `<path>` is the lower-cased path plus query
 * string of the URL.
 *
 * @param {string} type - Document type, used as the leading path segment.
 * @param {string} url - Document URL, or an already-computed blob name.
 * @returns {string} Blob name.
 * @throws {TypeError} If `url` starts with an http(s) scheme but cannot be
 *   parsed as a URL.
 */
function getFileNameFromUrl(type, url) {
  if (!(url.startsWith('http:') || url.startsWith('https:'))) {
    return url;
  }
  // This file never requires the legacy `url` module, so `URL` is the global
  // WHATWG class: it has no legacy-style `parse(url, true)` and its instances
  // have no `.path`. Rebuild the legacy `path` value (pathname + query string)
  // from the WHATWG fields instead.
  const parsed = new URL(url);
  const path = `${parsed.pathname}${parsed.search}`;
  return `${type}${path.toLowerCase()}.json`;
}
/**
 * Derive the blob name for a document keyed by its URN.
 *
 * A key beginning with `urn:` is converted to a path via `getPathFromUrn`
 * and given a `.json` suffix; any other key is returned unchanged.
 *
 * @param {string} type - Document type, forwarded to `getPathFromUrn`.
 * @param {string} urn - Document URN, or an already-computed blob name.
 * @returns {string} Blob name.
 */
function getFileNameFromUrn(type, urn) {
  const isUrn = urn.startsWith('urn:');
  return isUrn ? `${getPathFromUrn(type, urn)}.json` : urn;
}
/**
 * Convert a URN into a lower-cased, slash-separated path.
 *
 * A falsy `urn` yields the empty string. A value without the `urn:` prefix
 * is returned unchanged. Otherwise the prefix is stripped, every ':' becomes
 * '/', and the result is lower-cased.
 *
 * @param {string} type - Accepted for signature parity; not used here.
 * @param {string} urn - URN to convert.
 * @returns {string} Path form of the URN.
 */
function getPathFromUrn(type, urn) {
  const prefix = 'urn:';
  if (!urn) {
    return '';
  }
  if (urn.startsWith(prefix)) {
    // Drop the leading "urn:" and turn the remaining separators into slashes.
    const withoutPrefix = urn.slice(prefix.length);
    const slashed = withoutPrefix.replace(/:/g, '/');
    return slashed.toLowerCase();
  }
  return urn;
}
/**
 * Document store that keeps each document as a JSON blob in a Google Cloud
 * Storage bucket, with an in-process `memory-cache` layer keyed by blob name
 * in front of it.
 */
class GoogleCloudStorage {
  /**
   * @param {string} bucketName - Name of the bucket holding the documents.
   * @param {string} projectId - Project id handed to the storage client.
   * @param {string} clientEmail - Service account e-mail (`client_email`).
   * @param {string} key - Service account private key; literal `\n` escape
   *   sequences are converted to real newlines.
   * @param {object} [options] - Provider options. `ttl` is used as the cache
   *   entry lifetime and `blobKey === 'url'` selects URL-based blob names
   *   (URN-based otherwise).
   */
  constructor(bucketName, projectId, clientEmail, key, options) {
    this.bucketName = bucketName;
    this.bucket = null;
    // Default to an empty object so later reads of `this.options.ttl` and
    // `this.options.blobKey` cannot throw when no options were supplied.
    this.options = options || {};

    // Environment variables will cause new lines to be encoded as '\\n'
    // which causes the google-cloud storage SDK to fail as it requires
    // '\n' characters.
    const parsedPrivateKey = key.replace(/\\n/g, '\n');

    this.storage = new Storage({
      projectId,
      credentials: {
        client_email: clientEmail,
        private_key: parsedPrivateKey,
      }
    });
  }

  /**
   * Bind to the configured bucket, creating it when it does not exist yet.
   *
   * @returns {Promise} Settles once the bucket is known to exist.
   */
  connect() {
    this.bucket = this.storage.bucket(this.bucketName);
    return this.bucket.exists().then((data) => {
      if (!data[0]) {
        return this.bucket.create();
      }
    });
  }

  /**
   * Write (or overwrite) a document blob and refresh its cache entry.
   *
   * @param {object} document - Document with a `_metadata` section.
   * @returns {Promise} Settles after the blob is saved and cached.
   */
  upsert(document) {
    const fileName = this._getFileNameFromDocument(document);
    const text = JSON.stringify(document);
    const metadata = getMetadataFromDocument(document);
    const options = {
      contentType: 'application/json',
      metadata: {
        // The Google Cloud SDK requires custom metadata to be defined under
        // `metadata`.
        // See https://github.com/googleapis/nodejs-storage/issues/222
        metadata,
      },
    };
    const file = this.bucket.file(fileName);
    return file.save(text, options).then(() => {
      memoryCache.put(fileName, {
        etag: document._metadata.etag,
        document,
      }, this.options.ttl);
    });
  }

  /**
   * Fetch a document, preferring the in-memory cache.
   *
   * @param {string} type - Document type.
   * @param {string} key - Document URL or URN (see `options.blobKey`).
   * @returns {Promise<object>} Resolves with the parsed document.
   */
  get(type, key) {
    const fileName = this._getFileName(type, key);
    const cached = memoryCache.get(fileName);
    if (cached) {
      return Q(cached.document);
    }
    const file = this.bucket.file(fileName);
    // `download()` yields the blob contents as the first element of the
    // resolved array; `get()` would only yield the File handle itself.
    // Arrow functions keep `this` bound to the instance for `options.ttl`.
    return file.download().then((contents) => {
      return file.getMetadata().then((data) => {
        const result = JSON.parse(contents[0].toString('utf8'));
        // Custom metadata is nested under 'metadata' of the first response
        // element and we want to use the etag provided by ghcrawler.
        const customMetadata = data[0].metadata || {};
        // Cache under the blob name: that is the key every lookup uses.
        memoryCache.put(fileName, {
          etag: customMetadata.etag,
          document: result
        }, this.options.ttl);
        return result;
      });
    });
  }

  /**
   * Look up the crawler-provided etag of a stored document.
   *
   * @param {string} type - Document type.
   * @param {string} key - Document URL or URN (see `options.blobKey`).
   * @returns {Promise<?string>} The etag, or `null` when it cannot be read.
   */
  etag(type, key) {
    const fileName = this._getFileName(type, key);
    const cached = memoryCache.get(fileName);
    if (cached) {
      return Q(cached.etag);
    }
    const file = this.bucket.file(fileName);
    return file.getMetadata()
      .then((data) => data[0].metadata.etag)
      // Best effort: any failure to read the metadata is reported as
      // "no etag" rather than as an error.
      .catch(() => null);
  }

  /**
   * List the metadata of every blob stored under the given type prefix.
   *
   * @param {string} type - Document type, used as the directory prefix.
   * @returns {Promise<object[]>} Resolves with one metadata object per blob.
   */
  list(type) {
    return this.bucket.getFiles({
      autoPaginate: true,
      directory: type
    }).then((data) => Q.all(data[0].map((file) => getMetadataFromCloudFile(file))));
  }

  /**
   * Delete a document blob and drop its cache entry.
   *
   * @param {string} type - Document type.
   * @param {string} key - Document URL or URN (see `options.blobKey`).
   * @returns {Promise<boolean>} Resolves with `true` once deleted.
   */
  delete(type, key) {
    const fileName = this._getFileName(type, key);
    const file = this.bucket.file(fileName);
    return file.delete().then(() => {
      // Without this, `get`/`etag` would keep serving the deleted document
      // from the cache until its ttl expires.
      memoryCache.del(fileName);
      return true;
    });
  }

  /**
   * Count the blobs stored under the given type prefix.
   *
   * @param {string} type - Document type, used as the directory prefix.
   * @returns {Promise<number>} Resolves with the number of blobs.
   */
  count(type) {
    return this.bucket.getFiles({
      autoPaginate: true,
      directory: type
    }).then((data) => {
      // The file list is the first element of the response, as in `list`.
      return data[0].length;
    });
  }

  /**
   * No resources to release; present for interface parity.
   *
   * @returns {Promise} An already-resolved promise.
   */
  close() {
    return Q();
  }

  /**
   * Map a (type, key) pair to a blob name using the configured key style.
   *
   * @param {string} type - Document type.
   * @param {string} key - Document URL when `options.blobKey === 'url'`,
   *   otherwise the document URN.
   * @returns {string} Blob name.
   */
  _getFileName(type, key) {
    if (this.options.blobKey === 'url') {
      return getFileNameFromUrl(type, key);
    }
    return getFileNameFromUrn(type, key);
  }

  /**
   * Compute the blob name for a document from its own metadata.
   *
   * @param {object} document - Document with a `_metadata` section.
   * @returns {string} Blob name.
   */
  _getFileNameFromDocument(document) {
    const type = document._metadata.type;
    const key = this.options.blobKey === 'url'
      ? document._metadata.url
      : document._metadata.links.self.href;
    return this._getFileName(type, key);
  }
}
// Expose the storage provider class as this module's export.
module.exports = GoogleCloudStorage;