ghcrawler/lib/request.js

229 строки
6.2 KiB
JavaScript
Исходник Обычный вид История

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
2016-12-01 04:54:48 +03:00
const Policy = require('./traversalPolicy');
const TraversalPolicy = require('./traversalPolicy');
2016-11-11 10:59:42 +03:00
/**
 * Requests describe a resource to capture and process as well as the context for that processing.
 *
 * Several properties read by the methods below (`crawler`, `document`, `attemptCount`, `payload`,
 * `contentOrigin`) are never assigned inside this class; they must be attached by whoever is
 * driving the request.
 */
class Request {
  /**
   * @param {*} type - resource type; interpolated into URNs and into toString().
   * @param {*} url - location of the resource; interpolated into toString().
   * @param {Object|null} [context=null] - processing context; any falsy value becomes a fresh `{}`.
   * @param {string} [relationship='contains'] - relationship recorded on the request.
   */
  constructor(type, url, context = null, relationship = 'contains') {
    this.type = type;
    this.url = url;
    this.context = context || {};
    this.relationship = relationship;
    this.policy = Policy.default();
  }

  /**
   * Turn a plain object into a Request in place by re-parenting it onto Request.prototype.
   * A string policy is looked up by name and a policy object is re-parented onto
   * Policy.prototype. A missing relationship defaults to 'contains'.
   *
   * @param {Object} object - the object to adopt; it is mutated.
   * @returns {Request} the same object that was passed in.
   */
  static adopt(object) {
    // Object.setPrototypeOf also works for null-prototype objects, where assigning to
    // `__proto__` would merely create an ordinary own property.
    if (Object.getPrototypeOf(object) !== Request.prototype) {
      Object.setPrototypeOf(object, Request.prototype);
    }
    object.policy = Request._getExpandedPolicy(object.policy);
    if (object.policy && Object.getPrototypeOf(object.policy) !== Policy.prototype) {
      Object.setPrototypeOf(object.policy, Policy.prototype);
    }
    // In a static method `this` is the Request constructor, so the default must be
    // written to the adopted object itself.
    object.relationship = object.relationship || 'contains';
    return object;
  }

  /**
   * Look up a policy given by name; anything that is not a string is returned untouched.
   *
   * @param {*} policyOrSpec - a policy name or an already-built policy.
   * @returns {*} the result of Policy.getPolicy() for strings, otherwise the argument itself.
   */
  static _getExpandedPolicy(policyOrSpec) {
    return typeof policyOrSpec === 'string' ? Policy.getPolicy(policyOrSpec) : policyOrSpec;
  }

  /**
   * Setup some internal context and open this request for handling.
   *
   * @returns {Request} this request.
   * @throws {Error} if the policy is a name that cannot be looked up.
   */
  open() {
    this.context = this.context || {};
    this._addHistory();
    this._expandPolicy();
    return this;
  }

  /**
   * Replace a policy given by name with the policy object it names.
   *
   * @throws {Error} if the name is unknown to Policy.getPolicy().
   */
  _expandPolicy() {
    if (typeof this.policy !== 'string') {
      return;
    }
    const policy = Policy.getPolicy(this.policy);
    if (!policy) {
      // Request has no dead-letter facility of its own, so fail loudly with the real reason.
      throw new Error(`Unknown request policy: ${this.policy}`);
    }
    this.policy = policy;
  }

  /**
   * Append this request's string form to context.history, creating the list if needed.
   */
  _addHistory() {
    this.context.history = this.context.history || [];
    this.context.history.push(this.toString());
  }

  /**
   * @param {Request} request - the request to look for.
   * @returns {boolean} true if the given request's string form is in this request's history.
   */
  hasSeen(request) {
    const history = this.context.history || [];
    return history.includes(request.toString());
  }

  /**
   * @returns {Array} the promises tracked so far, or a new empty array if there are none.
   */
  getTrackedPromises() {
    return this.promises || [];
  }

  /**
   * Remember one promise or an array of promises on this request.
   *
   * @param {*} promises - a single value or an array of values; a falsy value is ignored.
   * @returns {Request} this request.
   */
  track(promises) {
    if (!promises) {
      return this;
    }
    this.promises = this.promises || [];
    if (Array.isArray(promises)) {
      this.promises.push(...promises);
    } else {
      this.promises.push(promises);
    }
    return this;
  }

  /**
   * Merge the given properties into a fresh copy of this.meta.
   *
   * @param {Object} data - properties to add; they override existing keys.
   * @returns {Request} this request.
   */
  addMeta(data) {
    this.meta = Object.assign({}, this.meta, data);
    return this;
  }

  /**
   * Add a 'self' link built from the root qualifier (see getRootQualifier).
   */
  addRootSelfLink() {
    this.linkResource('self', this.getRootQualifier());
  }

  /**
   * Add a 'self' link built from the child qualifier (see getChildQualifier).
   *
   * @param {string} [key='id'] - document property used as the identifier.
   */
  addSelfLink(key = 'id') {
    this.linkResource('self', this.getChildQualifier(key));
  }

  /**
   * @returns {string} `urn:<type>:<document.id>`.
   */
  getRootQualifier() {
    return `urn:${this.type}:${this.document.id}`;
  }

  /**
   * Build a URN for this request's document relative to context.qualifier.
   *
   * @param {string} [key='id'] - document property used as the identifier.
   * @returns {string} `<qualifier>:<type>:<document[key]>`, without doubling the ':' after the qualifier.
   * @throws {Error} if context.qualifier is missing or is not a string.
   */
  getChildQualifier(key = 'id') {
    let qualifier = this.context.qualifier;
    if (!qualifier || (typeof qualifier !== 'string')) {
      throw new Error('Need something on which to base the self link URN');
    }
    qualifier = qualifier.endsWith(':') ? qualifier : qualifier + ':';
    return `${qualifier}${this.type}:${this.document[key]}`;
  }

  /**
   * Record a link of type 'resource' in document._metadata.links.
   *
   * @param {string} name - link name; an existing link of that name is replaced.
   * @param {string|Array} urn - stored under 'hrefs' when it is an array, otherwise under 'href'.
   */
  linkResource(name, urn) {
    const links = this.document._metadata.links;
    const key = Array.isArray(urn) ? 'hrefs' : 'href';
    links[name] = {};
    links[name][key] = urn;
    links[name].type = 'resource';
  }

  /**
   * Record the 'siblings' link, of type 'collection', in document._metadata.links.
   *
   * @param {*} href - target of the link.
   */
  linkSiblings(href) {
    const links = this.document._metadata.links;
    links.siblings = { href: href, type: 'collection' };
  }

  /**
   * Record a link of type 'collection' in document._metadata.links.
   *
   * @param {string} name - link name.
   * @param {*} href - target of the link.
   */
  linkCollection(name, href) {
    const links = this.document._metadata.links;
    links[name] = { href: href, type: 'collection' };
  }

  /**
   * Record a link of type 'relation' in document._metadata.links.
   *
   * @param {string} name - link name.
   * @param {*} href - target of the link.
   */
  linkRelation(name, href) {
    const links = this.document._metadata.links;
    links[name] = { href: href, type: 'relation' };
  }

  /**
   * Hand requests to this.crawler.queue() and track whatever it returns.
   *
   * @param {*} requests - passed through to the crawler unchanged.
   * @param {*} [name=null] - passed through to the crawler unchanged.
   */
  queueRequests(requests, name = null) {
    this.track(this.crawler.queue(requests, name));
  }

  /**
   * Create and queue a follow-on request that inherits this request's context.
   * Nothing is queued when neither the caller nor the current policy supplies a policy.
   *
   * @param {string} relationship - relationship of the new request to this one.
   * @param {*} type - type of the new request.
   * @param {*} url - url of the new request.
   * @param {Object|null} [context=null] - extra context layered over a copy of this.context.
   * @param {boolean} [pruneRelation=true] - whether to drop context.relation from the new request.
   * @param {*} [policy=null] - explicit policy; defaults to this.policy.getNextPolicy(this, relationship).
   */
  queue(relationship, type, url, context = null, pruneRelation = true, policy = null) {
    policy = policy || this.policy.getNextPolicy(this, relationship);
    if (!policy) {
      return;
    }
    // Copy so that the new request never mutates this request's context.
    context = Object.assign({}, this.context, context);
    context.qualifier = context.qualifier || 'urn:';
    const newRequest = new Request(type, url, context, relationship);
    newRequest.policy = policy;
    // relations are not transitive so ensure any relation is stripped off
    if (pruneRelation) {
      delete newRequest.context.relation;
    }
    this.queueRequests(newRequest);
  }

  /**
   * Flag this request to be skipped.
   *
   * @param {*} outcome - recorded only if no outcome is set yet.
   * @param {*} message - recorded only if no message is set yet.
   * @returns {Request} this request.
   */
  markSkip(outcome, message) {
    // if we are already skipping/requeuing, keep the original as the official outcome but log this new one so its not missed
    if (this.shouldSkip()) {
      this._log('verbose', `Redundant skip: ${outcome}, ${message}`, this.meta);
      return this;
    }
    this.processControl = 'skip';
    this.outcome = this.outcome || outcome;
    this.message = this.message || message;
    return this;
  }

  /**
   * Flag this request to be requeued.
   *
   * @param {*} outcome - recorded only if no outcome is set yet.
   * @param {*} message - recorded only if no message is set yet.
   * @returns {Request} this request.
   */
  markRequeue(outcome, message) {
    // if we are already skipping/requeuing, keep the original as the official outcome but log this new one so its not missed
    if (this.shouldSkip()) {
      this._log('verbose', `Redundant requeue: ${outcome}, ${message}`, this.meta);
      return this;
    }
    this.processControl = 'requeue';
    this.outcome = this.outcome || outcome;
    this.message = this.message || message;
    return this;
  }

  /**
   * Explicitly request that the document be saved.
   */
  markSave() {
    this.save = true;
  }

  /**
   * Explicitly request that the document not be saved.
   */
  markNoSave() {
    this.save = false;
  }

  /**
   * Decide whether the document should be saved. An explicit markSave()/markNoSave() wins;
   * otherwise the answer is yes unless contentOrigin is 'cacheOfOrigin'.
   *
   * @returns {*} the falsy this.document when there is no document, otherwise a boolean.
   */
  shouldSave() {
    return this.document && (this.save === true || (this.save !== false && this.contentOrigin !== 'cacheOfOrigin'));
  }

  /**
   * @returns {boolean} true if the request is flagged as either 'skip' or 'requeue'.
   */
  shouldSkip() {
    return this.processControl === 'skip' || this.processControl === 'requeue';
  }

  /**
   * Push nextRequestTime out to the given time; an already later time is left alone.
   *
   * @param {number} time - the candidate time, compared with `<` against the current value.
   */
  delayUntil(time) {
    if (!this.nextRequestTime || this.nextRequestTime < time) {
      this.nextRequestTime = time;
    }
  }

  /**
   * Push nextRequestTime out to at least the given number of milliseconds from now.
   *
   * @param {number} [milliseconds=2000] - delay added to Date.now().
   */
  delay(milliseconds = 2000) {
    this.delayUntil(Date.now() + milliseconds);
  }

  /**
   * @returns {boolean} true if the request is flagged as 'requeue'.
   */
  shouldRequeue() {
    return this.processControl === 'requeue';
  }

  /**
   * Create a new request data structure that has just the things we should queue:
   * type, url, context, relationship, attemptCount, policy and, when truthy, payload.
   * The context object is shared with this request, not copied.
   *
   * @returns {Request} the pared-down request.
   */
  createRequeuable() {
    const queuable = new Request(this.type, this.url, this.context, this.relationship);
    queuable.attemptCount = this.attemptCount;
    queuable.policy = this.policy;
    if (this.payload) {
      queuable.payload = this.payload;
    }
    return queuable;
  }

  /**
   * @returns {string} `<type>@<url>`.
   */
  toString() {
    return `${this.type}@${this.url}`;
  }

  /**
   * @returns {string} `<type>@<url>:<short form of the policy>`; a policy given by name is looked up first.
   */
  toUniqueString() {
    return `${this.type}@${this.url}:${Request._getExpandedPolicy(this.policy).getShortForm()}`;
  }

  /**
   * Log through this.crawler.logger; does nothing while no crawler is attached.
   *
   * @param {string} level - log level passed to the logger.
   * @param {string} message - text to log.
   * @param {*} [meta=null] - extra data passed to the logger.
   */
  _log(level, message, meta = null) {
    if (this.crawler) {
      this.crawler.logger.log(level, message, meta);
    }
  }
}
// Expose the Request class as this module's sole export.
module.exports = Request;