зеркало из https://github.com/microsoft/ghcrawler.git
fix fetch type for etagging
This commit is contained in:
Родитель
5ff8222640
Коммит
24d37ee5a7
|
@ -8,9 +8,6 @@ const collections = {
|
|||
orgs: 'org', repos: 'repo', issues: 'issue', issue_comments: 'issue_comment', commits: 'commit', teams: 'team', users: 'user'
|
||||
};
|
||||
|
||||
const immutable = new Set([
|
||||
'commit', 'CommitCommentEvent', 'CreateEvent', 'DeleteEvent', 'DeploymentEvent', 'DeploymentStatusEvent', 'DownloadEvent', 'FollowEvent', 'ForkEvent', 'ForkApplyEvent', 'GistEvent', 'GollumEvent', 'IssueCommentEvent', 'IssuesEvent', 'LabelEvent', 'MemberEvent', 'MembershipEvent', 'MilestoneEvent', 'PageBuildEvent', 'PublicEvent', 'PullRequestEvent', 'PullRequestReviewEvent', 'PullRequestReviewCommentEvent', 'PushEvent', 'ReleaseEvent', 'RepositoryEvent', 'StatusEvent', 'TeamAddEvent', 'WatchEvent']);
|
||||
|
||||
class Crawler {
|
||||
constructor(queue, priorityQueue, store, requestor, config, logger) {
|
||||
this.queue = queue;
|
||||
|
@ -62,14 +59,17 @@ class Crawler {
|
|||
if (request.skip) {
|
||||
return Q.resolve(request);
|
||||
}
|
||||
// rewrite the request type for collections
|
||||
const type = collections[request.type];
|
||||
if (type) {
|
||||
// Rewrite the request type for collections and remember the collection subType.
|
||||
// Also set up 'page' as the document type to look up for etags, etc.
|
||||
let fetchType = request.type;
|
||||
let subType = collections[request.type];
|
||||
if (subType) {
|
||||
request.type = 'collection';
|
||||
request.subType = type;
|
||||
request.subType = subType;
|
||||
fetchType = 'page';
|
||||
}
|
||||
const self = this;
|
||||
return this.store.etag(request.type, request.url).then(etag => {
|
||||
return this.store.etag(fetchType, request.url).then(etag => {
|
||||
const options = etag ? { headers: { 'If-None-Match': etag } } : {};
|
||||
const start = Date.now();
|
||||
return self.requestor.get(request.url, options).then(githubResponse => {
|
||||
|
@ -86,7 +86,7 @@ class Crawler {
|
|||
if (!request.force) {
|
||||
return self._markSkip(request, 'Unmodified');
|
||||
}
|
||||
return self.store.get(request.type, request.url).then(document => {
|
||||
return self.store.get(fetchType, request.url).then(document => {
|
||||
request.document = document;
|
||||
request.response = githubResponse;
|
||||
// Our store is up to date, so don't store the document again.
|
||||
|
@ -184,7 +184,7 @@ class Crawler {
|
|||
}
|
||||
}
|
||||
|
||||
// Now process this page after setting up the document to be a page.
|
||||
// Rewrite the request and document to be a 'page' and then process.
|
||||
request.page = 1;
|
||||
request.document._metadata.type = 'page';
|
||||
return this.page(request);
|
||||
|
@ -198,7 +198,6 @@ class Crawler {
|
|||
const qualifier = request.context.qualifier;
|
||||
this._linkSelf(request, 'self', `${qualifier}:${type}:pages:${request.page}`);
|
||||
document.elements.forEach(item => {
|
||||
// this._queue(request, type, item.url, `${qualifier}:${type}`, request.context, this.priorityQueue);
|
||||
this._queueChild(request, type, item.url, qualifier);
|
||||
});
|
||||
return document;
|
||||
|
|
Загрузка…
Ссылка в новой задаче