This commit is contained in:
Jeff McAffer 2016-11-10 19:57:43 -08:00
Родитель 5ff8222640
Коммит 24d37ee5a7
1 изменённый файл: 10 добавлений и 11 удалений

Просмотреть файл

@ -8,9 +8,6 @@ const collections = {
orgs: 'org', repos: 'repo', issues: 'issue', issue_comments: 'issue_comment', commits: 'commit', teams: 'team', users: 'user'
};
// Set of type names: 'commit' plus a list of GitHub event type names.
// NOTE(review): judging by the name, these are presumably document types that
// never change once fetched -- confirm against the code that reads this set.
const immutable = new Set([
'commit', 'CommitCommentEvent', 'CreateEvent', 'DeleteEvent', 'DeploymentEvent', 'DeploymentStatusEvent', 'DownloadEvent', 'FollowEvent', 'ForkEvent', 'ForkApplyEvent', 'GistEvent', 'GollumEvent', 'IssueCommentEvent', 'IssuesEvent', 'LabelEvent', 'MemberEvent', 'MembershipEvent', 'MilestoneEvent', 'PageBuildEvent', 'PublicEvent', 'PullRequestEvent', 'PullRequestReviewEvent', 'PullRequestReviewCommentEvent', 'PushEvent', 'ReleaseEvent', 'RepositoryEvent', 'StatusEvent', 'TeamAddEvent', 'WatchEvent']);
class Crawler {
constructor(queue, priorityQueue, store, requestor, config, logger) {
this.queue = queue;
@ -62,14 +59,17 @@ class Crawler {
if (request.skip) {
return Q.resolve(request);
}
// rewrite the request type for collections
const type = collections[request.type];
if (type) {
// Rewrite the request type for collections and remember the collection subType.
// Also setup 'page' as the document type to look up for etags etc.
let fetchType = request.type;
let subType = collections[request.type];
if (subType) {
request.type = 'collection';
request.subType = type;
request.subType = subType;
fetchType = 'page';
}
const self = this;
return this.store.etag(request.type, request.url).then(etag => {
return this.store.etag(fetchType, request.url).then(etag => {
const options = etag ? { headers: { 'If-None-Match': etag } } : {};
const start = Date.now();
return self.requestor.get(request.url, options).then(githubResponse => {
@ -86,7 +86,7 @@ class Crawler {
if (!request.force) {
return self._markSkip(request, 'Unmodified');
}
return self.store.get(request.type, request.url).then(document => {
return self.store.get(fetchType, request.url).then(document => {
request.document = document;
request.response = githubResponse;
// Our store is up to date so don't '
@ -184,7 +184,7 @@ class Crawler {
}
}
// Now process this page after setting up the document to be a page.
// Rewrite the request and document to be a 'page' and then process.
request.page = 1;
request.document._metadata.type = 'page';
return this.page(request);
@ -198,7 +198,6 @@ class Crawler {
const qualifier = request.context.qualifier;
this._linkSelf(request, 'self', `${qualifier}:${type}:pages:${request.page}`);
document.elements.forEach(item => {
// this._queue(request, type, item.url, `${qualifier}:${type}`, request.context, this.priorityQueue);
this._queueChild(request, type, item.url, qualifier);
});
return document;