From 275052413cd48b6b59ea84a830386ffa7f8342b0 Mon Sep 17 00:00:00 2001
From: Jeff McAffer
Date: Thu, 17 Nov 2016 17:03:11 -0800
Subject: [PATCH] make the crawl loop more resilient

---
 lib/crawler.js | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/lib/crawler.js b/lib/crawler.js
index e32e5b7..66e56ce 100644
--- a/lib/crawler.js
+++ b/lib/crawler.js
@@ -137,14 +137,26 @@ class Crawler {
 
   _startNext(name, request) {
     const now = Date.now();
-    const requestGate = now + (request.shouldDelay() ? 1000 : 0);
-    const delayGate = request.nextRequestTime || now;
-    const nextRequestTime = Math.max(requestGate, delayGate, now);
-    const delay = Math.max(0, nextRequestTime - now);
+    let delay = 0;
+    if (request) {
+      const requestGate = now + (request.shouldDelay() ? 1000 : 0);
+      const delayGate = request.nextRequestTime || now;
+      const nextRequestTime = Math.max(requestGate, delayGate, now);
+      delay = Math.max(0, nextRequestTime - now);
+    }
     if (delay) {
       this.logger.verbose(`Crawler: ${name} waiting for ${delay}ms`);
     }
-    setTimeout(this.start.bind(this, name), delay);
+    setTimeout(() => {
+      try {
+        this.start(name);
+      } catch (error) {
+        // If for some reason we throw all the way out of start, log and restart the loop
+        this.logger.error(new Error('PANIC! Crawl loop exited unexpectedly'));
+        this.logger.error(error);
+        this._startNext(name, null);
+      }
+    }, delay);
   }
 
   _filter(request) {