From bf9fccafddc55e1e7dd1e14546d1ad3eb1bc699e Mon Sep 17 00:00:00 2001 From: Jacob Alber Date: Fri, 6 Oct 2017 11:05:32 -0400 Subject: [PATCH] Add 404 and 503 to targeted exclusions from hard failure at Download stage of Crawl --- Crawl/Crawl/Crawl.cs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Crawl/Crawl/Crawl.cs b/Crawl/Crawl/Crawl.cs index 904c9d99..39eb0d03 100644 --- a/Crawl/Crawl/Crawl.cs +++ b/Crawl/Crawl/Crawl.cs @@ -116,8 +116,18 @@ namespace Microsoft.DecisionService.Crawl } catch (WebException we) { - if ((we.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.Forbidden) - continue; + HttpWebResponse httpResponse = we.Response as HttpWebResponse; + if (httpResponse != null) + { + // Ignore known cases where crawl fails due to error on the crawl-target side - these should not + // cause a hard failure on our end. + if (httpResponse.StatusCode == HttpStatusCode.Forbidden || + httpResponse.StatusCode == HttpStatusCode.NotFound || + httpResponse.StatusCode == HttpStatusCode.ServiceUnavailable) + { + continue; + } + } throw; }