Merge pull request #102 from lokitoth/lokitoth/dev/HardenCrawler

Crawler pipeline hardening
This commit is contained in:
Markus Cozowicz 2017-10-06 13:22:24 -04:00 коммит произвёл GitHub
Родитель 908216e869 139a42521a
Коммит c2df9e5dd7
1 изменённых файлов: 38 добавлений и 2 удалений

Просмотреть файл

@ -116,8 +116,44 @@ namespace Microsoft.DecisionService.Crawl
}
catch (WebException we)
{
if ((we.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.Forbidden)
continue;
HttpWebResponse httpResponse = we.Response as HttpWebResponse;
if (we.Status == WebExceptionStatus.ServerProtocolViolation && we.Response != null)
{
// Get a little more telemetry about what is going on here.
IDictionary<string, string> traceData = new Dictionary<string, string>()
{
{ "Response.SupportsHeaders", we.Response.SupportsHeaders.ToString() }
};
if (we.Response.SupportsHeaders)
{
for (int i = 0; i < we.Response.Headers.Count; i++)
{
string headerName = we.Response.Headers.GetKey(i);
string headerValue = we.Response.Headers.Get(i);
traceData[$"Response.Headers.{headerName}"] = headerValue;
}
}
if (httpResponse != null)
{
traceData["HttpResponse.StatusCode"] = httpResponse.StatusCode.ToString();
}
Services.TelemetryClient.TrackTrace($"Download target ({uri}) ServerProtocolViolation", SeverityLevel.Error, traceData);
}
if (httpResponse != null)
{
// Ignore known cases where crawl fails due to error on the crawl-target side - these should not
// cause a hard failure on our end.
if (httpResponse.StatusCode == HttpStatusCode.Forbidden ||
httpResponse.StatusCode == HttpStatusCode.NotFound ||
httpResponse.StatusCode == HttpStatusCode.ServiceUnavailable)
{
continue;
}
}
throw;
}