зеркало из https://github.com/microsoft/mwt-ds.git
Merge pull request #102 from lokitoth/lokitoth/dev/HardenCrawler
Crawler pipeline hardening
This commit is contained in:
Коммит
c2df9e5dd7
|
@ -116,8 +116,44 @@ namespace Microsoft.DecisionService.Crawl
|
|||
}
|
||||
catch (WebException we)
|
||||
{
|
||||
if ((we.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.Forbidden)
|
||||
continue;
|
||||
HttpWebResponse httpResponse = we.Response as HttpWebResponse;
|
||||
if (we.Status == WebExceptionStatus.ServerProtocolViolation && we.Response != null)
|
||||
{
|
||||
// Get a little more telemetry about what is going on here.
|
||||
IDictionary<string, string> traceData = new Dictionary<string, string>()
|
||||
{
|
||||
{ "Response.SupportsHeaders", we.Response.SupportsHeaders.ToString() }
|
||||
};
|
||||
|
||||
if (we.Response.SupportsHeaders)
|
||||
{
|
||||
for (int i = 0; i < we.Response.Headers.Count; i++)
|
||||
{
|
||||
string headerName = we.Response.Headers.GetKey(i);
|
||||
string headerValue = we.Response.Headers.Get(i);
|
||||
traceData[$"Response.Headers.{headerName}"] = headerValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (httpResponse != null)
|
||||
{
|
||||
traceData["HttpResponse.StatusCode"] = httpResponse.StatusCode.ToString();
|
||||
}
|
||||
|
||||
Services.TelemetryClient.TrackTrace($"Download target ({uri}) ServerProtocolViolation", SeverityLevel.Error, traceData);
|
||||
}
|
||||
|
||||
if (httpResponse != null)
|
||||
{
|
||||
// Ignore known cases where crawl fails due to error on the crawl-target side - these should not
|
||||
// cause a hard failure on our end.
|
||||
if (httpResponse.StatusCode == HttpStatusCode.Forbidden ||
|
||||
httpResponse.StatusCode == HttpStatusCode.NotFound ||
|
||||
httpResponse.StatusCode == HttpStatusCode.ServiceUnavailable)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
throw;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче