зеркало из https://github.com/microsoft/mwt-ds.git
support empty content-type
This commit is contained in:
Родитель
de5bdb60f5
Коммит
e13b97d62a
|
@ -98,7 +98,7 @@ namespace Microsoft.DecisionService.Crawl
|
|||
|
||||
CrawlResponse result = null;
|
||||
|
||||
if (contentType.StartsWith("text/html"))
|
||||
if (string.IsNullOrWhiteSpace(contentType) || contentType.StartsWith("text/html"))
|
||||
result = await DownloadHtml(uri, userAgent, reqBody);
|
||||
|
||||
if (contentType.StartsWith("application/json"))
|
||||
|
|
|
@ -149,7 +149,7 @@ namespace Microsoft.DecisionService.Crawl
|
|||
response.Description = FindMeta(head, "meta[@property='og:description' or name='og:description' or @property='twitter:description' or @name='twitter:description' or @name='description']");
|
||||
|
||||
if (string.IsNullOrEmpty(response.Description))
|
||||
response.Title = FindValue(head, "title");
|
||||
response.Description = FindValue(head, "title");
|
||||
|
||||
if (response.Description != null)
|
||||
response.Description = WebUtility.HtmlDecode(response.Description.Trim());
|
||||
|
@ -170,6 +170,11 @@ namespace Microsoft.DecisionService.Crawl
|
|||
response.Image = img;
|
||||
}
|
||||
|
||||
// extract keywords
|
||||
var keywords = FindMeta(head, "meta[@name='keywords']");
|
||||
if (!string.IsNullOrEmpty(keywords))
|
||||
response.Keywords = keywords.Split(',').Select(k => k.Trim()).ToList();
|
||||
|
||||
// build article
|
||||
var articleText = new StringBuilder();
|
||||
|
||||
|
@ -189,6 +194,15 @@ namespace Microsoft.DecisionService.Crawl
|
|||
if (!string.IsNullOrEmpty(text))
|
||||
articleText.AppendLine(text);
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(articleText.ToString()))
|
||||
{
|
||||
if (!string.IsNullOrEmpty(response.Title))
|
||||
articleText.AppendLine(response.Title);
|
||||
|
||||
if (!string.IsNullOrEmpty(response.Description))
|
||||
articleText.AppendLine(response.Description);
|
||||
}
|
||||
|
||||
response.Article = WebUtility.HtmlDecode(articleText.ToString());
|
||||
|
||||
|
|
|
@ -29,6 +29,9 @@ namespace Microsoft.DecisionService.Crawl.Data
|
|||
[JsonProperty("description", NullValueHandling = NullValueHandling.Ignore)]
|
||||
public string Description { get; set; }
|
||||
|
||||
[JsonProperty("keywords", NullValueHandling = NullValueHandling.Ignore)]
|
||||
public List<string> Keywords { get; set; }
|
||||
|
||||
[JsonProperty("type", NullValueHandling = NullValueHandling.Ignore)]
|
||||
public string Type { get; set; }
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче