зеркало из https://github.com/microsoft/mwt-ds.git
added DecisionService HTML scrape
This commit is contained in:
Родитель
ae5c30fcac
Коммит
d120c132c9
|
@ -0,0 +1,64 @@
|
|||
using System.Threading.Tasks;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using System.Net.Http;
|
||||
using Newtonsoft.Json;
|
||||
using Microsoft.DecisionService.Crawl.Data;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System.Linq;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// HTTP-triggered function that sends the article text to the service configured
/// under the "AzureMLTopic" name and exposes the returned scores as a "topics" array.
/// </summary>
public class AzureMLTopic
{
    private static readonly HttpCachedService cachedService;

    static AzureMLTopic()
    {
        cachedService = new HttpCachedService("AzureMLTopic");

        // The scoring endpoint is called with the service's API key as a bearer token.
        cachedService.client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", cachedService.apiKey);
    }

    /// <summary>
    /// Function entry point; delegates the request handling to <c>HttpCachedService.InvokeAsync</c>.
    /// </summary>
    /// <param name="req">Incoming HTTP request.</param>
    /// <param name="log">Function trace writer.</param>
    /// <param name="cancellationToken">Token forwarded to the cached service call.</param>
    /// <returns>The HTTP response produced by the cached service.</returns>
    public static Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log, CancellationToken cancellationToken)
    {
        return cachedService.InvokeAsync(req, log,
            reqBody =>
            {
                // Request body: a single-row, single-column ("Text") table named "input1".
                var scoreRequest = new
                {
                    Inputs = new Dictionary<string, StringTable>(),
                    GlobalParameters = new Dictionary<string, string>() { }
                };

                scoreRequest.Inputs.Add("input1", new StringTable
                {
                    ColumnNames = new string[] { "Text" },
                    Values = new string[,] { { reqBody.Article } }
                });

                return scoreRequest;
            },
            (reqBody, blobContent) =>
            {
                blobContent.Output = new JObject();

                var jobj = JObject.Parse(blobContent.Value);
                var topicRemoteRaw = jobj.SelectToken("$.Results.output1.value.Values[0][0]");

                // The first cell holds a comma-separated list of scores; a missing,
                // JSON-null or empty cell means there are no topics to report.
                var topicList = topicRemoteRaw?.Value<string>();
                if (string.IsNullOrEmpty(topicList))
                    return;

                // Parse with the invariant culture: the payload is machine generated, and a
                // culture that uses ',' as decimal separator must not change the result.
                var topics = topicList
                    .Split(',')
                    .Select(s => float.Parse(s, System.Globalization.CultureInfo.InvariantCulture))
                    .ToArray();

                blobContent.Output.Add(new JProperty("topics", topics));
            },
            cancellationToken);
    }

    /// <summary>
    /// Table-shaped input of the scoring request: column names plus a two-dimensional value grid.
    /// </summary>
    public class StringTable
    {
        public string[] ColumnNames { get; set; }

        public string[,] Values { get; set; }
    }
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"scriptFile": "..\\bin\\Crawl.dll",
|
||||
"entryPoint": "Microsoft.DecisionService.Crawl.AzureMLTopic.Run",
|
||||
"bindings": [
|
||||
{
|
||||
"authLevel": "function",
|
||||
"name": "req",
|
||||
"type": "httpTrigger",
|
||||
"direction": "in"
|
||||
},
|
||||
{
|
||||
"name": "$return",
|
||||
"type": "http",
|
||||
"direction": "out"
|
||||
}
|
||||
],
|
||||
"disabled": false
|
||||
}
|
|
@ -0,0 +1,124 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.WindowsAzure.Storage;
|
||||
using Microsoft.WindowsAzure.Storage.Blob;
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Blob-storage backed cache. Items are stored as JSON-serialized <see cref="CacheItem"/>
/// blobs named "&lt;site&gt;/&lt;id&gt;" inside one container per month and service
/// ("yyyyMM" + service name, lower-cased).
/// </summary>
public sealed class BlobCache
{
    // https://docs.microsoft.com/en-us/rest/api/storageservices/fileservices/naming-and-referencing-containers--blobs--and-metadata
    private const int MaxBlobNameLength = 1024;

    private readonly CloudBlobClient blobClient;

    /// <summary>Creates a cache on top of the given storage account.</summary>
    /// <param name="storageConnectionString">Azure storage connection string.</param>
    public BlobCache(string storageConnectionString)
    {
        var account = CloudStorageAccount.Parse(storageConnectionString);
        this.blobClient = account.CreateCloudBlobClient();
    }

    /// <summary>
    /// Returns the container for the month of <paramref name="now"/> and the given service,
    /// creating it if it does not exist yet.
    /// </summary>
    private async Task<CloudBlobContainer> GetContainer(DateTime now, string service, CancellationToken cancellationToken = default(CancellationToken))
    {
        // Invariant culture keeps the container name stable regardless of the host's calendar settings.
        var month = now.ToString("yyyyMM", System.Globalization.CultureInfo.InvariantCulture);
        var container = this.blobClient.GetContainerReference((month + service).ToLowerInvariant());
        await container.CreateIfNotExistsAsync(cancellationToken);

        return container;
    }

    /// <summary>
    /// Builds the blob name "&lt;site&gt;/&lt;id&gt;", escaping "//" and ":" in the id and
    /// truncating the id so that the full name stays within the blob name length limit.
    /// </summary>
    private string ToBlobName(string site, string id)
    {
        // escape for blob name
        id = id.Replace("//", "__")
               .Replace(":", "_");

        var maxIdLength = MaxBlobNameLength - (site.Length + 1);
        if (id.Length > maxIdLength)
            id = id.Substring(0, maxIdLength);

        // <site>/<url>
        var sb = new StringBuilder();
        sb.Append(site);
        if (!id.StartsWith("/", StringComparison.Ordinal))
            sb.Append('/');
        sb.Append(id);

        return sb.ToString();
    }

    /// <summary>
    /// Looks up a cached item in the current month's container and, if not found there,
    /// in the previous month's container.
    /// </summary>
    /// <param name="site">Site the item belongs to; first segment of the blob name.</param>
    /// <param name="id">Item identifier (e.g. a URL); escaped and appended to the blob name.</param>
    /// <param name="service">Service name; part of the container name.</param>
    /// <param name="input">Input stored alongside the item when a refresh is scheduled.</param>
    /// <param name="refreshTimeSpan">Time from now until the item is due for its next refresh.</param>
    /// <param name="cancellationToken">Token forwarded to the storage operations.</param>
    /// <returns>
    /// The cached content if an item exists and is not yet due for refresh; otherwise <c>null</c>.
    /// In the <c>null</c> case the item (new or existing) has been written to the current month's
    /// blob with the given input and a new refresh timestamp.
    /// </returns>
    public async Task<BlobContent> GetAsync(string site, string id, string service, string input, TimeSpan refreshTimeSpan, CancellationToken cancellationToken)
    {
        var now = DateTime.UtcNow;
        var blobName = this.ToBlobName(site, id);

        CacheItem cacheItem = null;
        CloudBlockBlob currentBlob = null;

        for (int i = 0; i < 2 && cacheItem == null; i++)
        {
            var container = await this.GetContainer(now.AddMonths(-i), service, cancellationToken);
            var blob = container.GetBlockBlobReference(blobName);

            // the first iteration addresses the current month's blob
            if (currentBlob == null)
                currentBlob = blob;

            // TODO: CreateIfNotExists() and check for empty
            if (!await blob.ExistsAsync(cancellationToken))
                continue;

            var json = await blob.DownloadTextAsync(cancellationToken);
            cacheItem = JsonConvert.DeserializeObject<CacheItem>(json);

            // An empty (or literal "null") blob deserializes to null: treat it as a miss
            // instead of dereferencing it.
            if (cacheItem == null)
                continue;

            // replicate in current month
            if (i > 0)
                await currentBlob.UploadTextAsync(json, cancellationToken);

            // if it isn't up for refresh, just return the existing
            if (cacheItem.NextRefreshTimestamp > DateTime.UtcNow)
                return new BlobContent
                {
                    Value = cacheItem.Output,
                    Expires = cacheItem.NextRefreshTimestamp
                };
        }

        if (cacheItem == null)
            cacheItem = new CacheItem();

        cacheItem.Input = input;
        cacheItem.NextRefreshTimestamp = DateTime.UtcNow + refreshTimeSpan;

        await currentBlob.UploadTextAsync(JsonConvert.SerializeObject(cacheItem), cancellationToken);

        return null;
    }

    /// <summary>
    /// Stores input and output for an item in the current month's container.
    /// </summary>
    /// <param name="site">Site the item belongs to; first segment of the blob name.</param>
    /// <param name="id">Item identifier (e.g. a URL); escaped and appended to the blob name.</param>
    /// <param name="service">Service name; part of the container name.</param>
    /// <param name="input">Input that produced <paramref name="output"/>.</param>
    /// <param name="output">Output to cache.</param>
    /// <param name="refreshTimeSpan">Time from now until the item is due for its next refresh.</param>
    /// <param name="cancellationToken">Token forwarded to the storage operations.</param>
    /// <returns>The stored output together with its refresh timestamp.</returns>
    public async Task<BlobContent> PersistAsync(string site, string id, string service, string input, string output, TimeSpan refreshTimeSpan, CancellationToken cancellationToken)
    {
        var container = await this.GetContainer(DateTime.UtcNow, service, cancellationToken);
        var blobName = this.ToBlobName(site, id);
        var blob = container.GetBlockBlobReference(blobName);

        var cacheItem = new CacheItem
        {
            NextRefreshTimestamp = DateTime.UtcNow + refreshTimeSpan,
            // store the input as well, to be consistent with GetAsync
            Input = input,
            Output = output
        };

        await blob.UploadTextAsync(JsonConvert.SerializeObject(cacheItem), cancellationToken);

        return new BlobContent
        {
            Value = output,
            Expires = cacheItem.NextRefreshTimestamp
        };
    }
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Result of a cache lookup or store: the raw cached value, its refresh time and
/// the JSON object derived from it.
/// </summary>
public sealed class BlobContent
{
    /// <summary>Raw cached output as text.</summary>
    public string Value { get; set; }

    /// <summary>Timestamp at which the cached value is due for refresh.</summary>
    public DateTime Expires { get; set; }

    /// <summary>JSON object built from <see cref="Value"/>.</summary>
    public JObject Output { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// JSON document persisted per cached item: the next refresh time plus the
/// input and output of the cached call.
/// </summary>
public sealed class CacheItem
{
    /// <summary>Timestamp after which the item is due for refresh.</summary>
    [JsonProperty("nextRefreshTimestamp")]
    public DateTime NextRefreshTimestamp { get; set; }

    /// <summary>
    /// Input of the cached call. Serialized through RawStringConverter, which is
    /// defined elsewhere; NOTE(review): presumably written as raw JSON, confirm.
    /// </summary>
    [JsonProperty("input"), JsonConverter(typeof(RawStringConverter))]
    public string Input { get; set; }

    /// <summary>
    /// Output of the cached call. Serialized through RawStringConverter, like <see cref="Input"/>.
    /// </summary>
    [JsonProperty("output"), JsonConverter(typeof(RawStringConverter))]
    public string Output { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
using System;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Helper for locating certificates in the personal ("My") certificate store.
/// </summary>
public static class CertificateUtil
{
    /// <summary>
    /// Returns the first certificate in the "My" store of <paramref name="storeLocation"/>
    /// whose thumbprint matches <paramref name="thumbprint"/>.
    /// </summary>
    /// <param name="storeLocation">Store location to search.</param>
    /// <param name="thumbprint">Thumbprint of the certificate to find.</param>
    /// <returns>The first matching certificate.</returns>
    /// <exception cref="Exception">
    /// No certificate matches; the message lists the thumbprints found in the store.
    /// </exception>
    public static X509Certificate2 FindCertificateByThumbprint(StoreLocation storeLocation, string thumbprint)
    {
        var certStore = new X509Store(StoreName.My, storeLocation);
        try
        {
            certStore.Open(OpenFlags.ReadOnly);

            // Invalid certificates are included in the search because the certs are self-signed.
            var matches = certStore.Certificates.Find(X509FindType.FindByThumbprint, thumbprint, false);

            if (matches != null && matches.Count != 0)
                return matches[0];

            var knownThumbprints = certStore.Certificates
                .OfType<X509Certificate2>()
                .Select(cert => cert.Thumbprint);
            var availableCertThumbprints = string.Join(",", knownThumbprints);

            throw new Exception($"Cannot find certificate in My\\{storeLocation} with thumbprint '{thumbprint}'. Available certs are {availableCertThumbprints}");
        }
        finally
        {
            // the store is closed on both the success and the failure path
            certStore.Close();
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// <see cref="HttpCachedService"/> variant that sends the API key in the
/// "Ocp-Apim-Subscription-Key" header and optionally appends fixed query
/// parameters to the client's base address.
/// </summary>
public class CognitiveService : HttpCachedService
{
    private readonly string queryParams;

    /// <summary>Creates the service wrapper.</summary>
    /// <param name="containerName">Name passed through to the <see cref="HttpCachedService"/> constructor.</param>
    /// <param name="queryParams">Optional text appended verbatim to the client's base address.</param>
    public CognitiveService(string containerName, string queryParams = null)
        : base(containerName)
    {
        this.queryParams = queryParams;
    }

    /// <summary>
    /// Adds the subscription key header and, if configured, the query parameters.
    /// </summary>
    protected override void Initialize()
    {
        // TODO: need to re-create client (can't just update base address if the key changes...).
        // Removing a previously added "Ocp-Apim-Subscription-Key" header first was considered
        // but is not done here.
        var defaultHeaders = this.client.DefaultRequestHeaders;
        defaultHeaders.Add("Ocp-Apim-Subscription-Key", this.apiKey);

        if (string.IsNullOrEmpty(this.queryParams))
            return;

        var baseAddress = this.client.BaseAddress.ToString();
        this.client.BaseAddress = new Uri(baseAddress + this.queryParams);
    }
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using System.Net.Http;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// HTTP-triggered function that sends title and article text to the service configured
/// under the "CogEntityLinking" name and exposes the returned entities as a "Tags" object
/// (entity name mapped to its highest score).
/// </summary>
public class CognitiveServiceEntityLinking
{
    private static readonly CognitiveService cogService;

    static CognitiveServiceEntityLinking()
    {
        cogService = new CognitiveService("CogEntityLinking");
    }

    /// <summary>
    /// Function entry point; delegates the request handling to the cached cognitive service.
    /// </summary>
    /// <param name="req">Incoming HTTP request.</param>
    /// <param name="log">Function trace writer.</param>
    /// <param name="cancellationToken">Token forwarded to the cached service call.</param>
    /// <returns>The HTTP response produced by the cached service.</returns>
    public static Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log, CancellationToken cancellationToken)
    {
        return cogService.InvokeAsync(req, log,
            reqBody =>
            {
                // Request text: title and article, one per line, skipping empty parts.
                var text = new StringBuilder();

                if (!string.IsNullOrEmpty(reqBody.Title))
                    text.AppendLine(reqBody.Title);

                if (!string.IsNullOrEmpty(reqBody.Article))
                    text.AppendLine(reqBody.Article);

                return Services.Limit(text.ToString(), 10240);
            },
            (reqBody, blobContent) =>
            {
                blobContent.Output = new JObject();

                var parsed = JsonConvert.DeserializeObject<EntityResponse>(blobContent.Value);
                if (parsed?.Entities == null)
                    return;

                // One property per distinct entity name, carrying the highest score seen for it.
                var tags = new JObject();
                foreach (var sameName in parsed.Entities.GroupBy(entity => entity.Name))
                    tags.Add(new JProperty(sameName.Key, sameName.Max(entity => entity.Score)));

                blobContent.Output.Add("Tags", tags);
            },
            cancellationToken);
    }

    /// <summary>Response payload: the list of linked entities.</summary>
    public class EntityResponse
    {
        [JsonProperty("entities")]
        public Entity[] Entities { get; set; }
    }

    /// <summary>A single linked entity with its score.</summary>
    public class Entity
    {
        [JsonProperty("name")]
        public string Name { get; set; }

        [JsonProperty("score")]
        public float Score { get; set; }
    }
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"scriptFile": "..\\bin\\Crawl.dll",
|
||||
"entryPoint": "Microsoft.DecisionService.Crawl.CognitiveServiceEntityLinking.Run",
|
||||
"bindings": [
|
||||
{
|
||||
"authLevel": "function",
|
||||
"name": "req",
|
||||
"type": "httpTrigger",
|
||||
"direction": "in"
|
||||
},
|
||||
{
|
||||
"name": "$return",
|
||||
"type": "http",
|
||||
"direction": "out"
|
||||
}
|
||||
],
|
||||
"disabled": false
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using System.Net.Http;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// HTTP-triggered function that sends title and article text to the service configured
/// under the "CogTextAnalytics" name and exposes the score of the single returned
/// document as "XSentiment".
/// </summary>
public class CognitiveServiceTextAnalytics
{
    private static readonly CognitiveService cogService;

    static CognitiveServiceTextAnalytics()
    {
        cogService = new CognitiveService("CogTextAnalytics");
    }

    /// <summary>
    /// Function entry point; delegates the request handling to the cached cognitive service.
    /// </summary>
    /// <param name="req">Incoming HTTP request.</param>
    /// <param name="log">Function trace writer.</param>
    /// <param name="cancellationToken">Token forwarded to the cached service call.</param>
    /// <returns>The HTTP response produced by the cached service.</returns>
    public static Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log, CancellationToken cancellationToken)
    {
        return cogService.InvokeAsync(req, log,
            reqBody =>
            {
                // Based on email thread with Arvind Krishnaa Jagannathan <arjagann@microsoft.com>
                const int maxTextLength = 10240 / 2;

                // Request text: title and article, one per line, skipping empty parts.
                var buffer = new StringBuilder();

                if (!string.IsNullOrEmpty(reqBody.Title))
                    buffer.AppendLine(reqBody.Title);

                if (!string.IsNullOrEmpty(reqBody.Article))
                    buffer.AppendLine(reqBody.Article);

                var text = buffer.ToString();
                if (text.Length >= maxTextLength)
                    text = text.Substring(0, maxTextLength);

                // The language is deliberately left unset (it was "english" at some point).
                var document = new TextAnalyticDocument
                {
                    Text = text,
                    Id = "1"
                };

                return new TextAnalyticRequest
                {
                    Documents = new List<TextAnalyticDocument> { document }
                };
            },
            (reqBody, blobContent) =>
            {
                blobContent.Output = new JObject();

                var response = JsonConvert.DeserializeObject<TextAnalyticResponse>(blobContent.Value);
                var documents = response?.Documents;

                // Exactly one document was sent, so only a single-document answer is used.
                if (documents == null || documents.Length != 1)
                    return;

                blobContent.Output.Add(new JProperty("XSentiment", documents[0].Score));
            },
            cancellationToken);
    }

    /// <summary>Request payload: the documents to analyze.</summary>
    public class TextAnalyticRequest
    {
        [JsonProperty("documents")]
        public List<TextAnalyticDocument> Documents { get; set; }
    }

    /// <summary>A single document of the request; "language" is omitted from the JSON when null.</summary>
    public class TextAnalyticDocument
    {
        [JsonProperty("language", NullValueHandling = NullValueHandling.Ignore)]
        public string Language { get; set; }

        [JsonProperty("id")]
        public string Id { get; set; }

        [JsonProperty("text")]
        public string Text { get; set; }
    }

    /// <summary>Response payload: scored documents and per-document errors.</summary>
    public class TextAnalyticResponse
    {
        [JsonProperty("documents")]
        public TextAnalyticResponseDocument[] Documents { get; set; }

        [JsonProperty("errors")]
        public TextAnalyticResponseError[] Errors { get; set; }
    }

    /// <summary>Score returned for one document.</summary>
    public class TextAnalyticResponseDocument
    {
        [JsonProperty("id")]
        public string Id { get; set; }

        [JsonProperty("score")]
        public float Score { get; set; }
    }

    /// <summary>Error returned for one document.</summary>
    public class TextAnalyticResponseError
    {
        [JsonProperty("id")]
        public string Id { get; set; }

        [JsonProperty("message")]
        public string Message { get; set; }
    }
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"scriptFile": "..\\bin\\Crawl.dll",
|
||||
"entryPoint": "Microsoft.DecisionService.Crawl.CognitiveServiceTextAnalytics.Run",
|
||||
"bindings": [
|
||||
{
|
||||
"authLevel": "function",
|
||||
"name": "req",
|
||||
"type": "httpTrigger",
|
||||
"direction": "in"
|
||||
},
|
||||
{
|
||||
"name": "$return",
|
||||
"type": "http",
|
||||
"direction": "out"
|
||||
}
|
||||
],
|
||||
"disabled": false
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using System.Net.Http;
|
||||
using Newtonsoft.Json;
|
||||
using Crawl.Data;
|
||||
using Crawl;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// HTTP-triggered function that sends an image URL to the service configured under the
/// "CogVision" name and exposes tags, adult-content scores and celebrities found in the
/// response as separate output properties.
/// </summary>
public class CognitiveServiceVision
{
    private static readonly CognitiveService cogService;

    static CognitiveServiceVision()
    {
        cogService = new CognitiveService("CogVision", queryParams: "?visualFeatures=Categories,Tags,Adult,Faces&details=Celebrities&language=en");
    }

    /// <summary>
    /// Function entry point; delegates the request handling to the cached cognitive service.
    /// </summary>
    /// <param name="req">Incoming HTTP request.</param>
    /// <param name="log">Function trace writer.</param>
    /// <param name="cancellationToken">Token forwarded to the cached service call.</param>
    /// <returns>The HTTP response produced by the cached service.</returns>
    public static async Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log, CancellationToken cancellationToken)
    {
        return await cogService.InvokeAsync(req, log,
            reqBody => new UrlHolder { Url = reqBody.Image },
            (reqBody, blobContent) =>
            {
                var visionResponse = JsonConvert.DeserializeObject<VisionResponse>(blobContent.Value);

                // multiple namespaces
                blobContent.Output = new JObject();

                // An empty or literal "null" payload deserializes to null; leave the output
                // empty instead of failing, as the sibling text services do.
                if (visionResponse == null)
                    return;

                // R,S,T,U (prefixes of the output property names)
                if (visionResponse.Tags != null)
                    blobContent.Output.Add(
                        new JProperty("RVisionTags",
                            new JObject(
                                visionResponse.Tags.Select(t => new JProperty(t.Name, t.Confidence)))));

                if (visionResponse.Adult != null)
                    blobContent.Output.Add(
                        new JProperty("SVisionAdult",
                            JObject.Parse(JsonConvert.SerializeObject(visionResponse.Adult))));

                if (visionResponse.Categories != null)
                {
                    // Category scores ("TVisionCategories") are intentionally not emitted for now;
                    // only the celebrities found in the category details are.
                    var celebs =
                        visionResponse.Categories
                            .Where(c => c.Detail != null && c.Detail.Celebrities != null)
                            .SelectMany(c => c.Detail.Celebrities)
                            .GroupBy(c => c.Name)
                            .ToList();

                    // one property per celebrity name, carrying the highest confidence seen
                    if (celebs.Count > 0)
                        blobContent.Output.Add(
                            new JProperty("TVisionCelebrities",
                                new JObject(
                                    celebs.Select(t => new JProperty(t.Key, t.Max(x => x.Confidence))))));
                }
            },
            cancellationToken);
    }

    /// <summary>Response payload of the vision service.</summary>
    public class VisionResponse
    {
        [JsonProperty("categories")]
        public Category[] Categories { get; set; }

        [JsonProperty("adult")]
        public Adult Adult { get; set; }

        [JsonProperty("tags")]
        public Tag[] Tags { get; set; }

        [JsonProperty("faces")]
        public Face[] Faces { get; set; }
    }

    /// <summary>Image category with score and optional detail.</summary>
    public class Category
    {
        [JsonProperty("name")]
        public string Name { get; set; }

        [JsonProperty("score")]
        public float Score { get; set; }

        [JsonProperty("detail")]
        public CategoryDetail Detail { get; set; }
    }

    /// <summary>Category detail: celebrities recognized in the image.</summary>
    public class CategoryDetail
    {
        [JsonProperty("celebrities")]
        public Celebrity[] Celebrities { get; set; }
    }

    /// <summary>Recognized celebrity with confidence.</summary>
    public class Celebrity
    {
        [JsonProperty("name")]
        public string Name { get; set; }

        [JsonProperty("confidence")]
        public float Confidence { get; set; }
    }

    /// <summary>Adult/racy content flags and scores.</summary>
    public class Adult
    {
        [JsonProperty("isAdultContent")]
        public bool IsAdultContent { get; set; }

        [JsonProperty("isRacyContent")]
        public bool IsRacyContent { get; set; }

        [JsonProperty("adultScore")]
        public float AdultScore { get; set; }

        [JsonProperty("racyScore")]
        public float RacyScore { get; set; }
    }

    /// <summary>Image tag with confidence.</summary>
    public class Tag
    {
        [JsonProperty("name")]
        public string Name { get; set; }

        [JsonProperty("confidence")]
        public float Confidence { get; set; }
    }

    /// <summary>Detected face with age and gender.</summary>
    public class Face
    {
        [JsonProperty("age")]
        public int Age { get; set; }

        [JsonProperty("gender")]
        public string Gender { get; set; }
    }
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"scriptFile": "..\\bin\\Crawl.dll",
|
||||
"entryPoint": "Microsoft.DecisionService.Crawl.CognitiveServiceVision.Run",
|
||||
"bindings": [
|
||||
{
|
||||
"authLevel": "function",
|
||||
"name": "req",
|
||||
"type": "httpTrigger",
|
||||
"direction": "in"
|
||||
},
|
||||
{
|
||||
"name": "$return",
|
||||
"type": "http",
|
||||
"direction": "out"
|
||||
}
|
||||
],
|
||||
"disabled": false
|
||||
}
|
|
@ -0,0 +1,224 @@
|
|||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.4\build\net45\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props" Condition="Exists('..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.4\build\net45\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props')" />
|
||||
<Import Project="..\packages\Microsoft.Net.Compilers.2.1.0\build\Microsoft.Net.Compilers.props" Condition="Exists('..\packages\Microsoft.Net.Compilers.2.1.0\build\Microsoft.Net.Compilers.props')" />
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProductVersion>
|
||||
</ProductVersion>
|
||||
<SchemaVersion>2.0</SchemaVersion>
|
||||
<ProjectGuid>{28285B58-63A5-48F9-99DA-7498E0E3AB22}</ProjectGuid>
|
||||
<ProjectTypeGuids>{349c5851-65df-11da-9384-00065b846f21};{fae04ec0-301f-11d3-bf4b-00c04f79efbc}</ProjectTypeGuids>
|
||||
<OutputType>Library</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>Crawl</RootNamespace>
|
||||
<AssemblyName>Crawl</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
|
||||
<UseIISExpress>true</UseIISExpress>
|
||||
<IISExpressSSLPort />
|
||||
<IISExpressAnonymousAuthentication />
|
||||
<IISExpressWindowsAuthentication />
|
||||
<IISExpressUseClassicPipelineMode />
|
||||
<UseGlobalApplicationHostFile />
|
||||
<NuGetPackageImportStamp>
|
||||
</NuGetPackageImportStamp>
|
||||
<TargetFrameworkProfile />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<ExcludeGeneratedDebugSymbol>false</ExcludeGeneratedDebugSymbol>
|
||||
<PlatformTarget>x64</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="HtmlAgilityPack, Version=1.4.9.5, Culture=neutral, PublicKeyToken=bd319b19eaf3b43a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\HtmlAgilityPack.1.4.9.5\lib\Net45\HtmlAgilityPack.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ApplicationInsights, Version=2.3.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ApplicationInsights.2.3.0\lib\net46\Microsoft.ApplicationInsights.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Azure.KeyVault, Version=2.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Azure.KeyVault.2.0.6\lib\net45\Microsoft.Azure.KeyVault.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Azure.KeyVault.Core, Version=2.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Azure.KeyVault.Core.2.0.4\lib\net45\Microsoft.Azure.KeyVault.Core.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Azure.KeyVault.WebKey, Version=2.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Azure.KeyVault.WebKey.2.0.5\lib\net452\Microsoft.Azure.KeyVault.WebKey.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Azure.WebJobs, Version=2.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Azure.WebJobs.Core.2.0.0\lib\net45\Microsoft.Azure.WebJobs.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Azure.WebJobs.Host, Version=2.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Azure.WebJobs.2.0.0\lib\net45\Microsoft.Azure.WebJobs.Host.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.CodeDom.Providers.DotNetCompilerPlatform, Version=1.0.4.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.4\lib\net45\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="Microsoft.Data.Edm, Version=5.8.1.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Data.Edm.5.8.2\lib\net40\Microsoft.Data.Edm.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Data.OData, Version=5.8.1.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Data.OData.5.8.2\lib\net40\Microsoft.Data.OData.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Data.Services.Client, Version=5.8.1.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Data.Services.Client.5.8.2\lib\net40\Microsoft.Data.Services.Client.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.IdentityModel.Clients.ActiveDirectory, Version=3.13.9.1126, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.IdentityModel.Clients.ActiveDirectory.3.13.9\lib\net45\Microsoft.IdentityModel.Clients.ActiveDirectory.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.IdentityModel.Clients.ActiveDirectory.Platform, Version=3.13.9.1126, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.IdentityModel.Clients.ActiveDirectory.3.13.9\lib\net45\Microsoft.IdentityModel.Clients.ActiveDirectory.Platform.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Rest.ClientRuntime, Version=2.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Rest.ClientRuntime.2.3.7\lib\net452\Microsoft.Rest.ClientRuntime.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Rest.ClientRuntime.Azure, Version=3.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Rest.ClientRuntime.Azure.3.3.6\lib\net452\Microsoft.Rest.ClientRuntime.Azure.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.WindowsAzure.Storage, Version=8.1.1.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\WindowsAzure.Storage.8.1.1\lib\net45\Microsoft.WindowsAzure.Storage.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=9.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Newtonsoft.Json.9.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="System.Net" />
|
||||
<Reference Include="System.Net.Http">
|
||||
</Reference>
|
||||
<Reference Include="System.Net.Http.WebRequest" />
|
||||
<Reference Include="System.Runtime.Serialization" />
|
||||
<Reference Include="System.Security.Cryptography.Algorithms, Version=4.1.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Security.Cryptography.Algorithms.4.3.0\lib\net461\System.Security.Cryptography.Algorithms.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Security.Cryptography.Encoding, Version=4.0.1.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Security.Cryptography.Encoding.4.3.0\lib\net46\System.Security.Cryptography.Encoding.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Security.Cryptography.Primitives, Version=4.0.1.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Security.Cryptography.Primitives.4.3.0\lib\net46\System.Security.Cryptography.Primitives.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Security.Cryptography.X509Certificates, Version=4.1.1.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Security.Cryptography.X509Certificates.4.3.0\lib\net461\System.Security.Cryptography.X509Certificates.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Spatial, Version=5.8.1.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Spatial.5.8.2\lib\net40\System.Spatial.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Web.DynamicData" />
|
||||
<Reference Include="System.Web.Entity" />
|
||||
<Reference Include="System.Web.ApplicationServices" />
|
||||
<Reference Include="System.ComponentModel.DataAnnotations" />
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Drawing" />
|
||||
<Reference Include="System.Web" />
|
||||
<Reference Include="System.Web.Extensions" />
|
||||
<Reference Include="System.Xml" />
|
||||
<Reference Include="System.Configuration" />
|
||||
<Reference Include="System.Web.Services" />
|
||||
<Reference Include="System.EnterpriseServices" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="packages.config">
|
||||
<SubType>Designer</SubType>
|
||||
</Content>
|
||||
<Content Include="Crawl\function.json" />
|
||||
<Content Include="host.json" />
|
||||
<Content Include="RSS\function.json" />
|
||||
<Compile Include="AzureMLTopic\AzureMLTopic.cs" />
|
||||
<Compile Include="BlobCache.cs" />
|
||||
<Compile Include="BlobContent.cs" />
|
||||
<Compile Include="CacheItem.cs" />
|
||||
<Compile Include="CognitiveServiceEmotion\CognitiveServiceEmotion.cs" />
|
||||
<Compile Include="CognitiveServiceEntityLinking\CognitiveServiceEntityLinking.cs" />
|
||||
<Compile Include="CognitiveServiceTextAnalytics\CognitiveServiceTextAnalytics.cs" />
|
||||
<Compile Include="HttpCachedService.cs" />
|
||||
<Compile Include="RSS\RSS.cs" />
|
||||
<Content Include="CognitiveServiceVision\function.json" />
|
||||
<Content Include="CognitiveServiceEmotion\function.json" />
|
||||
<Content Include="CognitiveServiceEntityLinking\function.json" />
|
||||
<Content Include="CognitiveServiceTextAnalytics\function.json" />
|
||||
<Content Include="AzureMLTopic\function.json" />
|
||||
<None Include="Properties\PublishProfiles\DevProfile.pubxml" />
|
||||
<None Include="Web.Debug.config">
|
||||
<DependentUpon>Web.config</DependentUpon>
|
||||
</None>
|
||||
<None Include="Web.Release.config">
|
||||
<DependentUpon>Web.config</DependentUpon>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="Web.config">
|
||||
<SubType>Designer</SubType>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="CognitiveService.cs" />
|
||||
<Compile Include="CognitiveServiceVision\CognitiveServiceVision.cs" />
|
||||
<Compile Include="Crawl\HtmlExtractor.cs" />
|
||||
<Compile Include="Crawl\Crawl.cs" />
|
||||
<Compile Include="Data\CrawlRequest.cs" />
|
||||
<Compile Include="Data\CrawlResponse.cs" />
|
||||
<Compile Include="Data\UrlHolder.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<Compile Include="Services.cs" />
|
||||
<Compile Include="KeyVaultHelper.cs" />
|
||||
<Compile Include="CertificateUtil.cs" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup>
|
||||
<VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">10.0</VisualStudioVersion>
|
||||
<VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||
<Import Project="$(VSToolsPath)\WebApplications\Microsoft.WebApplication.targets" Condition="'$(VSToolsPath)' != ''" />
|
||||
<Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v10.0\WebApplications\Microsoft.WebApplication.targets" Condition="false" />
|
||||
<ProjectExtensions>
|
||||
<VisualStudio>
|
||||
<FlavorProperties GUID="{349c5851-65df-11da-9384-00065b846f21}">
|
||||
<WebProjectProperties>
|
||||
<UseIIS>True</UseIIS>
|
||||
<AutoAssignPort>True</AutoAssignPort>
|
||||
<DevelopmentServerPort>33183</DevelopmentServerPort>
|
||||
<DevelopmentServerVPath>/</DevelopmentServerVPath>
|
||||
<IISUrl>http://localhost:33183/</IISUrl>
|
||||
<NTLMAuthentication>False</NTLMAuthentication>
|
||||
<UseCustomServer>False</UseCustomServer>
|
||||
<CustomServerUrl>
|
||||
</CustomServerUrl>
|
||||
<SaveServerSettingsInUserFile>False</SaveServerSettingsInUserFile>
|
||||
</WebProjectProperties>
|
||||
</FlavorProperties>
|
||||
</VisualStudio>
|
||||
</ProjectExtensions>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
|
||||
</PropertyGroup>
|
||||
<Error Condition="!Exists('..\packages\Microsoft.Net.Compilers.2.1.0\build\Microsoft.Net.Compilers.props')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Net.Compilers.2.1.0\build\Microsoft.Net.Compilers.props'))" />
|
||||
<Error Condition="!Exists('..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.4\build\net45\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.4\build\net45\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props'))" />
|
||||
</Target>
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
|
@ -0,0 +1,112 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using Crawl.Crawl;
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using Microsoft.DecisionService.Crawl.Data;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.ApplicationInsights.DataContracts;
|
||||
using Microsoft.ApplicationInsights;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Function entry point that downloads a web page and returns the metadata
/// extracted from it by <see cref="HtmlExtractor"/> as JSON.
/// </summary>
public class Crawl
{
    // <meta property="microsoft:ds_id" content="some-id">
    //private static Regex MetaMicrosoftDsIdRegex = new Regex(@"<meta[^>]+property\s*=\s*[""']microsoft:ds_id[""'][^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    //private static Regex MetaContentRegex = new Regex(@"content\s*=\s*[""']([^""']+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Reads a <see cref="CrawlRequest"/> from the request body, fetches the
    /// referenced URL and responds with the serialized <see cref="CrawlResponse"/>.
    /// </summary>
    /// <param name="req">Incoming request whose body is the JSON-encoded <see cref="CrawlRequest"/>.</param>
    /// <param name="log">Trace writer used for informational logging.</param>
    /// <returns>A 200 response whose JSON body is the extracted page metadata.</returns>
    /// <remarks>
    /// Any exception is reported to the telemetry client and then rethrown unchanged.
    /// </remarks>
    public static async Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log)
    {
        CrawlRequest crawlRequest = null;
        string reqBodyStr = null;

        try
        {
            using (var operation = Services.TelemetryClient.StartOperation<DependencyTelemetry>("Crawl.HTML"))
            {
                reqBodyStr = await req.Content.ReadAsStringAsync();

                // Store the parsed request in the outer variable so that the catch block
                // below can report Url/AppId/ActionId instead of only the raw JSON.
                crawlRequest = JsonConvert.DeserializeObject<CrawlRequest>(reqBodyStr);

                operation.Telemetry.Properties.Add("AppId", crawlRequest.Site);
                operation.Telemetry.Properties.Add("ActionId", crawlRequest.Id);
                operation.Telemetry.Properties.Add("Url", crawlRequest.Url);

                log.Info($"Crawl AppId={crawlRequest.Site} Id={crawlRequest.Id} Url={crawlRequest.Url}");

                var request = (HttpWebRequest)WebRequest.Create(crawlRequest.Url);

                // Forward the caller's ETag as a conditional GET header when one was supplied.
                if (!string.IsNullOrEmpty(crawlRequest.ETag))
                    request.Headers.Add(HttpRequestHeader.IfNoneMatch, crawlRequest.ETag);

                request.Method = "GET";
                request.KeepAlive = true;
                request.UserAgent = "DSbot/1.0 (+https://ds.microsoft.com/bot.htm)";

                using (var response = (HttpWebResponse)await request.GetResponseAsync())
                {
                    operation.Telemetry.ResultCode = response.StatusCode.ToString();

                    using (var stream = response.GetResponseStream())
                    using (var reader = new StreamReader(stream))
                    {
                        // TODO: allow direct JSON
                        // TODO: look for schema.org
                        var html = await reader.ReadToEndAsync();

                        // TODO: support microsoft:ds_id
                        var result = HtmlExtractor.Parse(html, new Uri(crawlRequest.Url));
                        result.Url = crawlRequest.Url;
                        result.Site = crawlRequest.Site;
                        result.Id = crawlRequest.Id;

                        return new HttpResponseMessage(HttpStatusCode.OK)
                        {
                            Content = new StringContent(
                                JsonConvert.SerializeObject(result, new JsonSerializerSettings
                                {
                                    Formatting = Formatting.None,
                                    StringEscapeHandling = StringEscapeHandling.EscapeNonAscii
                                }),
                                new UTF8Encoding(encoderShouldEmitUTF8Identifier: false),
                                "application/json")
                        };
                    }
                }
            }
        }
        catch (Exception ex)
        {
            var props = new Dictionary<string, string>
            {
                { "Service", req.RequestUri.ToString() }
            };

            // Before the body was parsed only the raw JSON is available;
            // afterwards report the individual request fields.
            if (crawlRequest == null)
                props.Add("JSON", reqBodyStr);
            else
            {
                props.Add("Url", crawlRequest.Url);
                props.Add("AppId", crawlRequest.Site);
                props.Add("ActionId", crawlRequest.Id);
            }

            Services.TelemetryClient.TrackException(ex, props);

            // Rethrow without resetting the original stack trace.
            throw;
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.DecisionService.Crawl.Data;
|
||||
using HtmlAgilityPack;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Extracts page metadata (title, description, type, categories, image,
/// article text and the microsoft:ds_id pass-through value) from HTML.
/// See https://moz.com/blog/meta-data-templates-123
/// </summary>
public static class HtmlExtractor
{
    private static readonly HashSet<string> TitleProperties;
    private static readonly HashSet<string> DescriptionProperties;

    static HtmlExtractor()
    {
        TitleProperties = new HashSet<string>
            { "og:title", "twitter:title" };

        DescriptionProperties = new HashSet<string>
            { "og:description", "twitter:description" };
    }

    /// <summary>
    /// Returns the "content" attribute value of the first node whose "property"
    /// attribute value is contained in <paramref name="properties"/>.
    /// </summary>
    /// <returns>The content value, or null when no node matches or the match has no content attribute.</returns>
    private static string FirstOrNull(HtmlNodeCollection collection, HashSet<string> properties)
    {
        if (collection == null)
            return null;

        foreach (var node in collection)
        {
            // Compare the attribute's value (e.g. "og:title"), not its name,
            // and tolerate nodes that have no "property" attribute at all.
            var property = node.Attributes["property"];
            if (property == null || !properties.Contains(property.Value))
                continue;

            var content = node.Attributes["content"];
            return content != null ? content.Value : null;
        }

        return null;
    }

    /// <summary>
    /// Selects nodes below <paramref name="headNode"/> using <paramref name="xpath"/> and returns
    /// the first "content" attribute value found, falling back to "value" on the same node.
    /// </summary>
    /// <returns>The attribute value, or null when nothing matches.</returns>
    private static string FindMeta(HtmlNode headNode, string xpath)
    {
        var nodes = headNode.SelectNodes(xpath);
        if (nodes == null)
            return null;

        foreach (var node in nodes)
        {
            var attr = node.Attributes["content"];
            if (attr != null)
                return attr.Value;

            attr = node.Attributes["value"];
            if (attr != null)
                return attr.Value;
        }

        return null;
    }

    /// <summary>
    /// Selects nodes below <paramref name="headNode"/> using <paramref name="xpath"/> and returns
    /// the tag-stripped text of the first node that yields any text.
    /// </summary>
    /// <returns>The text, or null when nothing matches or all matches are empty.</returns>
    private static string FindValue(HtmlNode headNode, string xpath)
    {
        var nodes = headNode.SelectNodes(xpath);
        if (nodes == null)
            return null;

        foreach (var node in nodes)
        {
            var title = new StringBuilder();
            StripTags(node, title);

            if (title.Length > 0)
                return title.ToString();
        }

        return null;
    }

    /// <summary>
    /// Lazily yields the "content" and "value" attribute values of every node
    /// selected by <paramref name="xpath"/> below <paramref name="headNode"/>.
    /// </summary>
    private static IEnumerable<string> FindAll(HtmlNode headNode, string xpath)
    {
        var nodes = headNode.SelectNodes(xpath);
        if (nodes == null)
            yield break;

        foreach (var node in nodes)
        {
            var attr = node.Attributes["content"];
            if (attr != null)
                yield return attr.Value;

            attr = node.Attributes["value"];
            if (attr != null)
                yield return attr.Value;
        }
    }

    // Elements whose content is never included in the extracted plain text.
    private static readonly HashSet<string> skipTags = new HashSet<string>()
    {
        "script", "style"
    };

    /// <summary>
    /// Recursively appends the trimmed text of all leaf nodes below <paramref name="root"/>
    /// to <paramref name="plaintext"/>, each followed by a single space.
    /// Comments and the elements listed in <c>skipTags</c> are ignored.
    /// </summary>
    private static void StripTags(HtmlNode root, StringBuilder plaintext)
    {
        foreach (var node in root.ChildNodes)
        {
            if (skipTags.Contains(node.Name.ToLowerInvariant()) || node.NodeType == HtmlNodeType.Comment)
                continue;

            if (!node.HasChildNodes)
            {
                string text = node.InnerText;
                if (!string.IsNullOrWhiteSpace(text))
                    plaintext.Append(text.Trim()).Append(' ');
            }
            else
                StripTags(node, plaintext);
        }
    }

    /// <summary>
    /// Returns the plain text below <paramref name="root"/> with markup, comments,
    /// scripts and styles removed.
    /// </summary>
    public static string StripTags(HtmlNode root)
    {
        var plaintext = new StringBuilder();

        StripTags(root, plaintext);

        return plaintext.ToString();
    }

    /// <summary>
    /// Parses <paramref name="html"/> and extracts the page metadata.
    /// </summary>
    /// <param name="html">The HTML document text.</param>
    /// <param name="sourceUrl">URL the document was loaded from; its scheme is used to complete protocol-relative image URLs.</param>
    /// <returns>
    /// The populated response. When the document has no html/head element an empty response is returned.
    /// </returns>
    public static CrawlResponse Parse(string html, Uri sourceUrl)
    {
        var response = new CrawlResponse();

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var head = doc.DocumentNode.SelectSingleNode("html/head");
        if (head == null)
            return response;

        // NOTE: attribute tests need the '@' prefix; a bare 'name' would test a child element called "name".
        response.Title = FindMeta(head, "meta[@property='og:title' or @name='og:title' or @property='twitter:title' or @name='twitter:title']");

        if (string.IsNullOrEmpty(response.Title))
            response.Title = FindValue(head, "title");

        if (!string.IsNullOrEmpty(response.Title))
            response.Title = WebUtility.HtmlDecode(response.Title.Trim());

        // A missing description must not touch the title that was resolved above.
        response.Description = FindMeta(head, "meta[@property='og:description' or @name='og:description' or @property='twitter:description' or @name='twitter:description' or @name='description']");

        if (response.Description != null)
            response.Description = WebUtility.HtmlDecode(response.Description.Trim());

        response.Type = FindMeta(head, "meta[@property='og:type' or @name='og:type']");

        var categories = FindAll(head, "meta[@property='article:tag' or @name='article:tag']").ToList();
        if (categories.Count > 0)
            response.Categories = categories;

        // TODO: get the better resolution
        var img = FindMeta(head, "meta[@property='og:image' or @name='og:image' or @property='twitter:image' or @name='twitter:image']");
        if (img != null)
        {
            // Protocol-relative URL: borrow the scheme of the page itself.
            if (img.StartsWith("//", StringComparison.Ordinal))
                img = sourceUrl.Scheme + ":" + img;

            // TODO: support relative URLs too
            response.Image = img;
        }

        // build article
        var articleText = new StringBuilder();

        var articles = doc.DocumentNode.SelectNodes("//article");

        if (articles != null)
        {
            // find the longest article text
            string text = null;
            foreach (var art in articles)
            {
                var newText = StripTags(art);
                if (text == null || text.Length < newText.Length)
                    text = newText;
            }

            if (!string.IsNullOrEmpty(text))
                articleText.AppendLine(text);
        }

        response.Article = WebUtility.HtmlDecode(articleText.ToString());

        // <meta property="microsoft:ds_id" content="255308" data-react-helmet="true">
        var dsId = FindMeta(head, "meta[@property='microsoft:ds_id' or @name='microsoft:ds_id']");
        response.PassThroughDetails = WebUtility.HtmlDecode(dsId);

        return response;
    }
}
|
||||
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"scriptFile": "..\\bin\\Crawl.dll",
|
||||
"entryPoint": "Microsoft.DecisionService.Crawl.Crawl.Run",
|
||||
"bindings": [
|
||||
{
|
||||
"authLevel": "function",
|
||||
"name": "req",
|
||||
"type": "httpTrigger",
|
||||
"direction": "in"
|
||||
},
|
||||
{
|
||||
"name": "$return",
|
||||
"type": "http",
|
||||
"direction": "out"
|
||||
}
|
||||
],
|
||||
"disabled": false
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Web;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl.Data
|
||||
{
|
||||
/// <summary>
/// JSON payload accepted by the crawl function.
/// </summary>
public class CrawlRequest
{
    /// <summary>Serialized as "site"; reported as "AppId" in telemetry.</summary>
    [JsonProperty(PropertyName = "site")]
    public string Site { get; set; }

    /// <summary>Serialized as "id"; reported as "ActionId" in telemetry.</summary>
    [JsonProperty(PropertyName = "id")]
    public string Id { get; set; }

    /// <summary>Serialized as "url"; the address of the page to download.</summary>
    [JsonProperty(PropertyName = "url")]
    public string Url { get; set; }

    /// <summary>Serialized as "etag"; when non-empty it is sent as the If-None-Match header.</summary>
    [JsonProperty(PropertyName = "etag")]
    public string ETag { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Web;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl.Data
|
||||
{
|
||||
/// <summary>
/// Metadata extracted from a crawled page. All optional members are omitted
/// from the JSON output when they are null.
/// </summary>
public class CrawlResponse
{
    /// <summary>Serialized as "site".</summary>
    [JsonProperty(PropertyName = "site")]
    public string Site { get; set; }

    /// <summary>Serialized as "id".</summary>
    [JsonProperty(PropertyName = "id")]
    public string Id { get; set; }

    /// <summary>Serialized as "url".</summary>
    [JsonProperty(PropertyName = "url", NullValueHandling = NullValueHandling.Ignore)]
    public string Url { get; set; }

    /// <summary>Serialized as "title".</summary>
    [JsonProperty(PropertyName = "title", NullValueHandling = NullValueHandling.Ignore)]
    public string Title { get; set; }

    /// <summary>Serialized as "description".</summary>
    [JsonProperty(PropertyName = "description", NullValueHandling = NullValueHandling.Ignore)]
    public string Description { get; set; }

    /// <summary>Serialized as "type".</summary>
    [JsonProperty(PropertyName = "type", NullValueHandling = NullValueHandling.Ignore)]
    public string Type { get; set; }

    /// <summary>Serialized as "categories".</summary>
    [JsonProperty(PropertyName = "categories", NullValueHandling = NullValueHandling.Ignore)]
    public List<string> Categories { get; set; }

    /// <summary>Serialized as "image".</summary>
    [JsonProperty(PropertyName = "image", NullValueHandling = NullValueHandling.Ignore)]
    public string Image { get; set; }

    /// <summary>Serialized as "article".</summary>
    [JsonProperty(PropertyName = "article", NullValueHandling = NullValueHandling.Ignore)]
    public string Article { get; set; }

    /// <summary>Serialized as "ds_id".</summary>
    [JsonProperty(PropertyName = "ds_id", NullValueHandling = NullValueHandling.Ignore)]
    public string PassThroughDetails { get; set; }

    /// <summary>Serialized as "forceRefresh"; defaults to false.</summary>
    [JsonProperty(PropertyName = "forceRefresh")]
    public bool ForceRefresh { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Web;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl.Data
|
||||
{
|
||||
/// <summary>
/// Minimal JSON payload carrying a single "url" member.
/// </summary>
public class UrlHolder
{
    /// <summary>Serialized as "url".</summary>
    [JsonProperty(PropertyName = "url")]
    public string Url { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,235 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Crawl.Data;
|
||||
using Microsoft.ApplicationInsights;
|
||||
using Microsoft.ApplicationInsights.DataContracts;
|
||||
using Microsoft.Azure.KeyVault;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Configuration;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.Web.Configuration;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Wraps an HTTP endpoint whose responses are cached through <see cref="BlobCache"/>.
/// Endpoint, API key and storage connection string are read from Key Vault on first use.
/// </summary>
public class HttpCachedService
{
    internal readonly string containerName;
    internal HttpClient client;
    internal string endpoint;
    internal string apiKey;
    internal string storageConnectionString;

    /// <summary>
    /// Creates the service wrapper. No network access happens here.
    /// </summary>
    /// <param name="containerName">
    /// Name of the service; also the prefix of the Key Vault secrets "&lt;name&gt;Endpoint" and "&lt;name&gt;Key".
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="containerName"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="containerName"/> is longer than 18 characters.</exception>
    public HttpCachedService(string containerName)
    {
        if (containerName == null)
            throw new ArgumentNullException(nameof(containerName));

        // limit due to Azure Storage container name
        if (containerName.Length > 24 - 6 /* yyyyMM */)
            throw new ArgumentException($"{nameof(containerName)}: '{containerName}' is too long. Must be {24 - 6} characters at most.");
        this.containerName = containerName;
    }

    /// <summary>
    /// Hook invoked once after <see cref="client"/> has been created; the default does nothing.
    /// </summary>
    protected virtual void Initialize()
    { }

    /// <summary>
    /// Loads the secrets from Key Vault and creates the HTTP client.
    /// Returns immediately when the client already exists.
    /// </summary>
    private async Task InitializeAsync()
    {
        if (this.client != null)
            return;

        var keyVaultUrl = ConfigurationManager.AppSettings["KeyVaultUrl"];

        var keyVaultHelper = new KeyVaultHelper(
            StoreLocation.CurrentUser,
            ConfigurationManager.AppSettings["AzureActiveDirectoryClientId"],
            ConfigurationManager.AppSettings["AzureActiveDirectoryCertificateThumbprint"]);

        var keyVault = new KeyVaultClient(new KeyVaultClient.AuthenticationCallback(keyVaultHelper.GetAccessToken));

        this.endpoint = (await keyVault.GetSecretAsync(keyVaultUrl, containerName + "Endpoint").ConfigureAwait(false)).Value;
        this.apiKey = (await keyVault.GetSecretAsync(keyVaultUrl, containerName + "Key").ConfigureAwait(false)).Value;
        this.storageConnectionString = (await keyVault.GetSecretAsync(keyVaultUrl, "StorageConnectionString").ConfigureAwait(false)).Value;

        this.client = new HttpClient()
        {
            BaseAddress = new Uri(this.endpoint)
        };

        this.Initialize();
    }

    /// <summary>
    /// Returns the cached response for <paramref name="request"/> or, on a cache miss,
    /// posts the request to the endpoint and persists a successful response.
    /// </summary>
    /// <param name="log">Trace writer for verbose diagnostics.</param>
    /// <param name="site">Site identifier passed to the cache and recorded in telemetry.</param>
    /// <param name="id">Item identifier passed to the cache and recorded in telemetry.</param>
    /// <param name="request">
    /// Request payload. A string is sent verbatim as text/plain; any other object is
    /// serialized to JSON and sent as application/json.
    /// </param>
    /// <param name="forceRefresh">When true the cache lookup is skipped.</param>
    /// <param name="cancellationToken">Token forwarded to the cache and to the HTTP call.</param>
    /// <returns>
    /// The cached or freshly persisted content. When the endpoint answers with a
    /// non-success status, a content object that only carries a one minute expiration.
    /// </returns>
    public async Task<BlobContent> PostAsync(TraceWriter log, string site, string id, object request, bool forceRefresh, CancellationToken cancellationToken)
    {
        await this.InitializeAsync();

        var stopwatch = Stopwatch.StartNew();
        var cacheHit = true;
        HttpResponseMessage responseMessage = null;
        string body = null;

        try
        {
            body = request as string;
            string input;
            string contentType;
            if (body != null)
            {
                // if this is a raw string, we need to escape for storage
                input = JsonConvert.SerializeObject(request);
                contentType = "text/plain";
            }
            else
            {
                body = JsonConvert.SerializeObject(request);
                input = body;
                contentType = "application/json";
            }

            log.Trace(new TraceEvent(TraceLevel.Verbose,
                $"Requesting {this.containerName} at {this.endpoint}: {body}"));

            var blobCache = new BlobCache(this.storageConnectionString);

            // lookup Azure Blob storage cache first
            // have a 5min timeout for retries
            BlobContent blobContent = null;
            if (!forceRefresh)
                blobContent = await blobCache.GetAsync(site, id, this.containerName, input, TimeSpan.FromMinutes(5), cancellationToken);

            if (blobContent == null)
            {
                cacheHit = false;

                // Capture the start time before the call so the dependency
                // telemetry reports when the request began, not when it ended.
                var requestStart = DateTimeOffset.UtcNow;
                var requestStopwatch = Stopwatch.StartNew();

                // make the actual HTTP request
                responseMessage = await this.client.PostAsync(
                    string.Empty,
                    new StringContent(
                        body,
                        new UTF8Encoding(encoderShouldEmitUTF8Identifier: false),
                        contentType),
                    cancellationToken);

                Services.TelemetryClient.TrackDependency(this.containerName, this.endpoint, this.containerName, null,
                    requestStart, requestStopwatch.Elapsed,
                    $"{responseMessage.StatusCode} {responseMessage.ReasonPhrase}", responseMessage.IsSuccessStatusCode);

                log.Trace(new TraceEvent(TraceLevel.Verbose, $"Response: {responseMessage.StatusCode} {responseMessage.ReasonPhrase}"));

                if (!responseMessage.IsSuccessStatusCode)
                {
                    blobContent = new BlobContent
                    {
                        // TODO: random expiration
                        Expires = DateTime.UtcNow + TimeSpan.FromMinutes(1),
                    };
                }
                else
                {
                    var responseStr = await responseMessage.Content.ReadAsStringAsync();

                    log.Trace(new TraceEvent(TraceLevel.Verbose, $"Result {this.containerName} at {this.endpoint}: {responseStr}"));

                    // once we got a response, cache for 3 days
                    // TODO: add configuration option
                    // TODO: add force refresh parameter
                    blobContent = await blobCache.PersistAsync(site, id, this.containerName, input, responseStr, TimeSpan.FromDays(3), cancellationToken);
                }
            }

            return blobContent;
        }
        finally
        {
            var props = new Dictionary<string, string>
            {
                { "site", site },
                { "id", id },
                { "cacheHit", cacheHit.ToString() },
                { "StatusCode", responseMessage?.StatusCode.ToString() },
                { "Reason", responseMessage?.ReasonPhrase }
            };

            var sb = new StringBuilder(this.containerName);
            if (responseMessage != null && responseMessage.StatusCode != System.Net.HttpStatusCode.OK)
            {
                props.Add("Request", body);
                sb.Append(" failed");
            }

            Services.TelemetryClient.TrackEvent(
                sb.ToString(),
                props,
                metrics: new Dictionary<string, double>
                {
                    { "requestTime", stopwatch.ElapsedMilliseconds }
                });

            // The response has been fully consumed at this point; release it.
            responseMessage?.Dispose();
        }
    }

    /// <summary>
    /// Handles a function invocation: deserializes the request body, calls
    /// <see cref="PostAsync"/> and lets <paramref name="responseAction"/> post-process the result.
    /// </summary>
    /// <param name="req">Incoming request whose body is a JSON-encoded <see cref="CrawlResponse"/>.</param>
    /// <param name="log">Trace writer used for logging.</param>
    /// <param name="requestBodyFunc">Builds the payload sent to the endpoint from the deserialized body.</param>
    /// <param name="responseAction">Invoked with the deserialized body and the content when the content has a value.</param>
    /// <param name="cancellationToken">Token forwarded to <see cref="PostAsync"/>.</param>
    /// <returns>A response created from the resulting content.</returns>
    /// <remarks>Exceptions are reported through <c>Services.TrackException</c> and rethrown unchanged.</remarks>
    public async Task<HttpResponseMessage> InvokeAsync(HttpRequestMessage req, TraceWriter log,
        Func<CrawlResponse, object> requestBodyFunc,
        Action<CrawlResponse, BlobContent> responseAction,
        CancellationToken cancellationToken)
    {
        log.Info("Crawl." + this.containerName);

        await this.InitializeAsync();

        string reqBodyStr = null;
        CrawlResponse reqBody = null;
        BlobContent blobContent = null;

        try
        {
            using (var operation = Services.TelemetryClient.StartOperation<DependencyTelemetry>("Crawl." + this.containerName))
            {
                reqBodyStr = await req.Content.ReadAsStringAsync();
                reqBody = JsonConvert.DeserializeObject<CrawlResponse>(reqBodyStr);

                operation.Telemetry.Target = this.endpoint;
                operation.Telemetry.Properties.Add("AppId", reqBody.Site);
                operation.Telemetry.Properties.Add("ActionId", reqBody.Id);

                blobContent = await this.PostAsync(
                    log,
                    reqBody.Site,
                    reqBody.Id,
                    requestBodyFunc(reqBody),
                    reqBody.ForceRefresh,
                    cancellationToken);

                if (blobContent != null)
                {
                    operation.Telemetry.Properties.Add("Expires", blobContent.Expires.ToString(CultureInfo.InvariantCulture));

                    if (blobContent.Value != null)
                    {
                        responseAction(reqBody, blobContent);

                        operation.Telemetry.ResultCode = "OK";
                    }
                }

                return req.CreateResponse(blobContent);
            }
        }
        catch (Exception ex)
        {
            Services.TrackException(ex, req, log, reqBodyStr, reqBody, blobContent);

            // Rethrow without resetting the original stack trace.
            throw;
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
using Microsoft.IdentityModel.Clients.ActiveDirectory;
|
||||
using System;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Supplies Key Vault access tokens obtained with a client assertion certificate.
/// See https://azure.microsoft.com/en-us/documentation/articles/key-vault-use-from-web-application/.
/// </summary>
public class KeyVaultHelper
{
    private readonly ClientAssertionCertificate assertionCert;

    /// <summary>
    /// Looks up the certificate by thumbprint and pairs it with the client id.
    /// </summary>
    /// <param name="storeLocation">Certificate store location to search.</param>
    /// <param name="clientId">Client id used for the assertion.</param>
    /// <param name="thumbprint">Thumbprint of the certificate to use.</param>
    public KeyVaultHelper(StoreLocation storeLocation, string clientId, string thumbprint)
    {
        this.assertionCert = new ClientAssertionCertificate(
            clientId,
            CertificateUtil.FindCertificateByThumbprint(storeLocation, thumbprint));
    }

    /// <summary>
    /// Acquires an access token for <paramref name="resource"/> from <paramref name="authority"/>.
    /// </summary>
    /// <param name="authority">Authority used to create the authentication context.</param>
    /// <param name="resource">Resource the token is requested for.</param>
    /// <param name="scope">Not used by this implementation.</param>
    /// <returns>The access token string.</returns>
    public async Task<string> GetAccessToken(string authority, string resource, string scope)
    {
        var authContext = new AuthenticationContext(authority, TokenCache.DefaultShared);
        var authResult = await authContext.AcquireTokenAsync(resource, this.assertionCert);

        return authResult.AccessToken;
    }
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("Crawl")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("Crawl")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2017")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[assembly: Guid("28285b58-63a5-48f9-99da-7498e0e3ab22")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Revision and Build Numbers
|
||||
// by using the '*' as shown below:
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
|
@ -0,0 +1,147 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using System.Net.Http;
|
||||
using System;
|
||||
using Crawl.Data;
|
||||
using Crawl;
|
||||
using System.Collections.Generic;
|
||||
using System.Xml.Linq;
|
||||
using System.Linq;
|
||||
using System.Diagnostics;
|
||||
using System.Text;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// HTTP-triggered function that downloads an RSS feed and converts its most recent
/// items into a JSON array of actions (ids, features, details).
/// </summary>
public sealed class RSS
{
    /// <summary>Upper bound on the number of feed items returned per request.</summary>
    private const int MaxItems = 15;

    /// <summary>Accepted layouts of the pubDate element, tried in order.</summary>
    private static readonly string[] PubDateFormats =
    {
        "ddd, dd MMM yyyy HH:mm:ss zzz",
        "ddd, dd MMM yyyy HH:mm:ss Z"
    };

    // Shared instance: creating an HttpClient per request exhausts sockets under load.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>Request payload: the URL of the RSS feed to transform.</summary>
    public class URLHolder
    {
        [JsonProperty("url")]
        public string Url { get; set; }
    }

    /// <summary>
    /// Reads a <see cref="URLHolder"/> from the request body, fetches the feed and returns the
    /// newest <see cref="MaxItems"/> items (in original feed order) serialized as JSON.
    /// </summary>
    /// <param name="req">Incoming request whose body is JSON of the form {"url": "..."}.</param>
    /// <param name="log">Trace writer used for diagnostics.</param>
    /// <returns>
    /// Always a 200 response. The body is the JSON action list, or empty when the feed
    /// could not be fetched or processed (failures are logged and tracked, not propagated).
    /// </returns>
    public static async Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log)
    {
        var url = string.Empty;
        var stopwatch = Stopwatch.StartNew();
        var jsonResponse = string.Empty;

        try
        {
            var reqBodyStr = await req.Content.ReadAsStringAsync();
            var reqBody = JsonConvert.DeserializeObject<URLHolder>(reqBodyStr);

            url = reqBody.Url;
            log.Info("RSS " + url);

            // TODO: use HttpCachedService (also as means of failover if the RSS stream is down)
            string data = await client.GetStringAsync(url);
            var rss = XDocument.Parse(data);

            // Resolved once so that every item without a usable pubDate gets the SAME timestamp.
            // OrderByDescending is stable, hence such items keep their feed order instead of
            // being ranked by the (increasing) moment at which each one happened to be evaluated.
            var fallbackPubDate = DateTime.UtcNow;

            var items = rss.Descendants()
                .Where(a => a.Name == "item")
                .Select((elem, index) =>
                {
                    var pubDateStr = elem.Descendants("pubDate").FirstOrDefault()?.Value?.Trim();

                    // AdjustToUniversal keeps parsed dates on the same (UTC) time base as the fallback.
                    if (!DateTime.TryParseExact(pubDateStr, PubDateFormats, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out DateTime pubDate))
                        pubDate = fallbackPubDate;

                    return new { elem, pubDate, index };
                })
                .OrderByDescending(elem => elem.pubDate)
                // limit the feed to avoid getting too many
                .Take(MaxItems)
                // Note: this is very important for the Dashboard
                // The order of the items allows customers to specify their base-line policy
                .OrderBy(elem => elem.index)
                .Select(x => x.elem);

            var actions = items.Select(x => new
            {
                ids = new[] { new { id = x.Descendants("link").FirstOrDefault()?.Value } },
                features = new
                {
                    _title = x.Descendants("title").FirstOrDefault()?.Value
                },
                details = new []
                {
                    // TODO: properly support 4.2.6. The "atom:id" Element
                    new { guid = x.Descendants("guid").FirstOrDefault()?.Value }
                }
            }).ToList();

            jsonResponse = JsonConvert.SerializeObject(actions);

            if (log.Level == TraceLevel.Verbose)
                log.Trace(new TraceEvent(TraceLevel.Verbose, $"Successfully transformed '{url}' '{data}' to '{jsonResponse}'"));
            else
                log.Info($"Successfully transformed '{url}'");
        }
        catch (HttpRequestException hre)
        {
            // The remote feed could not be fetched: report as a warning, not as a failure of this service.
            var msg = $"RSS Featurization failed '{url}' for '{req.RequestUri.ToString()}': '{hre.Message}'";

            log.Warning(msg);
            // TODO: maybe switch to dependency w/ status failed?
            Services.TelemetryClient.TrackEvent(msg,
                new Dictionary<string, string>
                {
                    { "Service", req.RequestUri.ToString() },
                    { "Url", url },
                    { "Exception", hre.Message}
                });
        }
        catch (Exception ex)
        {
            log.Error($"Failed to process '{url}'", ex);

            Services.TelemetryClient.TrackException(
                ex,
                new Dictionary<string, string>
                {
                    { "Service", req.RequestUri.ToString() },
                    { "Url", url }
                });

            // Swallow the error and return an empty body. That way we can differentiate
            // between real outages of this service and errors of the remote feed.
        }
        finally
        {
            Services.TelemetryClient.TrackEvent($"RSS {url}",
                metrics: new Dictionary<string, double>
                {
                    { "requestTime", stopwatch.ElapsedMilliseconds }
                });
        }

        return new HttpResponseMessage(System.Net.HttpStatusCode.OK)
        {
            Content = new StringContent(
                jsonResponse,
                new UTF8Encoding(encoderShouldEmitUTF8Identifier: false),
                "application/json")
        };
    }
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"scriptFile": "..\\bin\\Crawl.dll",
|
||||
"entryPoint": "Microsoft.DecisionService.Crawl.RSS.Run",
|
||||
"bindings": [
|
||||
{
|
||||
"authLevel": "function",
|
||||
"name": "req",
|
||||
"type": "httpTrigger",
|
||||
"direction": "in"
|
||||
},
|
||||
{
|
||||
"name": "$return",
|
||||
"type": "http",
|
||||
"direction": "out"
|
||||
}
|
||||
],
|
||||
"disabled": false
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
|
||||
/// Custom JSON converter returning the underlying raw json (avoiding object allocation)
|
||||
/// </summary>
|
||||
/// <summary>
/// Custom JSON converter returning the underlying raw json (avoiding object allocation)
/// </summary>
public class RawStringConverter : JsonConverter
{
    /// <summary>Accepts every type; the converter is meant to be attached explicitly to members.</summary>
    public override bool CanConvert(Type objectType)
    {
        return true;
    }

    /// <summary>
    /// Re-serializes the value the reader is positioned on (a primitive, or a complete
    /// object/array including all nested content) into a compact JSON string.
    /// </summary>
    /// <returns>The JSON text of the current value as a <see cref="string"/>.</returns>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        var sb = new StringBuilder();
        JsonToken previousToken = JsonToken.None;

        // nesting level of the containers opened so far; the value is complete once it drops back to 0
        int depth = 0;
        bool isFirstToken = true;

        do
        {
            // the reader is already positioned on the first token of the value
            if (!isFirstToken && !reader.Read())
            {
                break;
            }

            isFirstToken = false;

            var token = reader.TokenType;

            // Comments are not part of the value; emitting their text would corrupt the output.
            if (token == JsonToken.Comment)
            {
                continue;
            }

            // A separator is required whenever a finished value (primitive or closed container)
            // is followed by anything other than the end of the enclosing container.
            if (CompletesValue(previousToken) && token != JsonToken.EndArray && token != JsonToken.EndObject)
            {
                sb.Append(',');
            }

            switch (token)
            {
                case JsonToken.PropertyName:
                    // serialize the name so that quotes, backslashes and control characters are escaped
                    sb.Append(JsonConvert.SerializeObject(reader.Value.ToString())).Append(':');
                    break;

                case JsonToken.Boolean:
                    sb.Append((bool)reader.Value ? "true" : "false");
                    break;

                case JsonToken.Integer:
                case JsonToken.Float:
                    sb.AppendFormat(CultureInfo.InvariantCulture, "{0}", reader.Value);
                    break;

                case JsonToken.Bytes:
                case JsonToken.Date:
                    sb.Append(JsonConvert.SerializeObject(reader.Value));
                    break;

                case JsonToken.Null:
                    sb.Append("null");
                    break;

                case JsonToken.String:
                    sb.Append(JsonConvert.SerializeObject((string)reader.Value));
                    break;

                case JsonToken.Raw:
                    sb.Append(reader.Value);
                    break;

                case JsonToken.StartArray:
                    sb.Append('[');
                    depth++;
                    break;

                case JsonToken.EndArray:
                    sb.Append(']');
                    depth--;
                    break;

                case JsonToken.StartObject:
                    sb.Append('{');
                    depth++;
                    break;

                case JsonToken.EndObject:
                    sb.Append('}');
                    depth--;
                    break;
            }

            previousToken = token;
        }
        while (depth > 0);

        return sb.ToString();
    }

    /// <summary>Writes the stored raw JSON string verbatim.</summary>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        writer.WriteRawValue((string)value);
    }

    /// <summary>True when <paramref name="token"/> terminates a JSON value (primitive or closing bracket).</summary>
    private static bool CompletesValue(JsonToken token)
    {
        switch (token)
        {
            case JsonToken.Boolean:
            case JsonToken.Date:
            case JsonToken.String:
            case JsonToken.Float:
            case JsonToken.Integer:
            case JsonToken.Raw:
            case JsonToken.Null:
            case JsonToken.Bytes:
            case JsonToken.EndObject:
            case JsonToken.EndArray:
                return true;

            default:
                return false;
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,89 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <copyright company="Microsoft Corporation">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
using Crawl.Data;
|
||||
using Microsoft.ApplicationInsights;
|
||||
using Microsoft.ApplicationInsights.Extensibility;
|
||||
using Microsoft.Azure.WebJobs.Host;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Web;
|
||||
|
||||
namespace Microsoft.DecisionService.Crawl
|
||||
{
|
||||
/// <summary>
/// Shared helpers for the Crawl functions: telemetry client, response creation and error tracking.
/// </summary>
public static class Services
{
    /// <summary>Application Insights client shared by all functions in this assembly.</summary>
    public readonly static TelemetryClient TelemetryClient;

    static Services()
    {
        TelemetryConfiguration.Active.InstrumentationKey = System.Configuration.ConfigurationManager.AppSettings["AppInsightsKey"];
        TelemetryClient = new TelemetryClient();
        TelemetryClient.Context.Cloud.RoleName = "Crawl";
        TelemetryClient.Context.Component.Version = typeof(Services).Assembly.GetName().Version.ToString();
    }

    /// <summary>
    /// Truncates <paramref name="text"/> so that its UTF-8 encoding occupies at most
    /// <paramref name="numBytes"/> bytes.
    /// </summary>
    /// <param name="text">Text to limit; null or empty is returned unchanged.</param>
    /// <param name="numBytes">Maximum size of the result in UTF-8 bytes.</param>
    /// <returns>
    /// The longest prefix of <paramref name="text"/> that fits into the budget. Surrogate pairs
    /// are kept or dropped as a whole, never split.
    /// </returns>
    public static string Limit(string text, int numBytes)
    {
        if (string.IsNullOrEmpty(text) || Encoding.UTF8.GetByteCount(text) <= numBytes)
            return text;

        var chars = text.ToCharArray();
        var usedBytes = 0;
        var length = 0;

        // single forward pass, one code point (1 or 2 chars) at a time
        while (length < chars.Length)
        {
            var unitLength = char.IsSurrogatePair(text, length) ? 2 : 1;
            var unitBytes = Encoding.UTF8.GetByteCount(chars, length, unitLength);

            if (usedBytes + unitBytes > numBytes)
                break;

            usedBytes += unitBytes;
            length += unitLength;
        }

        return text.Substring(0, length);
    }

    /// <summary>
    /// Builds the 200 JSON response for <paramref name="blobContent"/>. When an output object is
    /// present, an "_expires" property is added to it before it is serialized; otherwise the
    /// body is empty.
    /// </summary>
    /// <param name="req">Request being answered (only the extension-method receiver; not read here).</param>
    /// <param name="blobContent">Content whose Output is returned to the caller.</param>
    public static HttpResponseMessage CreateResponse(this HttpRequestMessage req, BlobContent blobContent)
    {
        blobContent.Output?.Add(new JProperty("_expires", blobContent.Expires));

        var response = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
        {
            Content = new StringContent(
                blobContent.Output?.ToString(Formatting.None) ?? string.Empty,
                new UTF8Encoding(encoderShouldEmitUTF8Identifier: false),
                "application/json")
        };

        // Get replaced in deployed version
        // response.Content.Headers.Expires = expires;

        // response.Content.Headers.TryAddWithoutValidation("X-DecisionService-Expires", expires.ToString("ddd, dd MMM yyyy HH:mm:ss 'GMT'"));

        return response;
    }

    /// <summary>
    /// Reports a failed request to Application Insights and to the function log, attaching the
    /// service URI, the raw request and, when available, the app/action ids and the response.
    /// </summary>
    /// <param name="ex">Exception that caused the failure.</param>
    /// <param name="req">Request that was being processed.</param>
    /// <param name="log">Trace writer receiving the error entry.</param>
    /// <param name="reqBodyStr">Raw request body.</param>
    /// <param name="reqBody">Parsed request body; may be null.</param>
    /// <param name="blobContent">Content obtained so far; may be null.</param>
    public static void TrackException(Exception ex, HttpRequestMessage req, TraceWriter log, string reqBodyStr, CrawlResponse reqBody, BlobContent blobContent)
    {
        var props = new Dictionary<string, string>
        {
            { "Service", req.RequestUri.ToString() },
            { "Request", reqBodyStr }
        };

        if (reqBody != null)
        {
            props.Add("AppId", reqBody.Site);
            props.Add("ActionId", reqBody.Id);
        }

        if (blobContent != null)
            props.Add("Response", blobContent.Value);

        TelemetryClient.TrackException(ex, props);
        log.Error($"Request for AppId={reqBody?.Site} ActionId={reqBody?.Id} failed", ex);
    }
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<!-- For more information on using web.config transformation visit https://go.microsoft.com/fwlink/?LinkId=125889 -->
|
||||
|
||||
<configuration xmlns:xdt="http://schemas.microsoft.com/XML-Document-Transform">
|
||||
<!--
|
||||
In the example below, the "SetAttributes" transform will change the value of
|
||||
"connectionString" to use "ReleaseSQLServer" only when the "Match" locator
|
||||
finds an attribute "name" that has a value of "MyDB".
|
||||
|
||||
<connectionStrings>
|
||||
<add name="MyDB"
|
||||
connectionString="Data Source=ReleaseSQLServer;Initial Catalog=MyReleaseDB;Integrated Security=True"
|
||||
xdt:Transform="SetAttributes" xdt:Locator="Match(name)"/>
|
||||
</connectionStrings>
|
||||
-->
|
||||
<system.web>
|
||||
<!--
|
||||
In the example below, the "Replace" transform will replace the entire
|
||||
<customErrors> section of your web.config file.
|
||||
Note that because there is only one customErrors section under the
|
||||
<system.web> node, there is no need to use the "xdt:Locator" attribute.
|
||||
|
||||
<customErrors defaultRedirect="GenericError.htm"
|
||||
mode="RemoteOnly" xdt:Transform="Replace">
|
||||
<error statusCode="500" redirect="InternalError.htm"/>
|
||||
</customErrors>
|
||||
-->
|
||||
</system.web>
|
||||
</configuration>
|
|
@ -0,0 +1,31 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<!-- For more information on using web.config transformation visit https://go.microsoft.com/fwlink/?LinkId=125889 -->
|
||||
|
||||
<configuration xmlns:xdt="http://schemas.microsoft.com/XML-Document-Transform">
|
||||
<!--
|
||||
In the example below, the "SetAttributes" transform will change the value of
|
||||
"connectionString" to use "ReleaseSQLServer" only when the "Match" locator
|
||||
finds an attribute "name" that has a value of "MyDB".
|
||||
|
||||
<connectionStrings>
|
||||
<add name="MyDB"
|
||||
connectionString="Data Source=ReleaseSQLServer;Initial Catalog=MyReleaseDB;Integrated Security=True"
|
||||
xdt:Transform="SetAttributes" xdt:Locator="Match(name)"/>
|
||||
</connectionStrings>
|
||||
-->
|
||||
<system.web>
|
||||
<compilation xdt:Transform="RemoveAttributes(debug)" />
|
||||
<!--
|
||||
In the example below, the "Replace" transform will replace the entire
|
||||
<customErrors> section of your web.config file.
|
||||
Note that because there is only one customErrors section under the
|
||||
<system.web> node, there is no need to use the "xdt:Locator" attribute.
|
||||
|
||||
<customErrors defaultRedirect="GenericError.htm"
|
||||
mode="RemoteOnly" xdt:Transform="Replace">
|
||||
<error statusCode="500" redirect="InternalError.htm"/>
|
||||
</customErrors>
|
||||
-->
|
||||
</system.web>
|
||||
</configuration>
|
|
@ -0,0 +1,50 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!--
|
||||
For more information on how to configure your ASP.NET application, please visit
|
||||
https://go.microsoft.com/fwlink/?LinkId=169433
|
||||
-->
|
||||
<configuration>
|
||||
<!--
|
||||
For a description of web.config changes see http://go.microsoft.com/fwlink/?LinkId=235367.
|
||||
|
||||
The following attributes can be set on the <httpRuntime> tag.
|
||||
<system.Web>
|
||||
<httpRuntime targetFramework="4.6" />
|
||||
</system.Web>
|
||||
-->
|
||||
|
||||
<system.web>
|
||||
<compilation debug="true" targetFramework="4.6.2" />
|
||||
<httpRuntime targetFramework="4.5.2" />
|
||||
</system.web>
|
||||
<runtime>
|
||||
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
|
||||
<dependentAssembly>
|
||||
<assemblyIdentity name="Newtonsoft.Json" publicKeyToken="30ad4fe6b2a6aeed" culture="neutral" />
|
||||
<bindingRedirect oldVersion="0.0.0.0-9.0.0.0" newVersion="9.0.0.0" />
|
||||
</dependentAssembly>
|
||||
<dependentAssembly>
|
||||
<assemblyIdentity name="Microsoft.WindowsAzure.Storage" publicKeyToken="31bf3856ad364e35" culture="neutral" />
|
||||
<bindingRedirect oldVersion="0.0.0.0-8.1.1.0" newVersion="8.1.1.0" />
|
||||
</dependentAssembly>
|
||||
<dependentAssembly>
|
||||
<assemblyIdentity name="Microsoft.Azure.KeyVault.Core" publicKeyToken="31bf3856ad364e35" culture="neutral" />
|
||||
<bindingRedirect oldVersion="0.0.0.0-2.0.0.0" newVersion="2.0.0.0" />
|
||||
</dependentAssembly>
|
||||
<dependentAssembly>
|
||||
<assemblyIdentity name="System.Reactive.Core" publicKeyToken="94bc3704cddfc263" culture="neutral" />
|
||||
<bindingRedirect oldVersion="0.0.0.0-3.0.3000.0" newVersion="3.0.3000.0" />
|
||||
</dependentAssembly>
|
||||
<dependentAssembly>
|
||||
<assemblyIdentity name="System.Net.Http" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
|
||||
<bindingRedirect oldVersion="0.0.0.0-4.1.1.0" newVersion="4.1.1.0" />
|
||||
</dependentAssembly>
|
||||
</assemblyBinding>
|
||||
</runtime>
|
||||
<system.codedom>
|
||||
<compilers>
|
||||
<compiler language="c#;cs;csharp" extension=".cs" type="Microsoft.CodeDom.Providers.DotNetCompilerPlatform.CSharpCodeProvider, Microsoft.CodeDom.Providers.DotNetCompilerPlatform, Version=1.0.4.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" warningLevel="4" compilerOptions="/langversion:7 /nowarn:1659;1699;1701" />
|
||||
<compiler language="vb;vbs;visualbasic;vbscript" extension=".vb" type="Microsoft.CodeDom.Providers.DotNetCompilerPlatform.VBCodeProvider, Microsoft.CodeDom.Providers.DotNetCompilerPlatform, Version=1.0.4.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" warningLevel="4" compilerOptions="/langversion:15 /nowarn:41008 /define:_MYTYPE=\"Web\" /optionInfer+" />
|
||||
</compilers>
|
||||
</system.codedom>
|
||||
</configuration>
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"id": "ce294cb2fbbc45d7a3473d6160d08a7c"
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="HtmlAgilityPack" version="1.4.9.5" targetFramework="net452" />
|
||||
<package id="Microsoft.ApplicationInsights" version="2.3.0" targetFramework="net462" />
|
||||
<package id="Microsoft.Azure.KeyVault" version="2.0.6" targetFramework="net462" />
|
||||
<package id="Microsoft.Azure.KeyVault.Core" version="2.0.4" targetFramework="net452" />
|
||||
<package id="Microsoft.Azure.KeyVault.WebKey" version="2.0.5" targetFramework="net462" />
|
||||
<package id="Microsoft.Azure.WebJobs" version="2.0.0" targetFramework="net452" />
|
||||
<package id="Microsoft.Azure.WebJobs.Core" version="2.0.0" targetFramework="net452" />
|
||||
<package id="Microsoft.CodeDom.Providers.DotNetCompilerPlatform" version="1.0.4" targetFramework="net462" />
|
||||
<package id="Microsoft.Data.Edm" version="5.8.2" targetFramework="net452" />
|
||||
<package id="Microsoft.Data.OData" version="5.8.2" targetFramework="net452" />
|
||||
<package id="Microsoft.Data.Services.Client" version="5.8.2" targetFramework="net452" />
|
||||
<package id="Microsoft.IdentityModel.Clients.ActiveDirectory" version="3.13.9" targetFramework="net462" />
|
||||
<package id="Microsoft.Net.Compilers" version="2.1.0" targetFramework="net462" developmentDependency="true" />
|
||||
<package id="Microsoft.Rest.ClientRuntime" version="2.3.7" targetFramework="net462" />
|
||||
<package id="Microsoft.Rest.ClientRuntime.Azure" version="3.3.6" targetFramework="net462" />
|
||||
<package id="Newtonsoft.Json" version="9.0.1" targetFramework="net462" />
|
||||
<package id="System.ComponentModel.EventBasedAsync" version="4.3.0" targetFramework="net452" />
|
||||
<package id="System.Dynamic.Runtime" version="4.3.0" targetFramework="net452" />
|
||||
<package id="System.Linq.Queryable" version="4.3.0" targetFramework="net452" />
|
||||
<package id="System.Net.Requests" version="4.3.0" targetFramework="net452" />
|
||||
<package id="System.Security.Cryptography.Algorithms" version="4.3.0" targetFramework="net462" />
|
||||
<package id="System.Security.Cryptography.Encoding" version="4.3.0" targetFramework="net462" />
|
||||
<package id="System.Security.Cryptography.Primitives" version="4.3.0" targetFramework="net462" />
|
||||
<package id="System.Security.Cryptography.X509Certificates" version="4.3.0" targetFramework="net462" />
|
||||
<package id="System.Spatial" version="5.8.2" targetFramework="net452" />
|
||||
<package id="WindowsAzure.Storage" version="8.1.1" targetFramework="net452" />
|
||||
</packages>
|
|
@ -0,0 +1,18 @@
|
|||
Install the Azure Functions CLI tools:
|
||||
|
||||
|
||||
npm install -g azure-functions-cli
|
||||
|
||||
Run from the command line in the Crawl directory:
|
||||
|
||||
%AppData%\npm\func run .
|
||||
|
||||
|
||||
To debug, attach Visual Studio to the running func.exe process.
|
||||
|
||||
appsettings.json has the Configuration Manager settings.
|
||||
|
||||
(Invoke-WebRequest -Method Post -Body $r.Content -ContentType 'application/json' 'http://localhost:7071/api/CognitiveServiceVision').Content
|
||||
|
||||
|
||||
curl -v -X POST http://localhost:7071/api/CognitiveServiceVision -H "Content-Type: application/json" -d @vision.json -H "Accept: application/json"
|
Загрузка…
Ссылка в новой задаче