Add files via upload
This commit is contained in:
Родитель
e7c441a430
Коммит
a9f87fe672
|
@ -0,0 +1,111 @@
|
||||||
|
using Azure;
|
||||||
|
using Azure.Analytics.Purview.Catalog;
|
||||||
|
using Azure.Core;
|
||||||
|
using DGCM.Purview.Engine.Models;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Concurrent;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Net;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
namespace DGCM.Purview.Engine.Services {
|
||||||
|
public class PurviewService : IPurviewService {
|
||||||
|
private readonly PurviewCatalogClient _purviewClient;
|
||||||
|
public readonly int MAX_RESULTS_PER_PAGE = 1000;
|
||||||
|
public PurviewService(PurviewCatalogClient purviewClient) {
|
||||||
|
_purviewClient = purviewClient;
|
||||||
|
}
|
||||||
|
public async Task<PurviewSearchResult> SearchByKeywords(string keywords,
|
||||||
|
int limit = 50,
|
||||||
|
int offset = 0) {
|
||||||
|
var searchSchema = new PurviewSearchParameters {
|
||||||
|
Keywords = keywords,
|
||||||
|
Limit = limit,
|
||||||
|
Offset = offset,
|
||||||
|
};
|
||||||
|
var serializedSearchSchema = RequestContent.Create(searchSchema);
|
||||||
|
var response = await _purviewClient.SearchAsync(serializedSearchSchema);
|
||||||
|
if (response.Status != (int)HttpStatusCode.OK) {
|
||||||
|
throw new Exception("Purview's SearchAsync has failed!");
|
||||||
|
}
|
||||||
|
return await JsonSerializer.DeserializeAsync<PurviewSearchResult>(
|
||||||
|
response.Content.ToStream());
|
||||||
|
}
|
||||||
|
public async Task<PurviewSearchResult> GetAll() {
|
||||||
|
var result = await SearchByKeywords("*", MAX_RESULTS_PER_PAGE, 0);
|
||||||
|
if (result.SearchCount < MAX_RESULTS_PER_PAGE) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
int remainingPages =
|
||||||
|
(int)Math.Ceiling(result.SearchCount / (double)MAX_RESULTS_PER_PAGE) -
|
||||||
|
1; // TODO: Remove this after Purview fixes the offset limit.
|
||||||
|
remainingPages = Math.Min(remainingPages, 100);
|
||||||
|
var searchTasks =
|
||||||
|
(Enumerable.Range(1, remainingPages)).Select(async i => {
|
||||||
|
var page = await SearchByKeywords("*", MAX_RESULTS_PER_PAGE,
|
||||||
|
i * MAX_RESULTS_PER_PAGE);
|
||||||
|
result.Value.AddRange(page.Value);
|
||||||
|
});
|
||||||
|
await Task.WhenAll(searchTasks);
|
||||||
|
return result;
|
||||||
|
} /// <inheritdoc/>
|
||||||
|
public async Task<PurviewGetByGuidResult> GetEntityById(string id) {
|
||||||
|
try {
|
||||||
|
var response = await _purviewClient.Entities.GetByGuidAsync(
|
||||||
|
id, new RequestOptions());
|
||||||
|
return await JsonSerializer.DeserializeAsync<PurviewGetByGuidResult>(
|
||||||
|
response.Content.ToStream());
|
||||||
|
} catch (RequestFailedException ex) {
|
||||||
|
throw new RequestFailedException(
|
||||||
|
"Purview's GetByGuidAsync has failed! Does the provided GUID exist?",
|
||||||
|
ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public async Task<IEnumerable<PurviewCompleteEntity>> GetAllEnriched(
|
||||||
|
Action<string> progressCallback = null, int maxConcurrentTasks = 250) {
|
||||||
|
progressCallback("starting Purview search");
|
||||||
|
var purviewSearchResult = await GetAll();
|
||||||
|
progressCallback(
|
||||||
|
$"Purview search done: {purviewSearchResult.Value.Count} results fetched");
|
||||||
|
var completeEntities = new ConcurrentBag<PurviewCompleteEntity>();
|
||||||
|
using (var semaphore =
|
||||||
|
new SemaphoreSlim(initialCount: maxConcurrentTasks)) {
|
||||||
|
int count = 0;
|
||||||
|
var t0 = DateTime.Now.TimeOfDay;
|
||||||
|
progressCallback("starting GetEntityById");
|
||||||
|
ConcurrentBag<Task> tasks = new ConcurrentBag<Task>();
|
||||||
|
foreach (var item in purviewSearchResult.Value) {
|
||||||
|
semaphore.Wait();
|
||||||
|
var t = Task.Factory.StartNew(async () => {
|
||||||
|
try {
|
||||||
|
var getByIdResult = await GetEntityById(item.Id);
|
||||||
|
completeEntities.Add(
|
||||||
|
PurviewCompleteEntity.MergeEntityAndSearchResult(
|
||||||
|
getByIdResult.Entity, item));
|
||||||
|
} catch (RequestFailedException) {
|
||||||
|
var getByIdResult = new PurviewGetByGuidResult() {
|
||||||
|
Entity = new PurviewEntity { Guid = item.Id }
|
||||||
|
};
|
||||||
|
completeEntities.Add(
|
||||||
|
PurviewCompleteEntity.MergeEntityAndSearchResult(
|
||||||
|
getByIdResult.Entity, item));
|
||||||
|
} finally {
|
||||||
|
if (count % 100 == 0 && count != 0) {
|
||||||
|
progressCallback(
|
||||||
|
$"{count}/{purviewSearchResult.Value.Count} details fetched - {(DateTime.Now.TimeOfDay - t0).TotalSeconds}s");
|
||||||
|
t0 = DateTime.Now.TimeOfDay;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
semaphore.Release();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
tasks.Add(t);
|
||||||
|
}
|
||||||
|
await Task.WhenAll(tasks);
|
||||||
|
}
|
||||||
|
return completeEntities;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Загрузка…
Ссылка в новой задаче