Merge pull request #29 from krishanjmistry/simplified-ingest-client
This commit is contained in:
Коммит
e1dca0af14
|
@ -1,3 +1,3 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
members = ["azure-kusto-data"]
|
members = ["azure-kusto-data", "azure-kusto-ingest"]
|
||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
[package]
|
||||||
|
name = "azure-kusto-ingest"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
azure-kusto-data = { path = "../azure-kusto-data", default-features = false }
|
||||||
|
# Azure SDK for Rust crates versions must be kept in sync
|
||||||
|
azure_core = "0.19"
|
||||||
|
azure_storage = "0.19"
|
||||||
|
azure_storage_blobs = "0.19"
|
||||||
|
azure_storage_queues = "0.19"
|
||||||
|
|
||||||
|
async-lock = "3"
|
||||||
|
rand = "0.8"
|
||||||
|
serde = { version = "1", features = ["serde_derive"] }
|
||||||
|
serde_json = "1"
|
||||||
|
thiserror = "1"
|
||||||
|
time = { version = "0.3", features = ["serde-human-readable", "macros"] }
|
||||||
|
url = "2"
|
||||||
|
uuid = { version = "1", features = ["v4", "serde"] }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
|
|
@ -0,0 +1,61 @@
|
||||||
|
use std::env;
|
||||||
|
|
||||||
|
use azure_kusto_data::prelude::{ConnectionString, KustoClient, KustoClientOptions};
|
||||||
|
use azure_kusto_ingest::data_format::DataFormat;
|
||||||
|
use azure_kusto_ingest::descriptors::{BlobAuth, BlobDescriptor};
|
||||||
|
use azure_kusto_ingest::ingestion_properties::IngestionProperties;
|
||||||
|
use azure_kusto_ingest::queued_ingest::QueuedIngestClient;
|
||||||
|
|
||||||
|
/// Example of ingesting data into Kusto from Azure Blob Storage using managed identities.
|
||||||
|
/// This example enforces that the Kusto cluster has a system assigned managed identity with access to the storage account
|
||||||
|
///
|
||||||
|
/// There are some steps that need to be taken to allow for managed identities to work:
|
||||||
|
/// - Permissions as the ingestor to initiate ingestion
|
||||||
|
/// https://learn.microsoft.com/en-us/azure/data-explorer/kusto/api/netfx/kusto-ingest-client-permissions
|
||||||
|
/// - Permissions for Kusto to access storage
|
||||||
|
/// https://learn.microsoft.com/en-us/azure/data-explorer/ingest-data-managed-identity
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
|
let cluster_ingest_uri = env::var("KUSTO_INGEST_URI").expect("Must define KUSTO_INGEST_URI");
|
||||||
|
let user_mi_object_id =
|
||||||
|
env::var("KUSTO_USER_MI_OBJECT_ID").expect("Must define KUSTO_USER_MI_OBJECT_ID");
|
||||||
|
|
||||||
|
// Create a Kusto client with managed identity authentication via the user assigned identity
|
||||||
|
let kusto_client = KustoClient::new(
|
||||||
|
ConnectionString::with_managed_identity_auth(cluster_ingest_uri, Some(user_mi_object_id)),
|
||||||
|
KustoClientOptions::default(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// Create a queued ingest client
|
||||||
|
let queued_ingest_client = QueuedIngestClient::new(kusto_client);
|
||||||
|
|
||||||
|
// Define ingestion properties
|
||||||
|
let ingestion_properties = IngestionProperties {
|
||||||
|
database_name: env::var("KUSTO_DATABASE_NAME").expect("Must define KUSTO_DATABASE_NAME"),
|
||||||
|
table_name: env::var("KUSTO_TABLE_NAME").expect("Must define KUSTO_TABLE_NAME"),
|
||||||
|
// Don't delete the blob on successful ingestion
|
||||||
|
retain_blob_on_success: Some(true),
|
||||||
|
// File format of the blob is Parquet
|
||||||
|
data_format: DataFormat::Parquet,
|
||||||
|
// Assume the server side default for flush_immediately
|
||||||
|
flush_immediately: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Define the blob to ingest from
|
||||||
|
let blob_uri = env::var("BLOB_URI").expect("Must define BLOB_URI");
|
||||||
|
// Define the size of the blob if known, this improves ingestion performance as Kusto does not need to access the blob to determine the size
|
||||||
|
let blob_size: Option<u64> = match env::var("BLOB_SIZE") {
|
||||||
|
Ok(blob_size) => Some(blob_size.parse().expect("BLOB_SIZE must be a valid u64")),
|
||||||
|
Err(_) => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create the blob descriptor, also specifying that the blob should be accessed using the system assigned managed identity of the Kusto cluster
|
||||||
|
let blob_descriptor = BlobDescriptor::new(blob_uri, blob_size, None)
|
||||||
|
.with_blob_auth(BlobAuth::SystemAssignedManagedIdentity);
|
||||||
|
|
||||||
|
let _ = queued_ingest_client
|
||||||
|
.ingest_from_blob(blob_descriptor, ingestion_properties)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
use azure_core::ClientOptions;
|
||||||
|
|
||||||
|
/// Allows configurability of ClientOptions for the storage clients used within [QueuedIngestClient](crate::queued_ingest::QueuedIngestClient)
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
pub struct QueuedIngestClientOptions {
|
||||||
|
pub queue_service_options: ClientOptions,
|
||||||
|
pub blob_service_options: ClientOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ClientOptions> for QueuedIngestClientOptions {
|
||||||
|
/// Creates a `QueuedIngestClientOptions` struct where the same [ClientOptions] are used for all services
|
||||||
|
fn from(client_options: ClientOptions) -> Self {
|
||||||
|
Self {
|
||||||
|
queue_service_options: client_options.clone(),
|
||||||
|
blob_service_options: client_options,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builder for [QueuedIngestClientOptions], call `build()` to create the [QueuedIngestClientOptions]
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
pub struct QueuedIngestClientOptionsBuilder {
|
||||||
|
queue_service_options: ClientOptions,
|
||||||
|
blob_service_options: ClientOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueuedIngestClientOptionsBuilder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
queue_service_options: ClientOptions::default(),
|
||||||
|
blob_service_options: ClientOptions::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_queue_service_options(mut self, queue_service_options: ClientOptions) -> Self {
|
||||||
|
self.queue_service_options = queue_service_options;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_blob_service_options(mut self, blob_service_options: ClientOptions) -> Self {
|
||||||
|
self.blob_service_options = blob_service_options;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(self) -> QueuedIngestClientOptions {
|
||||||
|
QueuedIngestClientOptions {
|
||||||
|
queue_service_options: self.queue_service_options,
|
||||||
|
blob_service_options: self.blob_service_options,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
/// All data formats supported by Kusto.
|
||||||
|
/// Default is [DataFormat::CSV]
|
||||||
|
#[derive(Serialize, Clone, Debug, Default, PartialEq)]
|
||||||
|
#[serde(rename_all = "lowercase")]
|
||||||
|
pub enum DataFormat {
|
||||||
|
ApacheAvro,
|
||||||
|
Avro,
|
||||||
|
#[default]
|
||||||
|
CSV,
|
||||||
|
JSON,
|
||||||
|
MultiJSON,
|
||||||
|
ORC,
|
||||||
|
Parquet,
|
||||||
|
PSV,
|
||||||
|
RAW,
|
||||||
|
SCSV,
|
||||||
|
SOHsv,
|
||||||
|
SingleJSON,
|
||||||
|
SStream,
|
||||||
|
TSV,
|
||||||
|
TSVe,
|
||||||
|
TXT,
|
||||||
|
W3CLOGFILE,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unit tests
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn data_format_default() {
|
||||||
|
assert_eq!(DataFormat::default(), DataFormat::CSV);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,148 @@
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
/// Encapsulates the information related to a blob that is required to ingest from a blob
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct BlobDescriptor {
|
||||||
|
uri: String,
|
||||||
|
pub(crate) size: Option<u64>,
|
||||||
|
pub(crate) source_id: Uuid,
|
||||||
|
/// Authentication information for the blob; when [None], the uri is passed through as is
|
||||||
|
blob_auth: Option<BlobAuth>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BlobDescriptor {
|
||||||
|
/// Create a new BlobDescriptor.
|
||||||
|
///
|
||||||
|
/// Parameters:
|
||||||
|
/// - `uri`: the uri of the blob to ingest from, note you can use the optional helper method `with_blob_auth` to add authentication information to the uri
|
||||||
|
/// - `size`: although the size is not required, providing it is recommended as it allows Kusto to better plan the ingestion process
|
||||||
|
/// - `source_id`: optional, useful if tracking ingestion status, if not provided, a random uuid will be generated
|
||||||
|
pub fn new(uri: impl Into<String>, size: Option<u64>, source_id: Option<Uuid>) -> Self {
|
||||||
|
let source_id = match source_id {
|
||||||
|
Some(source_id) => source_id,
|
||||||
|
None => Uuid::new_v4(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Self {
|
||||||
|
uri: uri.into(),
|
||||||
|
size,
|
||||||
|
source_id,
|
||||||
|
blob_auth: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutator to modify the authentication information of the BlobDescriptor
|
||||||
|
pub fn with_blob_auth(mut self, blob_auth: BlobAuth) -> Self {
|
||||||
|
self.blob_auth = Some(blob_auth);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the uri with the authentication information concatenated, ready to be serialized into the ingestion message
|
||||||
|
pub(crate) fn uri(&self) -> String {
|
||||||
|
match &self.blob_auth {
|
||||||
|
Some(BlobAuth::SASToken(sas_token)) => {
|
||||||
|
format!("{}?{}", self.uri, sas_token.as_str())
|
||||||
|
}
|
||||||
|
Some(BlobAuth::UserAssignedManagedIdentity(object_id)) => {
|
||||||
|
format!("{};managed_identity={}", self.uri, object_id)
|
||||||
|
}
|
||||||
|
Some(BlobAuth::SystemAssignedManagedIdentity) => {
|
||||||
|
format!("{};managed_identity=system", self.uri)
|
||||||
|
}
|
||||||
|
None => self.uri.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for adding authentication information to a blob path in the format expected by Kusto
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum BlobAuth {
|
||||||
|
/// adds `?<sas_token>` to the blob path
|
||||||
|
SASToken(String),
|
||||||
|
/// adds `;managed_identity=<identity>` to the blob path
|
||||||
|
UserAssignedManagedIdentity(String),
|
||||||
|
/// adds `;managed_identity=system` to the blob path
|
||||||
|
SystemAssignedManagedIdentity,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Custom impl of Debug to avoid leaking sensitive information
|
||||||
|
impl std::fmt::Debug for BlobAuth {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
BlobAuth::SASToken(_) => f.debug_struct("SASToken").finish(),
|
||||||
|
BlobAuth::UserAssignedManagedIdentity(object_id) => f
|
||||||
|
.debug_struct("UserAssignedManagedIdentity")
|
||||||
|
.field("object_id", object_id)
|
||||||
|
.finish(),
|
||||||
|
BlobAuth::SystemAssignedManagedIdentity => {
|
||||||
|
f.debug_struct("SystemAssignedManagedIdentity").finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn blob_descriptor_with_no_auth_modification() {
|
||||||
|
let uri = "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob";
|
||||||
|
let blob_descriptor = BlobDescriptor::new(uri, None, None);
|
||||||
|
|
||||||
|
assert_eq!(blob_descriptor.uri(), uri);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn blob_descriptor_with_sas_token() {
|
||||||
|
let uri = "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob";
|
||||||
|
let sas_token = "my_sas_token";
|
||||||
|
let blob_descriptor = BlobDescriptor::new(uri, None, None)
|
||||||
|
.with_blob_auth(BlobAuth::SASToken(sas_token.to_string()));
|
||||||
|
|
||||||
|
assert_eq!(blob_descriptor.uri(), format!("{uri}?{sas_token}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn blob_descriptor_with_user_assigned_managed_identity() {
|
||||||
|
let uri = "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob";
|
||||||
|
let object_id = "my_object_id";
|
||||||
|
let blob_descriptor = BlobDescriptor::new(uri, None, None)
|
||||||
|
.with_blob_auth(BlobAuth::UserAssignedManagedIdentity(object_id.to_string()));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
blob_descriptor.uri(),
|
||||||
|
format!("{uri};managed_identity={object_id}")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn blob_descriptor_with_system_assigned_managed_identity() {
|
||||||
|
let uri = "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob";
|
||||||
|
let blob_descriptor = BlobDescriptor::new(uri, None, None)
|
||||||
|
.with_blob_auth(BlobAuth::SystemAssignedManagedIdentity);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
blob_descriptor.uri(),
|
||||||
|
format!("{uri};managed_identity=system")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn blob_descriptor_with_size() {
|
||||||
|
let uri = "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob";
|
||||||
|
let size = 123;
|
||||||
|
let blob_descriptor = BlobDescriptor::new(uri, Some(size), None);
|
||||||
|
|
||||||
|
assert_eq!(blob_descriptor.size, Some(size));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn blob_descriptor_with_source_id() {
|
||||||
|
let uri = "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob";
|
||||||
|
let source_id = Uuid::new_v4();
|
||||||
|
let blob_descriptor = BlobDescriptor::new(uri, None, Some(source_id));
|
||||||
|
|
||||||
|
assert_eq!(blob_descriptor.source_id, source_id);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
//! Defines [Error] for representing failures in various operations.
|
||||||
|
|
||||||
|
/// Error type for kusto ingestion operations.
|
||||||
|
#[derive(thiserror::Error, Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
/// Error raised when failing to obtain ingestion resources.
|
||||||
|
#[error("Error obtaining ingestion resources: {0}")]
|
||||||
|
ResourceManagerError(#[from] super::resource_manager::ResourceManagerError),
|
||||||
|
|
||||||
|
/// Error relating to (de-)serialization of JSON data
|
||||||
|
#[error("Error in JSON serialization/deserialization: {0}")]
|
||||||
|
JsonError(#[from] serde_json::Error),
|
||||||
|
|
||||||
|
/// Error occurring within core azure crates
|
||||||
|
#[error("Error in azure-core: {0}")]
|
||||||
|
AzureError(#[from] azure_core::error::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result type for kusto ingest operations.
|
||||||
|
pub type Result<T> = std::result::Result<T, Error>;
|
|
@ -0,0 +1,119 @@
|
||||||
|
use serde::Serialize;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
data_format::DataFormat, descriptors::BlobDescriptor,
|
||||||
|
ingestion_properties::IngestionProperties,
|
||||||
|
resource_manager::authorization_context::KustoIdentityToken,
|
||||||
|
};
|
||||||
|
|
||||||
|
use time::{
|
||||||
|
format_description::well_known::{iso8601, Iso8601},
|
||||||
|
OffsetDateTime,
|
||||||
|
};
|
||||||
|
/// The [DEFAULT](iso8601::Config::DEFAULT) ISO8601 format that the time crate serializes to uses a 6 digit year,
|
||||||
|
/// Here we create our own serializer function that uses a 4 digit year which is exposed as `kusto_ingest_iso8601_format`
|
||||||
|
const CONFIG: iso8601::EncodedConfig = iso8601::Config::DEFAULT
|
||||||
|
.set_year_is_six_digits(false)
|
||||||
|
.encode();
|
||||||
|
const FORMAT: Iso8601<CONFIG> = Iso8601::<CONFIG>;
|
||||||
|
time::serde::format_description!(kusto_ingest_iso8601_format, OffsetDateTime, FORMAT);
|
||||||
|
|
||||||
|
/// Message to be serialized as JSON and sent to the ingestion queue
|
||||||
|
///
|
||||||
|
/// Basing the ingestion message on
|
||||||
|
/// https://learn.microsoft.com/en-us/azure/data-explorer/kusto/api/netfx/kusto-ingest-client-rest#ingestion-message-internal-structure
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "PascalCase")]
|
||||||
|
pub(crate) struct QueuedIngestionMessage {
|
||||||
|
/// Message identifier for this upload
|
||||||
|
id: Uuid,
|
||||||
|
/// Path (URI) to the blob.
|
||||||
|
/// This should include any SAS token required to access the blob, or hints to use managed identity auth.
|
||||||
|
/// Extra permissions are required if the `RetainBlobOnSuccess` option is not true so that the ingestion service can delete the blob once it has completed ingesting the data.
|
||||||
|
blob_path: String,
|
||||||
|
// Name of the Kusto database the data will ingest into
|
||||||
|
database_name: String,
|
||||||
|
// Name of the Kusto table the the data will ingest into
|
||||||
|
table_name: String,
|
||||||
|
/// Size of the uncompressed data in bytes.
|
||||||
|
/// Providing this value allows the ingestion service to optimize ingestion by potentially aggregating multiple blobs.
|
||||||
|
/// Although this property is optional, it is recommended to provide the size as otherwise the service will access the blob just to retrieve the size.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
raw_data_size: Option<u64>,
|
||||||
|
/// If set to `true`, the blob won't be deleted once ingestion is successfully completed.
|
||||||
|
/// Default is `false` when this property is not specified. Note that this has implications on permissions required against the blob.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
retain_blob_on_success: Option<bool>,
|
||||||
|
/// If set to `true`, any server side aggregation will be skipped - thus overriding the batching policy. Default is `false`.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
flush_immediately: Option<bool>,
|
||||||
|
#[serde(with = "kusto_ingest_iso8601_format")]
|
||||||
|
source_message_creation_time: OffsetDateTime,
|
||||||
|
// source_message_creation_time: DateTime<Utc>,
|
||||||
|
// Extra properties added to the ingestion command
|
||||||
|
additional_properties: AdditionalProperties,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueuedIngestionMessage {
|
||||||
|
pub(crate) fn new(
|
||||||
|
blob_descriptor: &BlobDescriptor,
|
||||||
|
ingestion_properties: &IngestionProperties,
|
||||||
|
authorization_context: KustoIdentityToken,
|
||||||
|
) -> Self {
|
||||||
|
let additional_properties = AdditionalProperties {
|
||||||
|
authorization_context,
|
||||||
|
data_format: ingestion_properties.data_format.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Self {
|
||||||
|
id: blob_descriptor.source_id,
|
||||||
|
blob_path: blob_descriptor.uri(),
|
||||||
|
raw_data_size: blob_descriptor.size,
|
||||||
|
database_name: ingestion_properties.database_name.clone(),
|
||||||
|
table_name: ingestion_properties.table_name.clone(),
|
||||||
|
retain_blob_on_success: ingestion_properties.retain_blob_on_success,
|
||||||
|
flush_immediately: ingestion_properties.flush_immediately,
|
||||||
|
source_message_creation_time: OffsetDateTime::now_utc(),
|
||||||
|
additional_properties,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Additional properties to be added to the ingestion message
|
||||||
|
/// This struct is modelled on: https://learn.microsoft.com/en-us/azure/data-explorer/ingestion-properties
|
||||||
|
#[derive(Serialize, Clone, Debug)]
|
||||||
|
struct AdditionalProperties {
|
||||||
|
/// Authorization string obtained from Kusto to allow for ingestion
|
||||||
|
#[serde(rename = "authorizationContext")]
|
||||||
|
authorization_context: KustoIdentityToken,
|
||||||
|
#[serde(rename = "format")]
|
||||||
|
data_format: DataFormat,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn time_custom_iso8601_serialization() {
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
struct TestTimeSerialize {
|
||||||
|
#[serde(with = "kusto_ingest_iso8601_format")]
|
||||||
|
customised_time_format: time::OffsetDateTime,
|
||||||
|
}
|
||||||
|
|
||||||
|
let test_message = TestTimeSerialize {
|
||||||
|
customised_time_format: time::OffsetDateTime::from_unix_timestamp_nanos(
|
||||||
|
1_234_567_890_123_456_789,
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let serialized_message = serde_json::to_string(&test_message).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
serialized_message,
|
||||||
|
"{\"customised_time_format\":\"2009-02-13T23:31:30.123456789Z\"}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
use crate::data_format::DataFormat;
|
||||||
|
|
||||||
|
/// Properties of ingestion that can be used when ingesting data into Kusto allowing for customisation of the ingestion process
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct IngestionProperties {
|
||||||
|
/// Name of the database to ingest into
|
||||||
|
pub database_name: String,
|
||||||
|
/// Name of the table to ingest into
|
||||||
|
pub table_name: String,
|
||||||
|
/// Whether the blob is retained after ingestion.
|
||||||
|
/// Note that the default when not provided is `false`, meaning that Kusto will attempt to delete the blob upon ingestion.
|
||||||
|
/// This will only be successful if provided sufficient permissions on the blob
|
||||||
|
pub retain_blob_on_success: Option<bool>,
|
||||||
|
/// Format of the data being ingested
|
||||||
|
pub data_format: DataFormat,
|
||||||
|
/// If set to `true`, any aggregation will be skipped. Default is `false`
|
||||||
|
pub flush_immediately: Option<bool>,
|
||||||
|
}
|
|
@ -0,0 +1,8 @@
|
||||||
|
pub mod client_options;
|
||||||
|
pub mod data_format;
|
||||||
|
pub mod descriptors;
|
||||||
|
pub mod error;
|
||||||
|
pub(crate) mod ingestion_blob_info;
|
||||||
|
pub mod ingestion_properties;
|
||||||
|
pub mod queued_ingest;
|
||||||
|
pub(crate) mod resource_manager;
|
|
@ -0,0 +1,62 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::error::Result;
|
||||||
|
use azure_core::base64;
|
||||||
|
use azure_kusto_data::prelude::KustoClient;
|
||||||
|
|
||||||
|
use crate::client_options::QueuedIngestClientOptions;
|
||||||
|
use crate::descriptors::BlobDescriptor;
|
||||||
|
use crate::ingestion_blob_info::QueuedIngestionMessage;
|
||||||
|
use crate::ingestion_properties::IngestionProperties;
|
||||||
|
use crate::resource_manager::ResourceManager;
|
||||||
|
|
||||||
|
/// Client for ingesting data into Kusto using the queued flavour of ingestion
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct QueuedIngestClient {
|
||||||
|
resource_manager: Arc<ResourceManager>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueuedIngestClient {
|
||||||
|
/// Creates a new client from the given [KustoClient].
|
||||||
|
///
|
||||||
|
/// **WARNING**: the [KustoClient] must be created with a connection string that points to the ingestion endpoint
|
||||||
|
pub fn new(kusto_client: KustoClient) -> Self {
|
||||||
|
Self::new_with_client_options(kusto_client, QueuedIngestClientOptions::default())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new client from the given [KustoClient] and [QueuedIngestClientOptions]
|
||||||
|
/// This allows for customisation of the [ClientOptions] used for the storage clients
|
||||||
|
///
|
||||||
|
/// **WARNING**: the [KustoClient] must be created with a connection string that points to the ingestion endpoint
|
||||||
|
pub fn new_with_client_options(
|
||||||
|
kusto_client: KustoClient,
|
||||||
|
options: QueuedIngestClientOptions,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
resource_manager: Arc::new(ResourceManager::new(kusto_client, options)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ingest a file into Kusto from Azure Blob Storage
|
||||||
|
pub async fn ingest_from_blob(
|
||||||
|
&self,
|
||||||
|
blob_descriptor: BlobDescriptor,
|
||||||
|
ingestion_properties: IngestionProperties,
|
||||||
|
) -> Result<()> {
|
||||||
|
let queue_client = self.resource_manager.random_ingestion_queue().await?;
|
||||||
|
|
||||||
|
let auth_context = self.resource_manager.authorization_context().await?;
|
||||||
|
|
||||||
|
let message =
|
||||||
|
QueuedIngestionMessage::new(&blob_descriptor, &ingestion_properties, auth_context);
|
||||||
|
|
||||||
|
let message = serde_json::to_string(&message)?;
|
||||||
|
|
||||||
|
// Base64 encode the ingestion message
|
||||||
|
let message = base64::encode(&message);
|
||||||
|
|
||||||
|
let _resp = queue_client.put_message(message).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
use std::{sync::Arc, time::Duration};
|
||||||
|
|
||||||
|
pub mod authorization_context;
|
||||||
|
pub mod cache;
|
||||||
|
pub mod ingest_client_resources;
|
||||||
|
pub mod resource_uri;
|
||||||
|
pub mod utils;
|
||||||
|
|
||||||
|
use azure_kusto_data::prelude::KustoClient;
|
||||||
|
|
||||||
|
use azure_storage_queues::QueueClient;
|
||||||
|
|
||||||
|
use crate::client_options::QueuedIngestClientOptions;
|
||||||
|
|
||||||
|
use self::{
|
||||||
|
authorization_context::{AuthorizationContext, KustoIdentityToken},
|
||||||
|
ingest_client_resources::IngestClientResources,
|
||||||
|
};
|
||||||
|
|
||||||
|
use rand::{seq::SliceRandom, thread_rng};
|
||||||
|
|
||||||
|
pub const RESOURCE_REFRESH_PERIOD: Duration = Duration::from_secs(60 * 60);
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum ResourceManagerError {
|
||||||
|
#[error("Failed to obtain ingestion resources: {0}")]
|
||||||
|
IngestClientResourcesError(#[from] ingest_client_resources::IngestionResourceError),
|
||||||
|
|
||||||
|
#[error("Failed to obtain authorization token: {0}")]
|
||||||
|
AuthorizationContextError(#[from] authorization_context::KustoIdentityTokenError),
|
||||||
|
|
||||||
|
#[error("Failed to select a resource - no resources found")]
|
||||||
|
NoResourcesFound,
|
||||||
|
}
|
||||||
|
|
||||||
|
type Result<T> = std::result::Result<T, ResourceManagerError>;
|
||||||
|
|
||||||
|
/// ResourceManager is a struct that keeps track of all the resources required for ingestion using the queued flavour
|
||||||
|
pub struct ResourceManager {
|
||||||
|
ingest_client_resources: Arc<IngestClientResources>,
|
||||||
|
authorization_context: Arc<AuthorizationContext>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ResourceManager {
|
||||||
|
/// Creates a new ResourceManager from the given [KustoClient] and the [QueuedIngestClientOptions] as provided by the user
|
||||||
|
pub fn new(client: KustoClient, client_options: QueuedIngestClientOptions) -> Self {
|
||||||
|
Self {
|
||||||
|
ingest_client_resources: Arc::new(IngestClientResources::new(
|
||||||
|
client.clone(),
|
||||||
|
client_options,
|
||||||
|
)),
|
||||||
|
authorization_context: Arc::new(AuthorizationContext::new(client)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the latest [QueueClient]s ready for posting ingestion messages to
|
||||||
|
async fn ingestion_queues(&self) -> Result<Vec<QueueClient>> {
|
||||||
|
Ok(self.ingest_client_resources.get().await?.ingestion_queues)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a [QueueClient] to ingest to.
|
||||||
|
/// This is a random selection from the list of ingestion queues
|
||||||
|
pub async fn random_ingestion_queue(&self) -> Result<QueueClient> {
|
||||||
|
let ingestion_queues = self.ingestion_queues().await?;
|
||||||
|
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
let selected_queue = ingestion_queues
|
||||||
|
.choose(&mut rng)
|
||||||
|
.ok_or(ResourceManagerError::NoResourcesFound)?;
|
||||||
|
|
||||||
|
Ok(selected_queue.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the latest [KustoIdentityToken] to be added as an authorization context to ingestion messages
|
||||||
|
pub async fn authorization_context(&self) -> Result<KustoIdentityToken> {
|
||||||
|
self.authorization_context
|
||||||
|
.get()
|
||||||
|
.await
|
||||||
|
.map_err(ResourceManagerError::AuthorizationContextError)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,103 @@
|
||||||
|
use azure_kusto_data::prelude::KustoClient;
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
use super::cache::ThreadSafeCachedValue;
|
||||||
|
use super::utils::get_column_index;
|
||||||
|
use super::RESOURCE_REFRESH_PERIOD;
|
||||||
|
|
||||||
|
pub(crate) type KustoIdentityToken = String;
|
||||||
|
|
||||||
|
const AUTHORIZATION_CONTEXT: &str = "AuthorizationContext";
|
||||||
|
|
||||||
|
#[derive(thiserror::Error, Debug)]
|
||||||
|
pub enum KustoIdentityTokenError {
|
||||||
|
#[error("Kusto expected 1 table in results, found {0}")]
|
||||||
|
ExpectedOneTable(usize),
|
||||||
|
|
||||||
|
#[error("Kusto expected 1 row in table, found {0}")]
|
||||||
|
ExpectedOneRow(usize),
|
||||||
|
|
||||||
|
#[error("Column {0} not found in table")]
|
||||||
|
ColumnNotFound(String),
|
||||||
|
|
||||||
|
#[error("Invalid JSON response from Kusto: {0:?}")]
|
||||||
|
InvalidJSONResponse(Value),
|
||||||
|
|
||||||
|
#[error("Token is empty")]
|
||||||
|
EmptyToken,
|
||||||
|
|
||||||
|
#[error(transparent)]
|
||||||
|
KustoError(#[from] azure_kusto_data::error::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
type Result<T> = std::result::Result<T, KustoIdentityTokenError>;
|
||||||
|
/// Logic to obtain a Kusto identity token from the management endpoint. This auth token is a temporary token
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub(crate) struct AuthorizationContext {
|
||||||
|
/// A client against a Kusto ingestion cluster
|
||||||
|
client: KustoClient,
|
||||||
|
/// Cache of the Kusto identity token
|
||||||
|
token_cache: ThreadSafeCachedValue<KustoIdentityToken>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AuthorizationContext {
|
||||||
|
pub fn new(client: KustoClient) -> Self {
|
||||||
|
Self {
|
||||||
|
client,
|
||||||
|
token_cache: ThreadSafeCachedValue::new(RESOURCE_REFRESH_PERIOD),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Executes a KQL query to get the Kusto identity token from the management endpoint
|
||||||
|
async fn query_kusto_identity_token(&self) -> Result<KustoIdentityToken> {
|
||||||
|
let results = self
|
||||||
|
.client
|
||||||
|
.execute_command("NetDefaultDB", ".get kusto identity token", None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Check that there is only 1 table in the results returned by the query
|
||||||
|
let table = match &results.tables[..] {
|
||||||
|
[a] => a,
|
||||||
|
_ => {
|
||||||
|
return Err(KustoIdentityTokenError::ExpectedOneTable(
|
||||||
|
results.tables.len(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Check that a column in this table actually exists called `AuthorizationContext`
|
||||||
|
let index = get_column_index(table, AUTHORIZATION_CONTEXT).ok_or(
|
||||||
|
KustoIdentityTokenError::ColumnNotFound(AUTHORIZATION_CONTEXT.into()),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// Check that there is only 1 row in the table, and that the value in the first row at the given index is not empty
|
||||||
|
let token = match &table.rows[..] {
|
||||||
|
[row] => row
|
||||||
|
.get(index)
|
||||||
|
.ok_or(KustoIdentityTokenError::ColumnNotFound(
|
||||||
|
AUTHORIZATION_CONTEXT.into(),
|
||||||
|
))?,
|
||||||
|
_ => return Err(KustoIdentityTokenError::ExpectedOneRow(table.rows.len())),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Convert the JSON string into a Rust string
|
||||||
|
let token = token
|
||||||
|
.as_str()
|
||||||
|
.ok_or(KustoIdentityTokenError::InvalidJSONResponse(
|
||||||
|
token.to_owned(),
|
||||||
|
))?;
|
||||||
|
|
||||||
|
if token.chars().all(char::is_whitespace) {
|
||||||
|
return Err(KustoIdentityTokenError::EmptyToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(token.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetches the latest Kusto identity token, either retrieving from cache if valid, or by executing a KQL query
|
||||||
|
pub(crate) async fn get(&self) -> Result<KustoIdentityToken> {
|
||||||
|
self.token_cache
|
||||||
|
.get(self.query_kusto_identity_token())
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,183 @@
|
||||||
|
use std::{
|
||||||
|
error::Error,
|
||||||
|
future::Future,
|
||||||
|
sync::Arc,
|
||||||
|
time::{Duration, Instant},
|
||||||
|
};
|
||||||
|
|
||||||
|
use async_lock::RwLock;
|
||||||
|
|
||||||
|
/// Wrapper around a value that allows for storing when the value was last updated,
|
||||||
|
/// as well as the period after which it should be refreshed (i.e. expired)
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Cached<T> {
|
||||||
|
inner: T,
|
||||||
|
last_updated: Instant,
|
||||||
|
refresh_period: Duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Cached<T> {
|
||||||
|
pub fn new(inner: T, refresh_period: Duration) -> Self {
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
last_updated: Instant::now(),
|
||||||
|
refresh_period,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(&self) -> &T {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_expired(&self) -> bool {
|
||||||
|
self.last_updated.elapsed() >= self.refresh_period
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update(&mut self, inner: T) {
|
||||||
|
self.inner = inner;
|
||||||
|
self.last_updated = Instant::now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct ThreadSafeCachedValue<T>
|
||||||
|
where
|
||||||
|
T: Clone,
|
||||||
|
{
|
||||||
|
cache: Arc<RwLock<Cached<Option<T>>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Clone> ThreadSafeCachedValue<T> {
|
||||||
|
pub fn new(refresh_period: Duration) -> Self {
|
||||||
|
Self {
|
||||||
|
cache: Arc::new(RwLock::new(Cached::new(None, refresh_period))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetches the latest value, either retrieving from cache if valid, or by executing the callback
|
||||||
|
pub async fn get<F, E: Error>(&self, callback: F) -> Result<T, E>
|
||||||
|
where
|
||||||
|
F: Future<Output = Result<T, E>>,
|
||||||
|
{
|
||||||
|
// First, try to get a value from the cache by obtaining a read lock
|
||||||
|
{
|
||||||
|
let cache = self.cache.read().await;
|
||||||
|
if !cache.is_expired() {
|
||||||
|
if let Some(cached_value) = cache.get() {
|
||||||
|
return Ok(cached_value.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Obtain a write lock to refresh the cached value
|
||||||
|
let mut cache = self.cache.write().await;
|
||||||
|
|
||||||
|
// Again attempt to return from cache, check is done in case another thread
|
||||||
|
// refreshed the cached value while we were waiting on the write lock and its now valid
|
||||||
|
if !cache.is_expired() {
|
||||||
|
if let Some(cached_value) = cache.get() {
|
||||||
|
return Ok(cached_value.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch new value by executing the callback, update the cache, and return the value
|
||||||
|
let fetched_value = callback.await?;
|
||||||
|
cache.update(Some(fetched_value.clone()));
|
||||||
|
|
||||||
|
Ok(fetched_value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod cached_tests {
|
||||||
|
use super::*;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cached_get() {
|
||||||
|
let value = "hello";
|
||||||
|
let cached_string = Cached::new(value.to_string(), Duration::from_secs(60));
|
||||||
|
|
||||||
|
assert_eq!(cached_string.get(), value);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cached_is_expired() {
|
||||||
|
let value = "hello";
|
||||||
|
let mut cached_string = Cached::new(value.to_string(), Duration::from_secs(60));
|
||||||
|
|
||||||
|
assert!(!cached_string.is_expired());
|
||||||
|
|
||||||
|
cached_string.last_updated = Instant::now() - Duration::from_secs(61);
|
||||||
|
|
||||||
|
assert!(cached_string.is_expired());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cached_update() {
|
||||||
|
let value = "hello";
|
||||||
|
let mut cached_string = Cached::new(value.to_string(), Duration::from_secs(60));
|
||||||
|
|
||||||
|
assert_eq!(cached_string.get(), value);
|
||||||
|
|
||||||
|
let new_value = "world";
|
||||||
|
cached_string.update(new_value.to_string());
|
||||||
|
|
||||||
|
assert!(!cached_string.is_expired());
|
||||||
|
assert_eq!(cached_string.get(), new_value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod thread_safe_cached_value_tests {
|
||||||
|
use super::*;
|
||||||
|
use std::{fmt::Error, sync::Mutex};
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct MockToken {
|
||||||
|
get_token_call_count: Mutex<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MockToken {
|
||||||
|
fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
get_token_call_count: Mutex::new(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_new_token(&self) -> Result<usize, Error> {
|
||||||
|
// Include an incrementing counter in the token to track how many times the token has been refreshed
|
||||||
|
let mut call_count = self.get_token_call_count.lock().unwrap();
|
||||||
|
*call_count += 1;
|
||||||
|
Ok(call_count.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn returns_same_value_if_unexpired() -> Result<(), Error> {
|
||||||
|
let cache = ThreadSafeCachedValue::new(Duration::from_secs(300));
|
||||||
|
let mock_token = MockToken::new();
|
||||||
|
|
||||||
|
let token1 = cache.get(mock_token.get_new_token()).await?;
|
||||||
|
let token2 = cache.get(mock_token.get_new_token()).await?;
|
||||||
|
|
||||||
|
assert_eq!(token1, 1);
|
||||||
|
assert_eq!(token2, 1);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn returns_new_value_if_expired() -> Result<(), Error> {
|
||||||
|
let cache = ThreadSafeCachedValue::new(Duration::from_millis(1));
|
||||||
|
let mock_token = MockToken::new();
|
||||||
|
|
||||||
|
let token1 = cache.get(mock_token.get_new_token()).await?;
|
||||||
|
// Sleep to ensure the token expires
|
||||||
|
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||||
|
let token2 = cache.get(mock_token.get_new_token()).await?;
|
||||||
|
|
||||||
|
assert_eq!(token1, 1);
|
||||||
|
assert_eq!(token2, 2);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,149 @@
|
||||||
|
use crate::client_options::QueuedIngestClientOptions;
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
cache::ThreadSafeCachedValue,
|
||||||
|
resource_uri::{ClientFromResourceUri, ResourceUri},
|
||||||
|
utils, RESOURCE_REFRESH_PERIOD,
|
||||||
|
};
|
||||||
|
|
||||||
|
use azure_core::ClientOptions;
|
||||||
|
use azure_kusto_data::{models::TableV1, prelude::KustoClient};
|
||||||
|
use azure_storage_blobs::prelude::ContainerClient;
|
||||||
|
use azure_storage_queues::QueueClient;
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum IngestionResourceError {
|
||||||
|
#[error("{column_name} column is missing in the table")]
|
||||||
|
ColumnNotFoundError { column_name: String },
|
||||||
|
|
||||||
|
#[error("Response returned from Kusto could not be parsed as a string: {0}")]
|
||||||
|
ParseAsStringError(Value),
|
||||||
|
|
||||||
|
#[error("No {0} resources found in the table")]
|
||||||
|
NoResourcesFound(String),
|
||||||
|
|
||||||
|
#[error(transparent)]
|
||||||
|
KustoError(#[from] azure_kusto_data::error::Error),
|
||||||
|
|
||||||
|
#[error(transparent)]
|
||||||
|
ResourceUriError(#[from] super::resource_uri::ResourceUriError),
|
||||||
|
|
||||||
|
#[error("Kusto expected a table containing ingestion resource results, found no tables")]
|
||||||
|
NoTablesFound,
|
||||||
|
}
|
||||||
|
|
||||||
|
type Result<T> = std::result::Result<T, IngestionResourceError>;
|
||||||
|
|
||||||
|
fn get_column_index(table: &TableV1, column_name: &str) -> Result<usize> {
|
||||||
|
utils::get_column_index(table, column_name).ok_or(IngestionResourceError::ColumnNotFoundError {
|
||||||
|
column_name: column_name.to_string(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper to get a resource URI from a table, erroring if there are no resources of the given name
|
||||||
|
fn get_resource_by_name(table: &TableV1, resource_name: String) -> Result<Vec<ResourceUri>> {
|
||||||
|
let storage_root_index = get_column_index(table, "StorageRoot")?;
|
||||||
|
let resource_type_name_index = get_column_index(table, "ResourceTypeName")?;
|
||||||
|
|
||||||
|
let resource_uris: Vec<Result<ResourceUri>> = table
|
||||||
|
.rows
|
||||||
|
.iter()
|
||||||
|
.filter(|r| r[resource_type_name_index] == resource_name)
|
||||||
|
.map(|r| {
|
||||||
|
let x = r[storage_root_index].as_str().ok_or(
|
||||||
|
IngestionResourceError::ParseAsStringError(r[storage_root_index].clone()),
|
||||||
|
)?;
|
||||||
|
ResourceUri::try_from(x).map_err(IngestionResourceError::ResourceUriError)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if resource_uris.is_empty() {
|
||||||
|
return Err(IngestionResourceError::NoResourcesFound(resource_name));
|
||||||
|
}
|
||||||
|
|
||||||
|
resource_uris.into_iter().collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper to turn a vector of resource URIs into a vector of Azure clients of type T with the provided [ClientOptions]
|
||||||
|
fn create_clients_vec<T>(resource_uris: &[ResourceUri], client_options: &ClientOptions) -> Vec<T>
|
||||||
|
where
|
||||||
|
T: ClientFromResourceUri,
|
||||||
|
{
|
||||||
|
resource_uris
|
||||||
|
.iter()
|
||||||
|
.map(|uri| T::create_client(uri.clone(), client_options.clone()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Storage of the clients required for ingestion
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct InnerIngestClientResources {
|
||||||
|
pub ingestion_queues: Vec<QueueClient>,
|
||||||
|
pub temp_storage_containers: Vec<ContainerClient>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<(&TableV1, &QueuedIngestClientOptions)> for InnerIngestClientResources {
|
||||||
|
type Error = IngestionResourceError;
|
||||||
|
|
||||||
|
/// Attempts to create a new InnerIngestClientResources from the given [TableV1] and [QueuedIngestClientOptions]
|
||||||
|
fn try_from(
|
||||||
|
(table, client_options): (&TableV1, &QueuedIngestClientOptions),
|
||||||
|
) -> std::result::Result<Self, Self::Error> {
|
||||||
|
let secured_ready_for_aggregation_queues =
|
||||||
|
get_resource_by_name(table, "SecuredReadyForAggregationQueue".to_string())?;
|
||||||
|
let temp_storage = get_resource_by_name(table, "TempStorage".to_string())?;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
ingestion_queues: create_clients_vec(
|
||||||
|
&secured_ready_for_aggregation_queues,
|
||||||
|
&client_options.queue_service_options,
|
||||||
|
),
|
||||||
|
temp_storage_containers: create_clients_vec(
|
||||||
|
&temp_storage,
|
||||||
|
&client_options.blob_service_options,
|
||||||
|
),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct IngestClientResources {
|
||||||
|
/// A client against a Kusto ingestion cluster
|
||||||
|
client: KustoClient,
|
||||||
|
/// Cache of the ingest client resources
|
||||||
|
resources_cache: ThreadSafeCachedValue<InnerIngestClientResources>,
|
||||||
|
/// Options to customise the storage clients
|
||||||
|
client_options: QueuedIngestClientOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IngestClientResources {
|
||||||
|
pub fn new(client: KustoClient, client_options: QueuedIngestClientOptions) -> Self {
|
||||||
|
Self {
|
||||||
|
client,
|
||||||
|
resources_cache: ThreadSafeCachedValue::new(RESOURCE_REFRESH_PERIOD),
|
||||||
|
client_options,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Executes a KQL management query that retrieves resource URIs for the various Azure resources used for ingestion
|
||||||
|
async fn query_ingestion_resources(&self) -> Result<InnerIngestClientResources> {
|
||||||
|
let results = self
|
||||||
|
.client
|
||||||
|
.execute_command("NetDefaultDB", ".get ingestion resources", None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let new_resources = results
|
||||||
|
.tables
|
||||||
|
.first()
|
||||||
|
.ok_or(IngestionResourceError::NoTablesFound)?;
|
||||||
|
|
||||||
|
InnerIngestClientResources::try_from((new_resources, &self.client_options))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets the latest resources either from cache, or fetching from Kusto and updating the cached resources
|
||||||
|
pub async fn get(&self) -> Result<InnerIngestClientResources> {
|
||||||
|
self.resources_cache
|
||||||
|
.get(self.query_ingestion_resources())
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,266 @@
|
||||||
|
use azure_core::ClientOptions;
|
||||||
|
use azure_storage::StorageCredentials;
|
||||||
|
use azure_storage_blobs::prelude::{ClientBuilder, ContainerClient};
|
||||||
|
use azure_storage_queues::{QueueClient, QueueServiceClientBuilder};
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum ResourceUriError {
|
||||||
|
#[error("URI scheme must be 'https', was '{0}'")]
|
||||||
|
InvalidScheme(String),
|
||||||
|
|
||||||
|
#[error("URI host must be a domain")]
|
||||||
|
InvalidHost,
|
||||||
|
|
||||||
|
#[error("Object name is missing in the URI")]
|
||||||
|
MissingObjectName,
|
||||||
|
|
||||||
|
#[error("SAS token is missing in the URI as a query parameter")]
|
||||||
|
MissingSasToken,
|
||||||
|
|
||||||
|
#[error("Account name is missing in the URI")]
|
||||||
|
MissingAccountName,
|
||||||
|
|
||||||
|
#[error(transparent)]
|
||||||
|
ParseError(#[from] url::ParseError),
|
||||||
|
|
||||||
|
#[error(transparent)]
|
||||||
|
AzureError(#[from] azure_core::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parsing logic of resource URIs as returned by the Kusto management endpoint
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub(crate) struct ResourceUri {
|
||||||
|
pub(crate) service_uri: String,
|
||||||
|
pub(crate) object_name: String,
|
||||||
|
pub(crate) account_name: String,
|
||||||
|
pub(crate) sas_token: StorageCredentials,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&str> for ResourceUri {
|
||||||
|
type Error = ResourceUriError;
|
||||||
|
|
||||||
|
fn try_from(uri: &str) -> Result<Self, Self::Error> {
|
||||||
|
let parsed_uri = Url::parse(uri)?;
|
||||||
|
|
||||||
|
match parsed_uri.scheme() {
|
||||||
|
"https" => {}
|
||||||
|
other_scheme => return Err(ResourceUriError::InvalidScheme(other_scheme.to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
let host_string = match parsed_uri.host() {
|
||||||
|
Some(url::Host::Domain(host_string)) => host_string,
|
||||||
|
_ => return Err(ResourceUriError::InvalidHost),
|
||||||
|
};
|
||||||
|
|
||||||
|
let service_uri = String::from("https://") + host_string;
|
||||||
|
|
||||||
|
// WIBNI: better parsing that this conforms to a storage resource URI,
|
||||||
|
// perhaps then ResourceUri could take a type like ResourceUri<Queue> or ResourceUri<Container>
|
||||||
|
let (account_name, _service_endpoint) = host_string
|
||||||
|
.split_once('.')
|
||||||
|
.ok_or(ResourceUriError::MissingAccountName)?;
|
||||||
|
|
||||||
|
let object_name = match parsed_uri.path_segments() {
|
||||||
|
Some(mut path_segments) => {
|
||||||
|
let object_name = match path_segments.next() {
|
||||||
|
Some(object_name) if !object_name.is_empty() => object_name,
|
||||||
|
_ => return Err(ResourceUriError::MissingObjectName),
|
||||||
|
};
|
||||||
|
// Ensure there is only one path segment (i.e. the object name)
|
||||||
|
if path_segments.next().is_some() {
|
||||||
|
return Err(ResourceUriError::MissingObjectName);
|
||||||
|
};
|
||||||
|
object_name
|
||||||
|
}
|
||||||
|
None => return Err(ResourceUriError::MissingObjectName),
|
||||||
|
};
|
||||||
|
|
||||||
|
let sas_token = parsed_uri
|
||||||
|
.query()
|
||||||
|
.ok_or(ResourceUriError::MissingSasToken)?;
|
||||||
|
|
||||||
|
let sas_token = StorageCredentials::sas_token(sas_token)?;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
service_uri,
|
||||||
|
object_name: object_name.to_string(),
|
||||||
|
account_name: account_name.to_string(),
|
||||||
|
sas_token,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Trait to be used to create an Azure client from a resource URI with configurability of ClientOptions
|
||||||
|
pub(crate) trait ClientFromResourceUri {
|
||||||
|
fn create_client(resource_uri: ResourceUri, client_options: ClientOptions) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClientFromResourceUri for QueueClient {
|
||||||
|
fn create_client(resource_uri: ResourceUri, client_options: ClientOptions) -> Self {
|
||||||
|
QueueServiceClientBuilder::with_location(
|
||||||
|
azure_storage::CloudLocation::Custom {
|
||||||
|
uri: resource_uri.service_uri,
|
||||||
|
account: resource_uri.account_name,
|
||||||
|
},
|
||||||
|
resource_uri.sas_token,
|
||||||
|
)
|
||||||
|
.client_options(client_options)
|
||||||
|
.build()
|
||||||
|
.queue_client(resource_uri.object_name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClientFromResourceUri for ContainerClient {
|
||||||
|
fn create_client(resource_uri: ResourceUri, client_options: ClientOptions) -> Self {
|
||||||
|
ClientBuilder::with_location(
|
||||||
|
azure_storage::CloudLocation::Custom {
|
||||||
|
uri: resource_uri.service_uri,
|
||||||
|
account: resource_uri.account_name,
|
||||||
|
},
|
||||||
|
resource_uri.sas_token,
|
||||||
|
)
|
||||||
|
.client_options(client_options)
|
||||||
|
.container_client(resource_uri.object_name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use azure_storage::StorageCredentialsInner;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resource_uri_try_from() {
|
||||||
|
let uri = "https://storageaccountname.blob.core.windows.com/containerobjectname?sas=token";
|
||||||
|
let resource_uri = ResourceUri::try_from(uri).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
resource_uri.service_uri,
|
||||||
|
"https://storageaccountname.blob.core.windows.com"
|
||||||
|
);
|
||||||
|
assert_eq!(resource_uri.object_name, "containerobjectname");
|
||||||
|
|
||||||
|
let storage_credential_inner = std::sync::Arc::into_inner(resource_uri.sas_token.0)
|
||||||
|
.unwrap()
|
||||||
|
.into_inner();
|
||||||
|
assert!(matches!(
|
||||||
|
storage_credential_inner,
|
||||||
|
StorageCredentialsInner::SASToken(_)
|
||||||
|
));
|
||||||
|
|
||||||
|
if let StorageCredentialsInner::SASToken(sas_vec) = storage_credential_inner {
|
||||||
|
assert_eq!(sas_vec.len(), 1);
|
||||||
|
assert_eq!(sas_vec[0].0, "sas");
|
||||||
|
assert_eq!(sas_vec[0].1, "token");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn invalid_scheme() {
|
||||||
|
let uri = "http://storageaccountname.blob.core.windows.com/containerobjectname?sas=token";
|
||||||
|
let resource_uri = ResourceUri::try_from(uri);
|
||||||
|
|
||||||
|
assert!(resource_uri.is_err());
|
||||||
|
assert!(matches!(
|
||||||
|
resource_uri.unwrap_err(),
|
||||||
|
ResourceUriError::InvalidScheme(_)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn missing_host_str() {
|
||||||
|
let uri = "https:";
|
||||||
|
let resource_uri = ResourceUri::try_from(uri);
|
||||||
|
println!("{:#?}", resource_uri);
|
||||||
|
|
||||||
|
assert!(resource_uri.is_err());
|
||||||
|
assert!(matches!(
|
||||||
|
resource_uri.unwrap_err(),
|
||||||
|
ResourceUriError::ParseError(_)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn invalid_host_ipv4() {
|
||||||
|
let uri = "https://127.0.0.1/containerobjectname?sas=token";
|
||||||
|
let resource_uri = ResourceUri::try_from(uri);
|
||||||
|
|
||||||
|
assert!(resource_uri.is_err());
|
||||||
|
assert!(matches!(
|
||||||
|
resource_uri.unwrap_err(),
|
||||||
|
ResourceUriError::InvalidHost
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn invalid_host_ipv6() {
|
||||||
|
let uri = "https://[3FFE:FFFF:0::CD30]/containerobjectname?sas=token";
|
||||||
|
let resource_uri = ResourceUri::try_from(uri);
|
||||||
|
println!("{:#?}", resource_uri);
|
||||||
|
|
||||||
|
assert!(resource_uri.is_err());
|
||||||
|
assert!(matches!(
|
||||||
|
resource_uri.unwrap_err(),
|
||||||
|
ResourceUriError::InvalidHost
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn missing_object_name() {
|
||||||
|
let uri = "https://storageaccountname.blob.core.windows.com/?sas=token";
|
||||||
|
let resource_uri = ResourceUri::try_from(uri);
|
||||||
|
println!("{:#?}", resource_uri);
|
||||||
|
|
||||||
|
assert!(resource_uri.is_err());
|
||||||
|
assert!(matches!(
|
||||||
|
resource_uri.unwrap_err(),
|
||||||
|
ResourceUriError::MissingObjectName
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn missing_sas_token() {
|
||||||
|
let uri = "https://storageaccountname.blob.core.windows.com/containerobjectname";
|
||||||
|
let resource_uri = ResourceUri::try_from(uri);
|
||||||
|
println!("{:#?}", resource_uri);
|
||||||
|
|
||||||
|
assert!(resource_uri.is_err());
|
||||||
|
assert!(matches!(
|
||||||
|
resource_uri.unwrap_err(),
|
||||||
|
ResourceUriError::MissingSasToken
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queue_client_from_resource_uri() {
|
||||||
|
let resource_uri = ResourceUri {
|
||||||
|
service_uri: "https://mystorageaccount.queue.core.windows.net".to_string(),
|
||||||
|
object_name: "queuename".to_string(),
|
||||||
|
account_name: "mystorageaccount".to_string(),
|
||||||
|
sas_token: StorageCredentials::sas_token("sas=token").unwrap(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let client_options = ClientOptions::default();
|
||||||
|
let queue_client = QueueClient::create_client(resource_uri, client_options);
|
||||||
|
|
||||||
|
assert_eq!(queue_client.queue_name(), "queuename");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn container_client_from_resource_uri() {
|
||||||
|
let resource_uri = ResourceUri {
|
||||||
|
service_uri: "https://mystorageaccount.blob.core.windows.net".to_string(),
|
||||||
|
object_name: "containername".to_string(),
|
||||||
|
account_name: "mystorageaccount".to_string(),
|
||||||
|
sas_token: StorageCredentials::sas_token("sas=token").unwrap(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let client_options = ClientOptions::default();
|
||||||
|
let container_client = ContainerClient::create_client(resource_uri, client_options);
|
||||||
|
|
||||||
|
assert_eq!(container_client.container_name(), "containername");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
use azure_kusto_data::models::TableV1;
|
||||||
|
|
||||||
|
/// Helper to get a column index from a table
|
||||||
|
// TODO: this could be moved upstream into Kusto Data
|
||||||
|
pub fn get_column_index(table: &TableV1, column_name: &str) -> Option<usize> {
|
||||||
|
table
|
||||||
|
.columns
|
||||||
|
.iter()
|
||||||
|
.position(|c| c.column_name == column_name)
|
||||||
|
}
|
Загрузка…
Ссылка в новой задаче