Streaming from blob (#517)
* streaming from blob * managed streaming Q: why dont we use aio library for storage? * format * fix test * prepering client_base from_stream methods * maybe better * async impl test fallback to queue * format * fix usage of ExecuteRequestParams in tests * fix usage of ExecuteRequestParams in tests * b * fix aio * add span headers for devbug * add span headers for devbug * add span headers for devbug * t * f * revert * print * try copy * remove print --------- Co-authored-by: Ohad Bitton <ohbitton@microsoft.com>
This commit is contained in:
Родитель
40197266f2
Коммит
c6150c0056
|
@ -5,6 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Streaming ingestion from blob
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Managed streaming fallback to queued
|
||||||
|
-
|
||||||
### Changed
|
### Changed
|
||||||
- Changed binary files data format compression to false
|
- Changed binary files data format compression to false
|
||||||
|
|
||||||
|
|
|
@ -51,42 +51,85 @@ class KustoClient(_KustoClientBase):
|
||||||
return await self.execute_mgmt(database, query, properties)
|
return await self.execute_mgmt(database, query, properties)
|
||||||
return await self.execute_query(database, query, properties)
|
return await self.execute_query(database, query, properties)
|
||||||
|
|
||||||
@distributed_trace_async(name_of_span="KustoClient.query_cmd", kind=SpanKind.CLIENT)
|
@distributed_trace_async(name_of_span="AioKustoClient.query_cmd", kind=SpanKind.CLIENT)
|
||||||
@aio_documented_by(KustoClientSync.execute_query)
|
@aio_documented_by(KustoClientSync.execute_query)
|
||||||
async def execute_query(self, database: str, query: str, properties: ClientRequestProperties = None) -> KustoResponseDataSet:
|
async def execute_query(self, database: str, query: str, properties: ClientRequestProperties = None) -> KustoResponseDataSet:
|
||||||
database = self._get_database_or_default(database)
|
database = self._get_database_or_default(database)
|
||||||
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
||||||
|
request = ExecuteRequestParams._from_query(
|
||||||
|
query,
|
||||||
|
database,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._query_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
return await self._execute(self._query_endpoint, request, properties)
|
||||||
|
|
||||||
return await self._execute(self._query_endpoint, database, query, None, KustoClient._query_default_timeout, properties)
|
@distributed_trace_async(name_of_span="AioKustoClient.control_cmd", kind=SpanKind.CLIENT)
|
||||||
|
|
||||||
@distributed_trace_async(name_of_span="KustoClient.control_cmd", kind=SpanKind.CLIENT)
|
|
||||||
@aio_documented_by(KustoClientSync.execute_mgmt)
|
@aio_documented_by(KustoClientSync.execute_mgmt)
|
||||||
async def execute_mgmt(self, database: str, query: str, properties: ClientRequestProperties = None) -> KustoResponseDataSet:
|
async def execute_mgmt(self, database: str, query: str, properties: ClientRequestProperties = None) -> KustoResponseDataSet:
|
||||||
database = self._get_database_or_default(database)
|
database = self._get_database_or_default(database)
|
||||||
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
||||||
|
request = ExecuteRequestParams._from_query(
|
||||||
|
query,
|
||||||
|
database,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
return await self._execute(self._mgmt_endpoint, request, properties)
|
||||||
|
|
||||||
return await self._execute(self._mgmt_endpoint, database, query, None, KustoClient._mgmt_default_timeout, properties)
|
@distributed_trace_async(name_of_span="AioKustoClient.streaming_ingest", kind=SpanKind.CLIENT)
|
||||||
|
|
||||||
@distributed_trace_async(name_of_span="KustoClient.streaming_ingest", kind=SpanKind.CLIENT)
|
|
||||||
@aio_documented_by(KustoClientSync.execute_streaming_ingest)
|
@aio_documented_by(KustoClientSync.execute_streaming_ingest)
|
||||||
async def execute_streaming_ingest(
|
async def execute_streaming_ingest(
|
||||||
self,
|
self,
|
||||||
database: Optional[str],
|
database: Optional[str],
|
||||||
table: str,
|
table: str,
|
||||||
stream: io.IOBase,
|
stream: Optional[io.IOBase],
|
||||||
|
blob_url: Optional[str],
|
||||||
stream_format: Union[DataFormat, str],
|
stream_format: Union[DataFormat, str],
|
||||||
properties: ClientRequestProperties = None,
|
properties: ClientRequestProperties = None,
|
||||||
mapping_name: str = None,
|
mapping_name: str = None,
|
||||||
):
|
):
|
||||||
database = self._get_database_or_default(database)
|
database = self._get_database_or_default(database)
|
||||||
Span.set_streaming_ingest_attributes(self._kusto_cluster, database, table, properties)
|
|
||||||
|
|
||||||
stream_format = stream_format.kusto_value if isinstance(stream_format, DataFormat) else DataFormat[stream_format.upper()].kusto_value
|
stream_format = stream_format.kusto_value if isinstance(stream_format, DataFormat) else DataFormat[stream_format.upper()].kusto_value
|
||||||
endpoint = self._streaming_ingest_endpoint + database + "/" + table + "?streamFormat=" + stream_format
|
endpoint = self._streaming_ingest_endpoint + database + "/" + table + "?streamFormat=" + stream_format
|
||||||
if mapping_name is not None:
|
if mapping_name is not None:
|
||||||
endpoint = endpoint + "&mappingName=" + mapping_name
|
endpoint = endpoint + "&mappingName=" + mapping_name
|
||||||
|
|
||||||
await self._execute(endpoint, database, None, stream, self._streaming_ingest_default_timeout, properties)
|
if blob_url:
|
||||||
|
endpoint += "&sourceKind=uri"
|
||||||
|
request = ExecuteRequestParams._from_blob_url(
|
||||||
|
blob_url,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._streaming_ingest_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
elif stream:
|
||||||
|
request = ExecuteRequestParams._from_stream(
|
||||||
|
stream,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._streaming_ingest_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception("execute_streaming_ingest is expecting either a stream or blob url")
|
||||||
|
|
||||||
|
Span.set_streaming_ingest_attributes(self._kusto_cluster, database, table, properties)
|
||||||
|
await self._execute(endpoint, request, properties)
|
||||||
|
|
||||||
@aio_documented_by(KustoClientSync._execute_streaming_query_parsed)
|
@aio_documented_by(KustoClientSync._execute_streaming_query_parsed)
|
||||||
async def _execute_streaming_query_parsed(
|
async def _execute_streaming_query_parsed(
|
||||||
|
@ -96,10 +139,13 @@ class KustoClient(_KustoClientBase):
|
||||||
timeout: timedelta = _KustoClientBase._query_default_timeout,
|
timeout: timedelta = _KustoClientBase._query_default_timeout,
|
||||||
properties: Optional[ClientRequestProperties] = None,
|
properties: Optional[ClientRequestProperties] = None,
|
||||||
) -> StreamingDataSetEnumerator:
|
) -> StreamingDataSetEnumerator:
|
||||||
response = await self._execute(self._query_endpoint, database, query, None, timeout, properties, stream_response=True)
|
request = ExecuteRequestParams._from_query(
|
||||||
|
query, database, properties, self._request_headers, timeout, self._mgmt_default_timeout, self._client_server_delta, self.client_details
|
||||||
|
)
|
||||||
|
response = await self._execute(self._query_endpoint, request, properties, stream_response=True)
|
||||||
return StreamingDataSetEnumerator(JsonTokenReader(response.content))
|
return StreamingDataSetEnumerator(JsonTokenReader(response.content))
|
||||||
|
|
||||||
@distributed_trace_async(name_of_span="KustoClient.streaming_query", kind=SpanKind.CLIENT)
|
@distributed_trace_async(name_of_span="AioKustoClient.streaming_query", kind=SpanKind.CLIENT)
|
||||||
@aio_documented_by(KustoClientSync.execute_streaming_query)
|
@aio_documented_by(KustoClientSync.execute_streaming_query)
|
||||||
async def execute_streaming_query(
|
async def execute_streaming_query(
|
||||||
self,
|
self,
|
||||||
|
@ -118,41 +164,33 @@ class KustoClient(_KustoClientBase):
|
||||||
async def _execute(
|
async def _execute(
|
||||||
self,
|
self,
|
||||||
endpoint: str,
|
endpoint: str,
|
||||||
database: Optional[str],
|
request: ExecuteRequestParams,
|
||||||
query: Optional[str],
|
properties: Optional[ClientRequestProperties] = None,
|
||||||
payload: Optional[io.IOBase],
|
|
||||||
timeout: timedelta,
|
|
||||||
properties: ClientRequestProperties = None,
|
|
||||||
stream_response: bool = False,
|
stream_response: bool = False,
|
||||||
) -> Union[KustoResponseDataSet, ClientResponse]:
|
) -> Union[KustoResponseDataSet, ClientResponse]:
|
||||||
"""Executes given query against this client"""
|
"""Executes given query against this client"""
|
||||||
if self._is_closed:
|
if self._is_closed:
|
||||||
raise KustoClosedError()
|
raise KustoClosedError()
|
||||||
self.validate_endpoint()
|
self.validate_endpoint()
|
||||||
request_params = ExecuteRequestParams(
|
|
||||||
database,
|
request_headers = request.request_headers
|
||||||
payload,
|
timeout = request.timeout
|
||||||
properties,
|
|
||||||
query,
|
|
||||||
timeout,
|
|
||||||
self._request_headers,
|
|
||||||
self._mgmt_default_timeout,
|
|
||||||
self._client_server_delta,
|
|
||||||
self.client_details,
|
|
||||||
)
|
|
||||||
json_payload = request_params.json_payload
|
|
||||||
request_headers = request_params.request_headers
|
|
||||||
timeout = request_params.timeout
|
|
||||||
if self._aad_helper:
|
if self._aad_helper:
|
||||||
request_headers["Authorization"] = await self._aad_helper.acquire_authorization_header_async()
|
request_headers["Authorization"] = await self._aad_helper.acquire_authorization_header_async()
|
||||||
|
|
||||||
invoker = lambda: self._session.post(
|
invoker = lambda: self._session.post(
|
||||||
endpoint, headers=request_headers, json=json_payload, data=payload, timeout=timeout.seconds, proxy=self._proxy_url, allow_redirects=False
|
endpoint,
|
||||||
|
headers=request_headers,
|
||||||
|
json=request.json_payload,
|
||||||
|
data=request.payload,
|
||||||
|
timeout=timeout.seconds,
|
||||||
|
proxy=self._proxy_url,
|
||||||
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await MonitoredActivity.invoke_async(
|
response = await MonitoredActivity.invoke_async(
|
||||||
invoker, name_of_span="KustoClient.http_post", tracing_attributes=Span.create_http_attributes("POST", endpoint, request_headers)
|
invoker, name_of_span="AioKustoClient.http_post", tracing_attributes=Span.create_http_attributes("POST", endpoint, request_headers)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise KustoNetworkError(endpoint, None if properties is None else properties.client_request_id) from e
|
raise KustoNetworkError(endpoint, None if properties is None else properties.client_request_id) from e
|
||||||
|
@ -172,7 +210,7 @@ class KustoClient(_KustoClientBase):
|
||||||
response_json = await response.json()
|
response_json = await response.json()
|
||||||
except Exception:
|
except Exception:
|
||||||
response_json = None
|
response_json = None
|
||||||
raise self._handle_http_error(e, endpoint, payload, response, response.status, response_json, response_text)
|
raise self._handle_http_error(e, endpoint, request.payload, response, response.status, response_json, response_text)
|
||||||
|
|
||||||
async with response:
|
async with response:
|
||||||
response_json = None
|
response_json = None
|
||||||
|
@ -186,5 +224,5 @@ class KustoClient(_KustoClientBase):
|
||||||
response_text = await response.text()
|
response_text = await response.text()
|
||||||
except Exception:
|
except Exception:
|
||||||
response_text = None
|
response_text = None
|
||||||
raise self._handle_http_error(e, endpoint, payload, response, response.status, response_json, response_text)
|
raise self._handle_http_error(e, endpoint, request.payload, response, response.status, response_json, response_text)
|
||||||
return MonitoredActivity.invoke(lambda: self._kusto_parse_by_endpoint(endpoint, response_json), name_of_span="KustoClient.processing_response")
|
return MonitoredActivity.invoke(lambda: self._kusto_parse_by_endpoint(endpoint, response_json), name_of_span="AioKustoClient.processing_response")
|
||||||
|
|
|
@ -175,8 +175,17 @@ class KustoClient(_KustoClientBase):
|
||||||
"""
|
"""
|
||||||
database = self._get_database_or_default(database)
|
database = self._get_database_or_default(database)
|
||||||
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
||||||
|
request = ExecuteRequestParams._from_query(
|
||||||
return self._execute(self._query_endpoint, database, query, None, self._query_default_timeout, properties)
|
query,
|
||||||
|
database,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._query_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
return self._execute(self._query_endpoint, request, properties)
|
||||||
|
|
||||||
@distributed_trace(name_of_span="KustoClient.control_cmd", kind=SpanKind.CLIENT)
|
@distributed_trace(name_of_span="KustoClient.control_cmd", kind=SpanKind.CLIENT)
|
||||||
def execute_mgmt(self, database: Optional[str], query: str, properties: Optional[ClientRequestProperties] = None) -> KustoResponseDataSet:
|
def execute_mgmt(self, database: Optional[str], query: str, properties: Optional[ClientRequestProperties] = None) -> KustoResponseDataSet:
|
||||||
|
@ -191,15 +200,25 @@ class KustoClient(_KustoClientBase):
|
||||||
"""
|
"""
|
||||||
database = self._get_database_or_default(database)
|
database = self._get_database_or_default(database)
|
||||||
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
Span.set_query_attributes(self._kusto_cluster, database, properties)
|
||||||
|
request = ExecuteRequestParams._from_query(
|
||||||
return self._execute(self._mgmt_endpoint, database, query, None, self._mgmt_default_timeout, properties)
|
query,
|
||||||
|
database,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
return self._execute(self._mgmt_endpoint, request, properties)
|
||||||
|
|
||||||
@distributed_trace(name_of_span="KustoClient.streaming_ingest", kind=SpanKind.CLIENT)
|
@distributed_trace(name_of_span="KustoClient.streaming_ingest", kind=SpanKind.CLIENT)
|
||||||
def execute_streaming_ingest(
|
def execute_streaming_ingest(
|
||||||
self,
|
self,
|
||||||
database: Optional[str],
|
database: Optional[str],
|
||||||
table: str,
|
table: str,
|
||||||
stream: IO[AnyStr],
|
stream: Optional[IO[AnyStr]],
|
||||||
|
blob_url: Optional[str],
|
||||||
stream_format: Union[DataFormat, str],
|
stream_format: Union[DataFormat, str],
|
||||||
properties: Optional[ClientRequestProperties] = None,
|
properties: Optional[ClientRequestProperties] = None,
|
||||||
mapping_name: str = None,
|
mapping_name: str = None,
|
||||||
|
@ -211,20 +230,44 @@ class KustoClient(_KustoClientBase):
|
||||||
https://docs.microsoft.com/en-us/azure/data-explorer/ingest-data-streaming
|
https://docs.microsoft.com/en-us/azure/data-explorer/ingest-data-streaming
|
||||||
:param Optional[str] database: Target database. If not provided, will default to the "Initial Catalog" value in the connection string
|
:param Optional[str] database: Target database. If not provided, will default to the "Initial Catalog" value in the connection string
|
||||||
:param str table: Target table.
|
:param str table: Target table.
|
||||||
:param io.BaseIO stream: stream object which contains the data to ingest.
|
:param Optional[IO[AnyStr]] stream: a stream object or which contains the data to ingest.
|
||||||
|
:param Optional[str] blob_url: An url to a blob which contains the data to ingest. Provide either this or stream.
|
||||||
:param DataFormat stream_format: Format of the data in the stream.
|
:param DataFormat stream_format: Format of the data in the stream.
|
||||||
:param ClientRequestProperties properties: additional request properties.
|
:param ClientRequestProperties properties: additional request properties.
|
||||||
:param str mapping_name: Pre-defined mapping of the table. Required when stream_format is json/avro.
|
:param str mapping_name: Pre-defined mapping of the table. Required when stream_format is json/avro.
|
||||||
"""
|
"""
|
||||||
database = self._get_database_or_default(database)
|
database = self._get_database_or_default(database)
|
||||||
Span.set_streaming_ingest_attributes(self._kusto_cluster, database, table, properties)
|
|
||||||
|
|
||||||
stream_format = stream_format.kusto_value if isinstance(stream_format, DataFormat) else DataFormat[stream_format.upper()].kusto_value
|
stream_format = stream_format.kusto_value if isinstance(stream_format, DataFormat) else DataFormat[stream_format.upper()].kusto_value
|
||||||
endpoint = self._streaming_ingest_endpoint + database + "/" + table + "?streamFormat=" + stream_format
|
endpoint = self._streaming_ingest_endpoint + database + "/" + table + "?streamFormat=" + stream_format
|
||||||
if mapping_name is not None:
|
if mapping_name is not None:
|
||||||
endpoint = endpoint + "&mappingName=" + mapping_name
|
endpoint = endpoint + "&mappingName=" + mapping_name
|
||||||
|
if blob_url:
|
||||||
|
endpoint += "&sourceKind=uri"
|
||||||
|
request = ExecuteRequestParams._from_blob_url(
|
||||||
|
blob_url,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._streaming_ingest_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
elif stream:
|
||||||
|
request = ExecuteRequestParams._from_stream(
|
||||||
|
stream,
|
||||||
|
properties,
|
||||||
|
self._request_headers,
|
||||||
|
self._streaming_ingest_default_timeout,
|
||||||
|
self._mgmt_default_timeout,
|
||||||
|
self._client_server_delta,
|
||||||
|
self.client_details,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception("execute_streaming_ingest is expecting either a stream or blob url")
|
||||||
|
|
||||||
self._execute(endpoint, database, None, stream, self._streaming_ingest_default_timeout, properties)
|
Span.set_streaming_ingest_attributes(self._kusto_cluster, database, table, properties)
|
||||||
|
self._execute(endpoint, request, properties)
|
||||||
|
|
||||||
def _execute_streaming_query_parsed(
|
def _execute_streaming_query_parsed(
|
||||||
self,
|
self,
|
||||||
|
@ -233,7 +276,10 @@ class KustoClient(_KustoClientBase):
|
||||||
timeout: timedelta = _KustoClientBase._query_default_timeout,
|
timeout: timedelta = _KustoClientBase._query_default_timeout,
|
||||||
properties: Optional[ClientRequestProperties] = None,
|
properties: Optional[ClientRequestProperties] = None,
|
||||||
) -> StreamingDataSetEnumerator:
|
) -> StreamingDataSetEnumerator:
|
||||||
response = self._execute(self._query_endpoint, database, query, None, timeout, properties, stream_response=True)
|
request = ExecuteRequestParams._from_query(
|
||||||
|
query, database, properties, self._request_headers, timeout, self._mgmt_default_timeout, self._client_server_delta, self.client_details
|
||||||
|
)
|
||||||
|
response = self._execute(self._query_endpoint, request, properties, stream_response=True)
|
||||||
response.raw.decode_content = True
|
response.raw.decode_content = True
|
||||||
return StreamingDataSetEnumerator(JsonTokenReader(response.raw))
|
return StreamingDataSetEnumerator(JsonTokenReader(response.raw))
|
||||||
|
|
||||||
|
@ -262,10 +308,7 @@ class KustoClient(_KustoClientBase):
|
||||||
def _execute(
|
def _execute(
|
||||||
self,
|
self,
|
||||||
endpoint: str,
|
endpoint: str,
|
||||||
database: Optional[str],
|
request: ExecuteRequestParams,
|
||||||
query: Optional[str],
|
|
||||||
payload: Optional[IO[AnyStr]],
|
|
||||||
timeout: timedelta,
|
|
||||||
properties: Optional[ClientRequestProperties] = None,
|
properties: Optional[ClientRequestProperties] = None,
|
||||||
stream_response: bool = False,
|
stream_response: bool = False,
|
||||||
) -> Union[KustoResponseDataSet, Response]:
|
) -> Union[KustoResponseDataSet, Response]:
|
||||||
|
@ -273,20 +316,8 @@ class KustoClient(_KustoClientBase):
|
||||||
if self._is_closed:
|
if self._is_closed:
|
||||||
raise KustoClosedError()
|
raise KustoClosedError()
|
||||||
self.validate_endpoint()
|
self.validate_endpoint()
|
||||||
request_params = ExecuteRequestParams(
|
|
||||||
database,
|
request_headers = request.request_headers
|
||||||
payload,
|
|
||||||
properties,
|
|
||||||
query,
|
|
||||||
timeout,
|
|
||||||
self._request_headers,
|
|
||||||
self._mgmt_default_timeout,
|
|
||||||
self._client_server_delta,
|
|
||||||
self.client_details,
|
|
||||||
)
|
|
||||||
json_payload = request_params.json_payload
|
|
||||||
request_headers = request_params.request_headers
|
|
||||||
timeout = request_params.timeout
|
|
||||||
if self._aad_helper:
|
if self._aad_helper:
|
||||||
request_headers["Authorization"] = self._aad_helper.acquire_authorization_header()
|
request_headers["Authorization"] = self._aad_helper.acquire_authorization_header()
|
||||||
|
|
||||||
|
@ -294,9 +325,9 @@ class KustoClient(_KustoClientBase):
|
||||||
invoker = lambda: self._session.post(
|
invoker = lambda: self._session.post(
|
||||||
endpoint,
|
endpoint,
|
||||||
headers=request_headers,
|
headers=request_headers,
|
||||||
json=json_payload,
|
json=request.json_payload,
|
||||||
data=payload,
|
data=request.payload,
|
||||||
timeout=timeout.seconds,
|
timeout=request.timeout.seconds,
|
||||||
stream=stream_response,
|
stream=stream_response,
|
||||||
allow_redirects=False,
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
|
@ -324,6 +355,6 @@ class KustoClient(_KustoClientBase):
|
||||||
response_json = response.json()
|
response_json = response.json()
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise self._handle_http_error(e, endpoint, payload, response, response.status_code, response_json, response.text)
|
raise self._handle_http_error(e, endpoint, request.payload, response, response.status_code, response_json, response.text)
|
||||||
# trace response processing
|
# trace response processing
|
||||||
return MonitoredActivity.invoke(lambda: self._kusto_parse_by_endpoint(endpoint, response_json), name_of_span="KustoClient.processing_response")
|
return MonitoredActivity.invoke(lambda: self._kusto_parse_by_endpoint(endpoint, response_json), name_of_span="KustoClient.processing_response")
|
||||||
|
|
|
@ -125,36 +125,82 @@ class _KustoClientBase(abc.ABC):
|
||||||
|
|
||||||
|
|
||||||
class ExecuteRequestParams:
|
class ExecuteRequestParams:
|
||||||
def __init__(
|
@staticmethod
|
||||||
self,
|
def _from_stream(
|
||||||
database: str,
|
stream: io.IOBase,
|
||||||
payload: Optional[io.IOBase],
|
|
||||||
properties: ClientRequestProperties,
|
properties: ClientRequestProperties,
|
||||||
query: str,
|
request_headers: Any,
|
||||||
timeout: timedelta,
|
timeout: timedelta,
|
||||||
request_headers: dict,
|
|
||||||
mgmt_default_timeout: timedelta,
|
mgmt_default_timeout: timedelta,
|
||||||
client_server_delta: timedelta,
|
client_server_delta: timedelta,
|
||||||
client_details: ClientDetails,
|
client_details: ClientDetails,
|
||||||
):
|
):
|
||||||
request_headers = copy(request_headers)
|
# Before 3.0 it was KPC.execute_streaming_ingest, but was changed to align with the other SDKs
|
||||||
request_headers["Connection"] = "Keep-Alive"
|
client_request_id_prefix = "KPC.executeStreamingIngest;"
|
||||||
json_payload = None
|
request_headers = request_headers.copy()
|
||||||
if not payload:
|
request_headers["Content-Encoding"] = "gzip"
|
||||||
json_payload = {"db": database, "csl": query}
|
if properties:
|
||||||
if properties:
|
request_headers.update(json.loads(properties.to_json())["Options"])
|
||||||
json_payload["properties"] = properties.to_json()
|
|
||||||
|
|
||||||
client_request_id_prefix = "KPC.execute;"
|
return ExecuteRequestParams(
|
||||||
request_headers["Content-Type"] = "application/json; charset=utf-8"
|
stream, None, request_headers, client_request_id_prefix, properties, timeout, mgmt_default_timeout, client_server_delta, client_details
|
||||||
else:
|
)
|
||||||
if properties:
|
|
||||||
request_headers.update(json.loads(properties.to_json())["Options"])
|
|
||||||
|
|
||||||
# Before 3.0 it was KPC.execute_streaming_ingest, but was changed to align with the other SDKs
|
@staticmethod
|
||||||
client_request_id_prefix = "KPC.executeStreamingIngest;"
|
def _from_query(
|
||||||
request_headers["Content-Encoding"] = "gzip"
|
query: str,
|
||||||
|
database: str,
|
||||||
|
properties: ClientRequestProperties,
|
||||||
|
request_headers: Any,
|
||||||
|
timeout: timedelta,
|
||||||
|
mgmt_default_timeout: timedelta,
|
||||||
|
client_server_delta: timedelta,
|
||||||
|
client_details: ClientDetails,
|
||||||
|
):
|
||||||
|
json_payload = {"db": database, "csl": query}
|
||||||
|
if properties:
|
||||||
|
json_payload["properties"] = properties.to_json()
|
||||||
|
|
||||||
|
client_request_id_prefix = "KPC.execute;"
|
||||||
|
request_headers = request_headers.copy()
|
||||||
|
request_headers["Content-Type"] = "application/json; charset=utf-8"
|
||||||
|
|
||||||
|
return ExecuteRequestParams(
|
||||||
|
None, json_payload, request_headers, client_request_id_prefix, properties, timeout, mgmt_default_timeout, client_server_delta, client_details
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _from_blob_url(
|
||||||
|
blob: str,
|
||||||
|
properties: ClientRequestProperties,
|
||||||
|
request_headers: Any,
|
||||||
|
timeout: timedelta,
|
||||||
|
mgmt_default_timeout: timedelta,
|
||||||
|
client_server_delta: timedelta,
|
||||||
|
client_details: ClientDetails,
|
||||||
|
):
|
||||||
|
json_payload = {"sourceUri": blob}
|
||||||
|
client_request_id_prefix = "KPC.executeStreamingIngestFromBlob;"
|
||||||
|
request_headers = request_headers.copy()
|
||||||
|
request_headers["Content-Type"] = "application/json; charset=utf-8"
|
||||||
|
if properties:
|
||||||
|
request_headers.update(json.loads(properties.to_json())["Options"])
|
||||||
|
return ExecuteRequestParams(
|
||||||
|
None, json_payload, request_headers, client_request_id_prefix, properties, timeout, mgmt_default_timeout, client_server_delta, client_details
|
||||||
|
)
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
payload,
|
||||||
|
json_payload,
|
||||||
|
request_headers,
|
||||||
|
client_request_id_prefix,
|
||||||
|
properties: ClientRequestProperties,
|
||||||
|
timeout: timedelta,
|
||||||
|
mgmt_default_timeout: timedelta,
|
||||||
|
client_server_delta: timedelta,
|
||||||
|
client_details: ClientDetails,
|
||||||
|
):
|
||||||
special_headers = [
|
special_headers = [
|
||||||
{
|
{
|
||||||
"name": "x-ms-client-request-id",
|
"name": "x-ms-client-request-id",
|
||||||
|
@ -201,3 +247,4 @@ class ExecuteRequestParams:
|
||||||
self.json_payload = json_payload
|
self.json_payload = json_payload
|
||||||
self.request_headers = request_headers
|
self.request_headers = request_headers
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
|
self.payload = payload
|
||||||
|
|
|
@ -37,15 +37,14 @@ def test_properties():
|
||||||
|
|
||||||
def test_default_tracing_properties():
|
def test_default_tracing_properties():
|
||||||
kcsb = KustoConnectionStringBuilder("test")
|
kcsb = KustoConnectionStringBuilder("test")
|
||||||
params = ExecuteRequestParams(
|
params = ExecuteRequestParams._from_query(
|
||||||
"somedatabase",
|
|
||||||
None,
|
|
||||||
ClientRequestProperties(),
|
|
||||||
"somequery",
|
"somequery",
|
||||||
timedelta(seconds=10),
|
"somedatabase",
|
||||||
|
ClientRequestProperties(),
|
||||||
{},
|
{},
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
|
timedelta(seconds=10),
|
||||||
kcsb.client_details,
|
kcsb.client_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -60,15 +59,14 @@ def test_custom_kcsb_tracing_properties():
|
||||||
kcsb.application_for_tracing = "myApp"
|
kcsb.application_for_tracing = "myApp"
|
||||||
kcsb.user_name_for_tracing = "myUser"
|
kcsb.user_name_for_tracing = "myUser"
|
||||||
|
|
||||||
params = ExecuteRequestParams(
|
params = ExecuteRequestParams._from_query(
|
||||||
"somedatabase",
|
|
||||||
None,
|
|
||||||
ClientRequestProperties(),
|
|
||||||
"somequery",
|
"somequery",
|
||||||
timedelta(seconds=10),
|
"somedatabase",
|
||||||
|
ClientRequestProperties(),
|
||||||
{},
|
{},
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
|
timedelta(seconds=10),
|
||||||
kcsb.client_details,
|
kcsb.client_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -84,15 +82,14 @@ def test_custom_crp_tracing_properties():
|
||||||
crp.application = "myApp2"
|
crp.application = "myApp2"
|
||||||
crp.user = "myUser2"
|
crp.user = "myUser2"
|
||||||
|
|
||||||
params = ExecuteRequestParams(
|
params = ExecuteRequestParams._from_query(
|
||||||
"somedatabase",
|
|
||||||
None,
|
|
||||||
crp,
|
|
||||||
"somequery",
|
"somequery",
|
||||||
timedelta(seconds=10),
|
"somedatabase",
|
||||||
|
crp,
|
||||||
{},
|
{},
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
|
timedelta(seconds=10),
|
||||||
kcsb.client_details,
|
kcsb.client_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -110,15 +107,14 @@ def test_custom_crp_tracing_properties_override_kcsb():
|
||||||
crp.application = "myApp2"
|
crp.application = "myApp2"
|
||||||
crp.user = "myUser2"
|
crp.user = "myUser2"
|
||||||
|
|
||||||
params = ExecuteRequestParams(
|
params = ExecuteRequestParams._from_query(
|
||||||
"somedatabase",
|
|
||||||
None,
|
|
||||||
crp,
|
|
||||||
"somequery",
|
"somequery",
|
||||||
timedelta(seconds=10),
|
"somedatabase",
|
||||||
|
crp,
|
||||||
{},
|
{},
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
|
timedelta(seconds=10),
|
||||||
kcsb.client_details,
|
kcsb.client_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -133,15 +129,14 @@ def test_set_connector_name_and_version():
|
||||||
kcsb._set_connector_details("myConnector", "myVersion", send_user=False)
|
kcsb._set_connector_details("myConnector", "myVersion", send_user=False)
|
||||||
crp = ClientRequestProperties()
|
crp = ClientRequestProperties()
|
||||||
|
|
||||||
params = ExecuteRequestParams(
|
params = ExecuteRequestParams._from_query(
|
||||||
"somedatabase",
|
|
||||||
None,
|
|
||||||
crp,
|
|
||||||
"somequery",
|
"somequery",
|
||||||
timedelta(seconds=10),
|
"somedatabase",
|
||||||
|
ClientRequestProperties(),
|
||||||
{},
|
{},
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
|
timedelta(seconds=10),
|
||||||
kcsb.client_details,
|
kcsb.client_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -157,15 +152,14 @@ def test_set_connector_no_app_version():
|
||||||
kcsb._set_connector_details("myConnector", "myVersion", app_name="myApp", send_user=True)
|
kcsb._set_connector_details("myConnector", "myVersion", app_name="myApp", send_user=True)
|
||||||
crp = ClientRequestProperties()
|
crp = ClientRequestProperties()
|
||||||
|
|
||||||
params = ExecuteRequestParams(
|
params = ExecuteRequestParams._from_query(
|
||||||
"somedatabase",
|
|
||||||
None,
|
|
||||||
crp,
|
|
||||||
"somequery",
|
"somequery",
|
||||||
timedelta(seconds=10),
|
"somedatabase",
|
||||||
|
ClientRequestProperties(),
|
||||||
{},
|
{},
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
|
timedelta(seconds=10),
|
||||||
kcsb.client_details,
|
kcsb.client_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -189,15 +183,14 @@ def test_set_connector_full():
|
||||||
)
|
)
|
||||||
crp = ClientRequestProperties()
|
crp = ClientRequestProperties()
|
||||||
|
|
||||||
params = ExecuteRequestParams(
|
params = ExecuteRequestParams._from_query(
|
||||||
"somedatabase",
|
|
||||||
None,
|
|
||||||
crp,
|
|
||||||
"somequery",
|
"somequery",
|
||||||
timedelta(seconds=10),
|
"somedatabase",
|
||||||
|
crp,
|
||||||
{},
|
{},
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
timedelta(seconds=10),
|
timedelta(seconds=10),
|
||||||
|
timedelta(seconds=10),
|
||||||
kcsb.client_details,
|
kcsb.client_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,8 @@ from io import BytesIO, SEEK_END
|
||||||
from typing import Union, Optional, AnyStr, IO, List, Dict
|
from typing import Union, Optional, AnyStr, IO, List, Dict
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
from azure.storage.blob import BlobClient
|
||||||
|
|
||||||
OptionalUUID = Optional[Union[str, uuid.UUID]]
|
OptionalUUID = Optional[Union[str, uuid.UUID]]
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,6 +152,10 @@ class BlobDescriptor(DescriptorBase):
|
||||||
obfuscated_path = self.path.split("?")[0].split(";")[0]
|
obfuscated_path = self.path.split("?")[0].split(";")[0]
|
||||||
return {self._BLOB_URI: obfuscated_path, self._SOURCE_ID: str(self.source_id)}
|
return {self._BLOB_URI: obfuscated_path, self._SOURCE_ID: str(self.source_id)}
|
||||||
|
|
||||||
|
def fill_size(self):
|
||||||
|
if not self.size:
|
||||||
|
self.size = BlobClient.from_blob_url(self.path).get_blob_properties().size
|
||||||
|
|
||||||
|
|
||||||
class StreamDescriptor(DescriptorBase):
|
class StreamDescriptor(DescriptorBase):
|
||||||
"""StreamDescriptor is used to describe a stream that will be used as ingestion source"""
|
"""StreamDescriptor is used to describe a stream that will be used as ingestion source"""
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import uuid
|
import uuid
|
||||||
from io import SEEK_SET
|
from io import SEEK_SET
|
||||||
from typing import AnyStr, IO, TYPE_CHECKING, Union
|
from typing import AnyStr, IO, TYPE_CHECKING, Union, Optional
|
||||||
|
|
||||||
|
from azure.kusto.ingest.descriptors import DescriptorBase
|
||||||
from tenacity import Retrying, _utils, stop_after_attempt, wait_random_exponential
|
from tenacity import Retrying, _utils, stop_after_attempt, wait_random_exponential
|
||||||
|
|
||||||
from azure.core.tracing.decorator import distributed_trace
|
from azure.core.tracing.decorator import distributed_trace
|
||||||
|
@ -11,7 +12,6 @@ from azure.kusto.data import KustoConnectionStringBuilder
|
||||||
from azure.kusto.data.exceptions import KustoApiError, KustoClosedError
|
from azure.kusto.data.exceptions import KustoApiError, KustoClosedError
|
||||||
from azure.kusto.data._telemetry import MonitoredActivity
|
from azure.kusto.data._telemetry import MonitoredActivity
|
||||||
|
|
||||||
|
|
||||||
from . import BlobDescriptor, FileDescriptor, IngestionProperties, StreamDescriptor
|
from . import BlobDescriptor, FileDescriptor, IngestionProperties, StreamDescriptor
|
||||||
from ._ingest_telemetry import IngestTracingAttributes
|
from ._ingest_telemetry import IngestTracingAttributes
|
||||||
from ._stream_extensions import chain_streams, read_until_size_or_end
|
from ._stream_extensions import chain_streams, read_until_size_or_end
|
||||||
|
@ -108,30 +108,20 @@ class ManagedStreamingIngestClient(BaseIngestClient):
|
||||||
stream = stream_descriptor.stream
|
stream = stream_descriptor.stream
|
||||||
|
|
||||||
buffered_stream = read_until_size_or_end(stream, self.MAX_STREAMING_SIZE_IN_BYTES + 1)
|
buffered_stream = read_until_size_or_end(stream, self.MAX_STREAMING_SIZE_IN_BYTES + 1)
|
||||||
|
length = len(buffered_stream.getbuffer())
|
||||||
if len(buffered_stream.getbuffer()) > self.MAX_STREAMING_SIZE_IN_BYTES:
|
|
||||||
stream_descriptor.stream = chain_streams([buffered_stream, stream])
|
|
||||||
return self.queued_client.ingest_from_stream(stream_descriptor, ingestion_properties)
|
|
||||||
|
|
||||||
stream_descriptor.stream = buffered_stream
|
stream_descriptor.stream = buffered_stream
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for attempt in Retrying(
|
res = self._stream_with_retries(length, stream_descriptor, ingestion_properties)
|
||||||
stop=stop_after_attempt(self._num_of_attempts), wait=wait_random_exponential(max=self._max_seconds_per_retry), reraise=True
|
if res:
|
||||||
):
|
return res
|
||||||
with attempt:
|
stream_descriptor.stream = chain_streams([buffered_stream, stream])
|
||||||
stream.seek(0, SEEK_SET)
|
|
||||||
client_request_id = ManagedStreamingIngestClient._get_request_id(stream_descriptor.source_id, attempt.retry_state.attempt_number - 1)
|
|
||||||
# trace attempt to ingest from stream
|
|
||||||
invoker = lambda: self.streaming_client._ingest_from_stream_with_client_request_id(
|
|
||||||
stream_descriptor, ingestion_properties, client_request_id
|
|
||||||
)
|
|
||||||
return MonitoredActivity.invoke(invoker, name_of_span="ManagedStreamingIngestClient.ingest_from_stream_attempt")
|
|
||||||
|
|
||||||
except KustoApiError as ex:
|
except KustoApiError as ex:
|
||||||
error = ex.get_api_error()
|
error = ex.get_api_error()
|
||||||
if error.permanent:
|
if error.permanent:
|
||||||
raise
|
raise
|
||||||
|
buffered_stream.seek(0, SEEK_SET)
|
||||||
|
|
||||||
return self.queued_client.ingest_from_stream(stream_descriptor, ingestion_properties)
|
return self.queued_client.ingest_from_stream(stream_descriptor, ingestion_properties)
|
||||||
|
|
||||||
|
@ -151,9 +141,42 @@ class ManagedStreamingIngestClient(BaseIngestClient):
|
||||||
|
|
||||||
if self._is_closed:
|
if self._is_closed:
|
||||||
raise KustoClosedError()
|
raise KustoClosedError()
|
||||||
|
blob_descriptor.fill_size()
|
||||||
|
try:
|
||||||
|
res = self._stream_with_retries(blob_descriptor.size, blob_descriptor, ingestion_properties)
|
||||||
|
if res:
|
||||||
|
return res
|
||||||
|
except KustoApiError as ex:
|
||||||
|
error = ex.get_api_error()
|
||||||
|
if error.permanent:
|
||||||
|
raise
|
||||||
|
|
||||||
return self.queued_client.ingest_from_blob(blob_descriptor, ingestion_properties)
|
return self.queued_client.ingest_from_blob(blob_descriptor, ingestion_properties)
|
||||||
|
|
||||||
|
def _stream_with_retries(
|
||||||
|
self,
|
||||||
|
length: int,
|
||||||
|
descriptor: DescriptorBase,
|
||||||
|
props: IngestionProperties,
|
||||||
|
) -> Optional[IngestionResult]:
|
||||||
|
from_stream = isinstance(descriptor, StreamDescriptor)
|
||||||
|
if length > self.MAX_STREAMING_SIZE_IN_BYTES:
|
||||||
|
return None
|
||||||
|
for attempt in Retrying(stop=stop_after_attempt(self._num_of_attempts), wait=wait_random_exponential(max=self._max_seconds_per_retry), reraise=True):
|
||||||
|
with attempt:
|
||||||
|
client_request_id = ManagedStreamingIngestClient._get_request_id(descriptor.source_id, attempt.retry_state.attempt_number - 1)
|
||||||
|
# trace attempt to ingest from stream
|
||||||
|
if from_stream:
|
||||||
|
descriptor.stream.seek(0, SEEK_SET)
|
||||||
|
invoker = lambda: self.streaming_client._ingest_from_stream_with_client_request_id(descriptor, props, client_request_id)
|
||||||
|
else:
|
||||||
|
invoker = lambda: self.streaming_client.ingest_from_blob(descriptor, props, client_request_id)
|
||||||
|
return MonitoredActivity.invoke(
|
||||||
|
invoker,
|
||||||
|
name_of_span="ManagedStreamingIngestClient.ingest_from_stream_attempt",
|
||||||
|
tracing_attributes={"attemptNumber": attempt, "sourceIsStream": from_stream},
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_request_id(source_id: uuid.UUID, attempt: int):
|
def _get_request_id(source_id: uuid.UUID, attempt: int):
|
||||||
return f"KPC.executeManagedStreamingIngest;{source_id};{attempt}"
|
return f"KPC.executeManagedStreamingIngest;{source_id};{attempt}"
|
||||||
|
|
|
@ -10,7 +10,7 @@ from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRe
|
||||||
|
|
||||||
from ._ingest_telemetry import IngestTracingAttributes
|
from ._ingest_telemetry import IngestTracingAttributes
|
||||||
from .base_ingest_client import BaseIngestClient, IngestionResult, IngestionStatus
|
from .base_ingest_client import BaseIngestClient, IngestionResult, IngestionStatus
|
||||||
from .descriptors import FileDescriptor, StreamDescriptor
|
from .descriptors import FileDescriptor, StreamDescriptor, BlobDescriptor
|
||||||
from .ingestion_properties import IngestionProperties
|
from .ingestion_properties import IngestionProperties
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,9 +79,30 @@ class KustoStreamingIngestClient(BaseIngestClient):
|
||||||
ingestion_properties.database,
|
ingestion_properties.database,
|
||||||
ingestion_properties.table,
|
ingestion_properties.table,
|
||||||
stream_descriptor.stream,
|
stream_descriptor.stream,
|
||||||
|
None,
|
||||||
ingestion_properties.format.name,
|
ingestion_properties.format.name,
|
||||||
additional_properties,
|
additional_properties,
|
||||||
mapping_name=ingestion_properties.ingestion_mapping_reference,
|
mapping_name=ingestion_properties.ingestion_mapping_reference,
|
||||||
)
|
)
|
||||||
|
|
||||||
return IngestionResult(IngestionStatus.SUCCESS, ingestion_properties.database, ingestion_properties.table, stream_descriptor.source_id)
|
return IngestionResult(IngestionStatus.SUCCESS, ingestion_properties.database, ingestion_properties.table, stream_descriptor.source_id)
|
||||||
|
|
||||||
|
def ingest_from_blob(
|
||||||
|
self, blob_descriptor: BlobDescriptor, ingestion_properties: IngestionProperties, client_request_id: Optional[str] = None
|
||||||
|
) -> IngestionResult:
|
||||||
|
IngestTracingAttributes.set_ingest_descriptor_attributes(blob_descriptor, ingestion_properties)
|
||||||
|
additional_properties = None
|
||||||
|
if client_request_id:
|
||||||
|
additional_properties = ClientRequestProperties()
|
||||||
|
additional_properties.client_request_id = client_request_id
|
||||||
|
|
||||||
|
self._kusto_client.execute_streaming_ingest(
|
||||||
|
ingestion_properties.database,
|
||||||
|
ingestion_properties.table,
|
||||||
|
None,
|
||||||
|
blob_descriptor.path,
|
||||||
|
ingestion_properties.format.name,
|
||||||
|
additional_properties,
|
||||||
|
mapping_name=ingestion_properties.ingestion_mapping_reference,
|
||||||
|
)
|
||||||
|
return IngestionResult(IngestionStatus.SUCCESS, ingestion_properties.database, ingestion_properties.table, blob_descriptor.source_id)
|
||||||
|
|
|
@ -538,3 +538,32 @@ class TestE2E:
|
||||||
self.ingest_client.ingest_from_dataframe(df, ingestion_properties)
|
self.ingest_client.ingest_from_dataframe(df, ingestion_properties)
|
||||||
|
|
||||||
await self.assert_rows_added(1, timeout=120)
|
await self.assert_rows_added(1, timeout=120)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_streaming_ingest_from_blob(self, is_managed_streaming):
|
||||||
|
ingestion_properties = IngestionProperties(
|
||||||
|
database=self.test_db,
|
||||||
|
table=self.test_table,
|
||||||
|
data_format=DataFormat.JSON,
|
||||||
|
ingestion_mapping_reference="JsonMapping",
|
||||||
|
ingestion_mapping_kind=IngestionMappingKind.JSON,
|
||||||
|
)
|
||||||
|
containers = self.ingest_client._resource_manager.get_containers()
|
||||||
|
|
||||||
|
with FileDescriptor(self.json_file_path).open(False) as stream:
|
||||||
|
blob_descriptor = self.ingest_client.upload_blob(
|
||||||
|
containers,
|
||||||
|
FileDescriptor(self.json_file_path),
|
||||||
|
ingestion_properties.database,
|
||||||
|
ingestion_properties.table,
|
||||||
|
stream,
|
||||||
|
None,
|
||||||
|
10 * 60,
|
||||||
|
3,
|
||||||
|
)
|
||||||
|
if is_managed_streaming:
|
||||||
|
self.managed_streaming_ingest_client.ingest_from_blob(blob_descriptor, ingestion_properties)
|
||||||
|
else:
|
||||||
|
self.streaming_ingest_client.ingest_from_blob(blob_descriptor, ingestion_properties)
|
||||||
|
|
||||||
|
await self.assert_rows_added(2, timeout=120)
|
||||||
|
|
|
@ -44,6 +44,36 @@ STORAGE_QUEUE4_URL = "https://storageaccount2.queue.core.windows.net/readyforagg
|
||||||
STORAGE_QUEUE5_URL = "https://storageaccount2.queue.core.windows.net/readyforaggregation-secured?5"
|
STORAGE_QUEUE5_URL = "https://storageaccount2.queue.core.windows.net/readyforaggregation-secured?5"
|
||||||
|
|
||||||
|
|
||||||
|
def request_callback_throw_transient(request):
|
||||||
|
response_status = 400
|
||||||
|
response_body = {
|
||||||
|
"error": {
|
||||||
|
"code": "BadRequest",
|
||||||
|
"message": "Request is invalid and cannot be executed.",
|
||||||
|
"@type": "Kusto.Common.Svc.Exceptions.AdminCommandWrongEndpointException",
|
||||||
|
"@message": "Cannot get ingestion resources from this service endpoint. The appropriate endpoint is most likely "
|
||||||
|
"'https://ingest-somecluster.kusto.windows.net/'.",
|
||||||
|
"@context": {
|
||||||
|
"timestamp": "2021-10-12T06:05:35.6602087Z",
|
||||||
|
"serviceAlias": "SomeCluster",
|
||||||
|
"machineName": "KEngine000000",
|
||||||
|
"processName": "Kusto.WinSvc.Svc",
|
||||||
|
"processId": 2648,
|
||||||
|
"threadId": 472,
|
||||||
|
"appDomainName": "Kusto.WinSvc.Svc.exe",
|
||||||
|
"clientRequestId": "KPC.execute;a3dfb878-9d2b-49d6-89a5-e9b3a9f1f674",
|
||||||
|
"activityId": "87eb8fc9-78b3-4580-bcc8-6c90482f9118",
|
||||||
|
"subActivityId": "bbfb038b-4467-4f96-afd4-945904fc6278",
|
||||||
|
"activityType": "DN.AdminCommand.IngestionResourcesGetCommand",
|
||||||
|
"parentActivityId": "00e678e9-4204-4143-8c94-6afd94c27430",
|
||||||
|
"activityStack": "(Activity stack: CRID=KPC.execute;a3dfb878-9d2b-49d6-89a5-e9b3a9f1f674 ARID=87eb8fc9-78b3-4580-bcc8-6c90482f9118 > DN.Admin.Client.ExecuteControlCommand/833dfb85-5d67-44b7-882d-eb2283e65780 > P.WCF.Service.ExecuteControlCommand..IInterNodeCommunicationAdminContract/3784e74f-1d89-4c15-adef-0a360c4c431e > DN.FE.ExecuteControlCommand/00e678e9-4204-4143-8c94-6afd94c27430 > DN.AdminCommand.IngestionResourcesGetCommand/bbfb038b-4467-4f96-afd4-945904fc6278)",
|
||||||
|
},
|
||||||
|
"@permanent": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return response_status, {}, json.dumps(response_body)
|
||||||
|
|
||||||
|
|
||||||
def request_callback(request):
|
def request_callback(request):
|
||||||
body = json.loads(request.body.decode()) if type(request.body) == bytes else json.loads(request.body)
|
body = json.loads(request.body.decode()) if type(request.body) == bytes else json.loads(request.body)
|
||||||
response_status = 400
|
response_status = 400
|
||||||
|
|
|
@ -11,7 +11,7 @@ import responses
|
||||||
from azure.kusto.data.data_format import DataFormat
|
from azure.kusto.data.data_format import DataFormat
|
||||||
from azure.kusto.data.exceptions import KustoApiError
|
from azure.kusto.data.exceptions import KustoApiError
|
||||||
from azure.kusto.ingest import ManagedStreamingIngestClient, IngestionProperties, IngestionStatus, BlobDescriptor
|
from azure.kusto.ingest import ManagedStreamingIngestClient, IngestionProperties, IngestionStatus, BlobDescriptor
|
||||||
from test_kusto_ingest_client import request_callback as queued_request_callback, assert_queued_upload
|
from test_kusto_ingest_client import request_callback as queued_request_callback, assert_queued_upload, request_callback_throw_transient
|
||||||
from test_kusto_streaming_ingest_client import request_callback as streaming_request_callback, assert_managed_streaming_request_id
|
from test_kusto_streaming_ingest_client import request_callback as streaming_request_callback, assert_managed_streaming_request_id
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,6 +21,11 @@ class TransientResponseHelper:
|
||||||
self.total_calls = 0
|
self.total_calls = 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(params=["Blob", "File"])
|
||||||
|
def is_blob(request):
|
||||||
|
return request.param == "Blob"
|
||||||
|
|
||||||
|
|
||||||
def transient_error_callback(helper: TransientResponseHelper, request, custom_request_id=None):
|
def transient_error_callback(helper: TransientResponseHelper, request, custom_request_id=None):
|
||||||
if custom_request_id:
|
if custom_request_id:
|
||||||
assert request.headers["x-ms-client-request-id"] == custom_request_id
|
assert request.headers["x-ms-client-request-id"] == custom_request_id
|
||||||
|
@ -84,7 +89,7 @@ class TestManagedStreamingIngestClient:
|
||||||
@patch("azure.storage.blob.BlobClient.upload_blob")
|
@patch("azure.storage.blob.BlobClient.upload_blob")
|
||||||
@patch("azure.storage.queue.QueueClient.send_message")
|
@patch("azure.storage.queue.QueueClient.send_message")
|
||||||
@patch("uuid.uuid4", return_value=MOCKED_UUID_4)
|
@patch("uuid.uuid4", return_value=MOCKED_UUID_4)
|
||||||
def test_fallback_big_file(self, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream, mock_aad):
|
def test_fallback_big_file(self, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream, mock_aad, is_blob):
|
||||||
responses.add_callback(
|
responses.add_callback(
|
||||||
responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=queued_request_callback, content_type="application/json"
|
responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=queued_request_callback, content_type="application/json"
|
||||||
)
|
)
|
||||||
|
@ -108,10 +113,20 @@ class TestManagedStreamingIngestClient:
|
||||||
mock_upload_blob_from_stream.side_effect = check_bytes
|
mock_upload_blob_from_stream.side_effect = check_bytes
|
||||||
|
|
||||||
f = NamedTemporaryFile(dir=".", mode="wb", delete=False)
|
f = NamedTemporaryFile(dir=".", mode="wb", delete=False)
|
||||||
|
blob_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__11111111-1111-1111-1111-111111111111__{}?".format(
|
||||||
|
os.path.basename(f.name)
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f.write(initial_bytes)
|
if is_blob:
|
||||||
f.close()
|
result = ingest_client.ingest_from_blob(BlobDescriptor(blob_url + "sas", 5 * 1024 * 1024), ingestion_properties=ingestion_properties)
|
||||||
result = ingest_client.ingest_from_file(f.name, ingestion_properties=ingestion_properties)
|
f.close()
|
||||||
|
else:
|
||||||
|
f.write(initial_bytes)
|
||||||
|
f.close()
|
||||||
|
result = ingest_client.ingest_from_file(f.name, ingestion_properties=ingestion_properties)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
finally:
|
finally:
|
||||||
os.unlink(f.name)
|
os.unlink(f.name)
|
||||||
|
|
||||||
|
@ -119,14 +134,13 @@ class TestManagedStreamingIngestClient:
|
||||||
|
|
||||||
assert_queued_upload(
|
assert_queued_upload(
|
||||||
mock_put_message_in_queue,
|
mock_put_message_in_queue,
|
||||||
mock_upload_blob_from_stream,
|
mock_upload_blob_from_stream if not is_blob else None,
|
||||||
"https://storageaccount.blob.core.windows.net/tempstorage/database__table__11111111-1111-1111-1111-111111111111__{}?".format(
|
blob_url,
|
||||||
os.path.basename(f.name)
|
|
||||||
),
|
|
||||||
format=data_format.kusto_value,
|
format=data_format.kusto_value,
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_upload_blob_from_stream.assert_called()
|
if not is_blob:
|
||||||
|
mock_upload_blob_from_stream.assert_called()
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
@patch("azure.kusto.data.security._AadHelper.acquire_authorization_header", return_value=None)
|
@patch("azure.kusto.data.security._AadHelper.acquire_authorization_header", return_value=None)
|
||||||
|
@ -314,6 +328,12 @@ class TestManagedStreamingIngestClient:
|
||||||
responses.add_callback(
|
responses.add_callback(
|
||||||
responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=queued_request_callback, content_type="application/json"
|
responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=queued_request_callback, content_type="application/json"
|
||||||
)
|
)
|
||||||
|
responses.add_callback(
|
||||||
|
responses.POST,
|
||||||
|
"https://somecluster.kusto.windows.net/v1/rest/ingest/database/table?streamFormat=csv&sourceKind=uri",
|
||||||
|
callback=request_callback_throw_transient,
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
ingest_client = ManagedStreamingIngestClient.from_dm_kcsb("https://ingest-somecluster.kusto.windows.net")
|
ingest_client = ManagedStreamingIngestClient.from_dm_kcsb("https://ingest-somecluster.kusto.windows.net")
|
||||||
ingestion_properties = IngestionProperties(database="database", table="table")
|
ingestion_properties = IngestionProperties(database="database", table="table")
|
||||||
|
|
Загрузка…
Ссылка в новой задаче