зеркало из
1
0
Форкнуть 0

Fix df from result table error message + old types (#543)

* Added dictionary options as parameters for type matching for dataframe_from_result_table func

* reformatted the file

* reformatted the file

* reformatted the file

* Fixes after PR

* Fixes after PR

* Fixes after PR

* Fixes after PR

* Fixes after PR

* For python 3.7 3.8

* Nullable bools fix

* Nullable bools fix

* Nullable bools fix

* Nullable bools fix2

* PR comment fixes

* LRU cache default parameters for python 3.7

* LRU cache maxsize=1

* modified changes requested

* black

* fix for numpy 2.0

* fix numpy 2.0 nan

* fix numpy 2.0 ninf, inf

* fix numpy 2.0 -inf

* fix tenacity>=8.3

* fix tenacity>=8.3

* black

* fixed error message in case of non-existing type + added old type names in kusto

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/__init__.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/blob_source.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/compression_type.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/ingestion_source.py

* Update setup.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/kusto_storage_uploader.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/local_source.py

* fixed error message in case of non-existing type + added old type names in kusto

* added test for missing and old types

* black

* using pytest.raises
This commit is contained in:
mayamarom10 2024-06-18 17:22:22 +03:00 коммит произвёл GitHub
Родитель 0865ed5beb
Коммит 5c1909339d
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 52 добавлений и 19 удалений

Просмотреть файл

@ -18,14 +18,22 @@ def default_dict() -> Converter:
return {
"string": lambda col, df: df[col].astype(pd.StringDtype()) if hasattr(pd, "StringDType") else df[col],
"guid": lambda col, df: df[col],
"uuid": lambda col, df: df[col],
"uniqueid": lambda col, df: df[col],
"dynamic": lambda col, df: df[col],
"bool": lambda col, df: df[col].astype(bool),
"boolean": lambda col, df: df[col].astype(bool),
"int": lambda col, df: df[col].astype(pd.Int32Dtype()),
"int32": lambda col, df: df[col].astype(pd.Int32Dtype()),
"int64": lambda col, df: df[col].astype(pd.Int64Dtype()),
"long": lambda col, df: df[col].astype(pd.Int64Dtype()),
"real": lambda col, df: parse_float(df, col),
"double": lambda col, df: parse_float(df, col),
"decimal": lambda col, df: parse_float(df, col),
"datetime": lambda col, df: parse_datetime(df, col),
"date": lambda col, df: parse_datetime(df, col),
"timespan": lambda col, df: df[col].apply(parse_timedelta),
"time": lambda col, df: df[col].apply(parse_timedelta),
}
@ -67,13 +75,15 @@ def dataframe_from_result_table(
column_name = col.column_name
column_type = col.column_type
if converters_by_column_name and column_name in converters_by_column_name:
converter = converters_by_column_name[column_name]
converter = converters_by_column_name.get(column_name)
elif converters_by_type and column_type in converters_by_type:
converter = converters_by_type[column_type]
converter = converters_by_type.get(column_type)
elif nullable_bools and column_type == "bool":
converter = lambda col, df: df[col].astype(pd.BooleanDtype())
else:
converter = default[column_type]
converter = default.get(column_type)
if converter is None:
raise Exception("Unexpected type " + column_type)
if isinstance(converter, str):
frame[column_name] = frame[column_name].astype(converter)
else:

Просмотреть файл

@ -73,6 +73,10 @@
"ColumnName": "RecordReal",
"ColumnType": "real"
},
{
"ColumnName": "RecordDouble",
"ColumnType": "double"
},
{
"ColumnName": "RecordDecimal",
"ColumnType": "decimal"
@ -80,6 +84,10 @@
{
"ColumnName": "RecordDynamic",
"ColumnType": "dynamic"
},
{
"ColumnName": "MissingType",
"ColumnType": "missing"
}
],
"Rows": [
@ -92,8 +100,8 @@
222,
92233720368,
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
3.14159, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
3.14159, 7.89, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
],
[
@ -105,8 +113,8 @@
222,
92233720368,
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
"NaN", "NaN",
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
"NaN", "NaN", "NaN",
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
],
[
@ -118,8 +126,8 @@
222,
92233720368,
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
"Infinity", "Infinity",
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
"Infinity", "Infinity", "Infinity",
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
],
[
@ -131,8 +139,8 @@
222,
92233720368,
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
"-Infinity", "-Infinity",
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
"-Infinity", "-Infinity", "-Infinity",
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
],
[
@ -144,8 +152,8 @@
222,
92233720368,
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
3.14159, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
3.14159, 7.89, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
],
[
@ -157,8 +165,8 @@
222,
92233720368,
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
3.14159, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
3.14159, 7.89, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
],
[
@ -170,8 +178,8 @@
222,
92233720368,
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
3.14159, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
3.14159, 7.89, 1.2,
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
],
[

Просмотреть файл

@ -4,6 +4,8 @@ import datetime
import json
import os
import pytest
from azure.kusto.data._models import KustoResultTable
from azure.kusto.data.helpers import dataframe_from_result_table
from azure.kusto.data.response import KustoResponseDataSetV2
@ -20,7 +22,11 @@ with open(os.path.join(os.path.dirname(__file__), "input", "dataframe.json"), "r
response = KustoResponseDataSetV2(json.loads(data))
# Test when given both types of dictionary parameters that type conversion doesn't override column name conversion
test_dict_by_name = {"RecordName": lambda col, frame: frame[col].astype("str"), "RecordInt64": lambda col, frame: frame[col].astype("int64")}
test_dict_by_name = {
"RecordName": lambda col, frame: frame[col].astype("str"),
"RecordInt64": lambda col, frame: frame[col].astype("int64"),
"MissingType": lambda col, frame: frame[col].astype("str"),
}
test_dict_by_type = {"int": lambda col, frame: frame[col].astype("int32")}
df = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_dict_by_type, converters_by_column_name=test_dict_by_name)
@ -52,6 +58,8 @@ assert type(df.iloc[0].RecordLong) is numpy.int64
assert df.iloc[0].RecordLong == 92233720368
assert type(df.iloc[0].RecordReal) is numpy.float64
assert df.iloc[0].RecordReal == 3.14159
assert type(df.iloc[0].RecordDouble) is numpy.float64
assert df.iloc[0].RecordDouble == 7.89
assert type(df.iloc[0].RecordDecimal) is numpy.float64
assert df.iloc[0].RecordDecimal == 1.2
@ -90,10 +98,17 @@ assert df.iloc[6].RecordOffset == pandas.to_timedelta("1 days 01:01:01")
# Testing int to float conversion
test_int_to_float = {"int": "float64"}
df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float)
ignore_missing_type = {
"MissingType": lambda col, frame: frame[col].astype("str"),
}
df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float, converters_by_column_name=ignore_missing_type)
assert type(df_int_to_float.iloc[0].RecordInt) is numpy.float64
assert df.iloc[0].RecordInt == 5678
# Testing missing type conversion
with pytest.raises(Exception):
df_missing_type = dataframe_from_result_table(response.primary_results[0])
def test_pandas_mixed_date():
df = dataframe_from_result_table(