Fix df from result table error message + old types (#543)
* Added dictionary options as parameters for type matching for dataframe_from_result_table func * reformatted the file * reformatted the file * reformatted the file * Fixes after PR * Fixes after PR * Fixes after PR * Fixes after PR * Fixes after PR * For python 3.7 3.8 * Nullable bools fix * Nullable bools fix * Nullable bools fix * Nullable bools fix2 * PR comment fixes * LRU cache default parameters for python 3.7 * LRU cache maxsize=1 * modified changes requested * black * fix for numpy 2.0 * fix numpy 2.0 nan * fix numpy 2.0 ninf, inf * fix numpy 2.0 -inf * fix tenacity>=8.3 * fix tenacity>=8.3 * black * fixed error message in case of non-existent type + added old type names in kusto * Delete azure-kusto-ingest/azure/kusto/ingest/V2/__init__.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/blob_source.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/compression_type.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/ingestion_source.py * Update setup.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/kusto_storage_uploader.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/local_source.py * fixed error message in case of non-existent type + added old type names in kusto * added test for missing and old types * black * using pytest.raises
This commit is contained in:
Родитель
0865ed5beb
Коммит
5c1909339d
|
@ -18,14 +18,22 @@ def default_dict() -> Converter:
|
|||
return {
|
||||
"string": lambda col, df: df[col].astype(pd.StringDtype()) if hasattr(pd, "StringDType") else df[col],
|
||||
"guid": lambda col, df: df[col],
|
||||
"uuid": lambda col, df: df[col],
|
||||
"uniqueid": lambda col, df: df[col],
|
||||
"dynamic": lambda col, df: df[col],
|
||||
"bool": lambda col, df: df[col].astype(bool),
|
||||
"boolean": lambda col, df: df[col].astype(bool),
|
||||
"int": lambda col, df: df[col].astype(pd.Int32Dtype()),
|
||||
"int32": lambda col, df: df[col].astype(pd.Int32Dtype()),
|
||||
"int64": lambda col, df: df[col].astype(pd.Int64Dtype()),
|
||||
"long": lambda col, df: df[col].astype(pd.Int64Dtype()),
|
||||
"real": lambda col, df: parse_float(df, col),
|
||||
"double": lambda col, df: parse_float(df, col),
|
||||
"decimal": lambda col, df: parse_float(df, col),
|
||||
"datetime": lambda col, df: parse_datetime(df, col),
|
||||
"date": lambda col, df: parse_datetime(df, col),
|
||||
"timespan": lambda col, df: df[col].apply(parse_timedelta),
|
||||
"time": lambda col, df: df[col].apply(parse_timedelta),
|
||||
}
|
||||
|
||||
|
||||
|
@ -67,13 +75,15 @@ def dataframe_from_result_table(
|
|||
column_name = col.column_name
|
||||
column_type = col.column_type
|
||||
if converters_by_column_name and column_name in converters_by_column_name:
|
||||
converter = converters_by_column_name[column_name]
|
||||
converter = converters_by_column_name.get(column_name)
|
||||
elif converters_by_type and column_type in converters_by_type:
|
||||
converter = converters_by_type[column_type]
|
||||
converter = converters_by_type.get(column_type)
|
||||
elif nullable_bools and column_type == "bool":
|
||||
converter = lambda col, df: df[col].astype(pd.BooleanDtype())
|
||||
else:
|
||||
converter = default[column_type]
|
||||
converter = default.get(column_type)
|
||||
if converter is None:
|
||||
raise Exception("Unexpected type " + column_type)
|
||||
if isinstance(converter, str):
|
||||
frame[column_name] = frame[column_name].astype(converter)
|
||||
else:
|
||||
|
|
|
@ -73,6 +73,10 @@
|
|||
"ColumnName": "RecordReal",
|
||||
"ColumnType": "real"
|
||||
},
|
||||
{
|
||||
"ColumnName": "RecordDouble",
|
||||
"ColumnType": "double"
|
||||
},
|
||||
{
|
||||
"ColumnName": "RecordDecimal",
|
||||
"ColumnType": "decimal"
|
||||
|
@ -80,6 +84,10 @@
|
|||
{
|
||||
"ColumnName": "RecordDynamic",
|
||||
"ColumnType": "dynamic"
|
||||
},
|
||||
{
|
||||
"ColumnName": "MissingType",
|
||||
"ColumnType": "missing"
|
||||
}
|
||||
],
|
||||
"Rows": [
|
||||
|
@ -92,8 +100,8 @@
|
|||
222,
|
||||
92233720368,
|
||||
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
|
||||
3.14159, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
|
||||
3.14159, 7.89, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
|
||||
|
||||
],
|
||||
[
|
||||
|
@ -105,8 +113,8 @@
|
|||
222,
|
||||
92233720368,
|
||||
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
|
||||
"NaN", "NaN",
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
|
||||
"NaN", "NaN", "NaN",
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
|
||||
|
||||
],
|
||||
[
|
||||
|
@ -118,8 +126,8 @@
|
|||
222,
|
||||
92233720368,
|
||||
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
|
||||
"Infinity", "Infinity",
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
|
||||
"Infinity", "Infinity", "Infinity",
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
|
||||
|
||||
],
|
||||
[
|
||||
|
@ -131,8 +139,8 @@
|
|||
222,
|
||||
92233720368,
|
||||
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
|
||||
"-Infinity", "-Infinity",
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
|
||||
"-Infinity", "-Infinity", "-Infinity",
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
|
||||
|
||||
],
|
||||
[
|
||||
|
@ -144,8 +152,8 @@
|
|||
222,
|
||||
92233720368,
|
||||
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
|
||||
3.14159, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
|
||||
3.14159, 7.89, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
|
||||
|
||||
],
|
||||
[
|
||||
|
@ -157,8 +165,8 @@
|
|||
222,
|
||||
92233720368,
|
||||
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
|
||||
3.14159, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
|
||||
3.14159, 7.89, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
|
||||
|
||||
],
|
||||
[
|
||||
|
@ -170,8 +178,8 @@
|
|||
222,
|
||||
92233720368,
|
||||
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
|
||||
3.14159, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
|
||||
3.14159, 7.89, 1.2,
|
||||
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
|
||||
|
||||
],
|
||||
[
|
||||
|
|
|
@ -4,6 +4,8 @@ import datetime
|
|||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from azure.kusto.data._models import KustoResultTable
|
||||
from azure.kusto.data.helpers import dataframe_from_result_table
|
||||
from azure.kusto.data.response import KustoResponseDataSetV2
|
||||
|
@ -20,7 +22,11 @@ with open(os.path.join(os.path.dirname(__file__), "input", "dataframe.json"), "r
|
|||
|
||||
response = KustoResponseDataSetV2(json.loads(data))
|
||||
# Test when given both types of dictionary parameters that type conversion doesn't override column name conversion
|
||||
test_dict_by_name = {"RecordName": lambda col, frame: frame[col].astype("str"), "RecordInt64": lambda col, frame: frame[col].astype("int64")}
|
||||
test_dict_by_name = {
|
||||
"RecordName": lambda col, frame: frame[col].astype("str"),
|
||||
"RecordInt64": lambda col, frame: frame[col].astype("int64"),
|
||||
"MissingType": lambda col, frame: frame[col].astype("str"),
|
||||
}
|
||||
test_dict_by_type = {"int": lambda col, frame: frame[col].astype("int32")}
|
||||
df = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_dict_by_type, converters_by_column_name=test_dict_by_name)
|
||||
|
||||
|
@ -52,6 +58,8 @@ assert type(df.iloc[0].RecordLong) is numpy.int64
|
|||
assert df.iloc[0].RecordLong == 92233720368
|
||||
assert type(df.iloc[0].RecordReal) is numpy.float64
|
||||
assert df.iloc[0].RecordReal == 3.14159
|
||||
assert type(df.iloc[0].RecordDouble) is numpy.float64
|
||||
assert df.iloc[0].RecordDouble == 7.89
|
||||
assert type(df.iloc[0].RecordDecimal) is numpy.float64
|
||||
assert df.iloc[0].RecordDecimal == 1.2
|
||||
|
||||
|
@ -90,10 +98,17 @@ assert df.iloc[6].RecordOffset == pandas.to_timedelta("1 days 01:01:01")
|
|||
|
||||
# Testing int to float conversion
|
||||
test_int_to_float = {"int": "float64"}
|
||||
df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float)
|
||||
ignore_missing_type = {
|
||||
"MissingType": lambda col, frame: frame[col].astype("str"),
|
||||
}
|
||||
df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float, converters_by_column_name=ignore_missing_type)
|
||||
assert type(df_int_to_float.iloc[0].RecordInt) is numpy.float64
|
||||
assert df.iloc[0].RecordInt == 5678
|
||||
|
||||
# Testing missing type conversion
|
||||
with pytest.raises(Exception):
|
||||
df_missing_type = dataframe_from_result_table(response.primary_results[0])
|
||||
|
||||
|
||||
def test_pandas_mixed_date():
|
||||
df = dataframe_from_result_table(
|
||||
|
|
Загрузка…
Ссылка в новой задаче