diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index 0a5638a..653a66e 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -18,14 +18,22 @@ def default_dict() -> Converter: return { "string": lambda col, df: df[col].astype(pd.StringDtype()) if hasattr(pd, "StringDType") else df[col], "guid": lambda col, df: df[col], + "uuid": lambda col, df: df[col], + "uniqueid": lambda col, df: df[col], "dynamic": lambda col, df: df[col], "bool": lambda col, df: df[col].astype(bool), + "boolean": lambda col, df: df[col].astype(bool), "int": lambda col, df: df[col].astype(pd.Int32Dtype()), + "int32": lambda col, df: df[col].astype(pd.Int32Dtype()), + "int64": lambda col, df: df[col].astype(pd.Int64Dtype()), "long": lambda col, df: df[col].astype(pd.Int64Dtype()), "real": lambda col, df: parse_float(df, col), + "double": lambda col, df: parse_float(df, col), "decimal": lambda col, df: parse_float(df, col), "datetime": lambda col, df: parse_datetime(df, col), + "date": lambda col, df: parse_datetime(df, col), "timespan": lambda col, df: df[col].apply(parse_timedelta), + "time": lambda col, df: df[col].apply(parse_timedelta), } @@ -67,13 +75,15 @@ def dataframe_from_result_table( column_name = col.column_name column_type = col.column_type if converters_by_column_name and column_name in converters_by_column_name: - converter = converters_by_column_name[column_name] + converter = converters_by_column_name.get(column_name) elif converters_by_type and column_type in converters_by_type: - converter = converters_by_type[column_type] + converter = converters_by_type.get(column_type) elif nullable_bools and column_type == "bool": converter = lambda col, df: df[col].astype(pd.BooleanDtype()) else: - converter = default[column_type] + converter = default.get(column_type) + if converter is None: + raise Exception("Unexpected type " + column_type) if isinstance(converter, str): frame[column_name] = frame[column_name].astype(converter) else: diff --git a/azure-kusto-data/tests/input/dataframe.json b/azure-kusto-data/tests/input/dataframe.json index 18d56c9..0c82aea 100644 --- a/azure-kusto-data/tests/input/dataframe.json +++ b/azure-kusto-data/tests/input/dataframe.json @@ -73,6 +73,10 @@ "ColumnName": "RecordReal", "ColumnType": "real" }, + { + "ColumnName": "RecordDouble", + "ColumnType": "double" + }, { "ColumnName": "RecordDecimal", "ColumnType": "decimal" @@ -80,6 +84,10 @@ { "ColumnName": "RecordDynamic", "ColumnType": "dynamic" + }, + { + "ColumnName": "MissingType", + "ColumnType": "missing" } ], "Rows": [ @@ -92,8 +100,8 @@ 222, 92233720368, "6f3c1072-2739-461c-8aa7-3cfc8ff528a8", - 3.14159, 1.2, - "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}" + 3.14159, 7.89, 1.2, + "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss" ], [ @@ -105,8 +113,8 @@ 222, 92233720368, "6f3c1072-2739-461c-8aa7-3cfc8ff528a8", - "NaN", "NaN", - "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}" + "NaN", "NaN", "NaN", + "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss" ], [ @@ -118,8 +126,8 @@ 222, 92233720368, "6f3c1072-2739-461c-8aa7-3cfc8ff528a8", - "Infinity", "Infinity", - "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}" + "Infinity", "Infinity", "Infinity", + "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss" ], [ @@ -131,8 +139,8 @@ 222, 92233720368, "6f3c1072-2739-461c-8aa7-3cfc8ff528a8", - "-Infinity", "-Infinity", - "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}" + "-Infinity", "-Infinity", "-Infinity", + "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss" ], [ @@ -144,8 +152,8 @@ 222, 92233720368, "6f3c1072-2739-461c-8aa7-3cfc8ff528a8", - 3.14159, 1.2, - "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}" + 3.14159, 7.89, 1.2, + "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss" ], [ @@ -157,8 +165,8 @@ 222, 92233720368, "6f3c1072-2739-461c-8aa7-3cfc8ff528a8", - 3.14159, 1.2, - "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}" + 3.14159, 7.89, 1.2, + "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss" ], [ @@ -170,8 +178,8 @@ 222, 92233720368, "6f3c1072-2739-461c-8aa7-3cfc8ff528a8", - 3.14159, 1.2, - "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}" + 3.14159, 7.89, 1.2, + "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss" ], [ diff --git a/azure-kusto-data/tests/test_helpers.py b/azure-kusto-data/tests/test_helpers.py index 989a4ea..3d0f0d8 100644 --- a/azure-kusto-data/tests/test_helpers.py +++ b/azure-kusto-data/tests/test_helpers.py @@ -4,6 +4,8 @@ import datetime import json import os +import pytest + from azure.kusto.data._models import KustoResultTable from azure.kusto.data.helpers import dataframe_from_result_table from azure.kusto.data.response import KustoResponseDataSetV2 @@ -20,7 +22,11 @@ with open(os.path.join(os.path.dirname(__file__), "input", "dataframe.json"), "r response = KustoResponseDataSetV2(json.loads(data)) # Test when given both types of dictionary parameters that type conversion doesn't override column name conversion -test_dict_by_name = {"RecordName": lambda col, frame: frame[col].astype("str"), "RecordInt64": lambda col, frame: frame[col].astype("int64")} +test_dict_by_name = { + "RecordName": lambda col, frame: frame[col].astype("str"), + "RecordInt64": lambda col, frame: frame[col].astype("int64"), + "MissingType": lambda col, frame: frame[col].astype("str"), +} test_dict_by_type = {"int": lambda col, frame: frame[col].astype("int32")} df = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_dict_by_type, converters_by_column_name=test_dict_by_name) @@ -52,6 +58,8 @@ assert type(df.iloc[0].RecordLong) is numpy.int64 assert df.iloc[0].RecordLong == 92233720368 assert type(df.iloc[0].RecordReal) is numpy.float64 assert df.iloc[0].RecordReal == 3.14159 +assert type(df.iloc[0].RecordDouble) is numpy.float64 +assert df.iloc[0].RecordDouble == 7.89 assert type(df.iloc[0].RecordDecimal) is numpy.float64 assert df.iloc[0].RecordDecimal == 1.2 @@ -90,10 +98,17 @@ assert df.iloc[6].RecordOffset == pandas.to_timedelta("1 days 01:01:01") # Testing int to float conversion test_int_to_float = {"int": "float64"} -df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float) +ignore_missing_type = { + "MissingType": lambda col, frame: frame[col].astype("str"), +} +df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float, converters_by_column_name=ignore_missing_type) assert type(df_int_to_float.iloc[0].RecordInt) is numpy.float64 assert df.iloc[0].RecordInt == 5678 +# Testing missing type conversion +with pytest.raises(Exception): + df_missing_type = dataframe_from_result_table(response.primary_results[0]) + def test_pandas_mixed_date(): df = dataframe_from_result_table(