Fix dataframe_from_result_table error message for unknown types + support old type names (#543)

* Added dictionary options as parameters for type matching for dataframe_from_result_table func * reformatted the file * reformatted the file * reformatted the file * Fixes after PR * Fixes after PR * Fixes after PR * Fixes after PR * Fixes after PR * For python 3.7 3.8 * Nullable bools fix * Nullable bools fix * Nullable bools fix * Nullable bools fix2 * PR comment fixes * LRU cache default parameters for python 3.7 * LRU cache maxsize=1 * modified changes requested * black * fix for numpy 2.0 * fix numpy 2.0 nan * fix numpy 2.0 ninf, inf * fix numpy 2.0 -inf * fix tenacity>=8.3 * fix tenacity>=8.3 * black * fixed error message in case of non-existent type + added old type names in kusto * Delete azure-kusto-ingest/azure/kusto/ingest/V2/__init__.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/blob_source.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/compression_type.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/ingestion_source.py * Update setup.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/kusto_storage_uploader.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/local_source.py * fixed error message in case of non-existent type + added old type names in kusto * added test for missing and old types * black * using pytest.raises
2024-06-18 17:22:22 +03:00 · 2024-06-18 17:22:22 +03:00 · 5c1909339d
--- a/azure-kusto-data/azure/kusto/data/helpers.py
+++ b/azure-kusto-data/azure/kusto/data/helpers.py
@ -18,14 +18,22 @@ def default_dict() -> Converter:
    return {
        "string": lambda col, df: df[col].astype(pd.StringDtype()) if hasattr(pd, "StringDType") else df[col],
        "guid": lambda col, df: df[col],
+        "uuid": lambda col, df: df[col],
+        "uniqueid": lambda col, df: df[col],
        "dynamic": lambda col, df: df[col],
        "bool": lambda col, df: df[col].astype(bool),
+        "boolean": lambda col, df: df[col].astype(bool),
        "int": lambda col, df: df[col].astype(pd.Int32Dtype()),
+        "int32": lambda col, df: df[col].astype(pd.Int32Dtype()),
+        "int64": lambda col, df: df[col].astype(pd.Int64Dtype()),
        "long": lambda col, df: df[col].astype(pd.Int64Dtype()),
        "real": lambda col, df: parse_float(df, col),
+        "double": lambda col, df: parse_float(df, col),
        "decimal": lambda col, df: parse_float(df, col),
        "datetime": lambda col, df: parse_datetime(df, col),
+        "date": lambda col, df: parse_datetime(df, col),
        "timespan": lambda col, df: df[col].apply(parse_timedelta),
+        "time": lambda col, df: df[col].apply(parse_timedelta),
    }


@ -67,13 +75,15 @@ def dataframe_from_result_table(
        column_name = col.column_name
        column_type = col.column_type
        if converters_by_column_name and column_name in converters_by_column_name:
-            converter = converters_by_column_name[column_name]
+            converter = converters_by_column_name.get(column_name)
        elif converters_by_type and column_type in converters_by_type:
-            converter = converters_by_type[column_type]
+            converter = converters_by_type.get(column_type)
        elif nullable_bools and column_type == "bool":
            converter = lambda col, df: df[col].astype(pd.BooleanDtype())
        else:
-            converter = default[column_type]
+            converter = default.get(column_type)
+        if converter is None:
+            raise Exception("Unexpected type " + column_type)
        if isinstance(converter, str):
            frame[column_name] = frame[column_name].astype(converter)
        else:
--- a/azure-kusto-data/tests/input/dataframe.json
+++ b/azure-kusto-data/tests/input/dataframe.json
@ -73,6 +73,10 @@
        "ColumnName": "RecordReal",
        "ColumnType": "real"
      },
+      {
+        "ColumnName": "RecordDouble",
+        "ColumnType": "double"
+      },
      {
        "ColumnName": "RecordDecimal",
        "ColumnType": "decimal"
@ -80,6 +84,10 @@
      {
        "ColumnName": "RecordDynamic",
        "ColumnType": "dynamic"
+      },
+      {
+        "ColumnName": "MissingType",
+        "ColumnType": "missing"
      }
    ],
    "Rows": [
@ -92,8 +100,8 @@
        222,
        92233720368,
        "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"

      ],
      [
@ -105,8 +113,8 @@
        222,
        92233720368,
        "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        "NaN", "NaN",
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        "NaN", "NaN", "NaN",
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"

      ],
      [
@ -118,8 +126,8 @@
        222,
        92233720368,
        "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        "Infinity", "Infinity",
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        "Infinity", "Infinity", "Infinity",
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"

      ],
      [
@ -131,8 +139,8 @@
        222,
        92233720368,
        "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        "-Infinity", "-Infinity",
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        "-Infinity", "-Infinity", "-Infinity",
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"

      ],
      [
@ -144,8 +152,8 @@
        222,
        92233720368,
        "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"

      ],
      [
@ -157,8 +165,8 @@
        222,
        92233720368,
        "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"

      ],
      [
@ -170,8 +178,8 @@
        222,
        92233720368,
        "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"

      ],
      [
--- a/azure-kusto-data/tests/test_helpers.py
+++ b/azure-kusto-data/tests/test_helpers.py
@ -4,6 +4,8 @@ import datetime
 import json
 import os

+import pytest
+
 from azure.kusto.data._models import KustoResultTable
 from azure.kusto.data.helpers import dataframe_from_result_table
 from azure.kusto.data.response import KustoResponseDataSetV2
@ -20,7 +22,11 @@ with open(os.path.join(os.path.dirname(__file__), "input", "dataframe.json"), "r

 response = KustoResponseDataSetV2(json.loads(data))
 # Test when given both types of dictionary parameters that type conversion doesn't override column name conversion
-test_dict_by_name = {"RecordName": lambda col, frame: frame[col].astype("str"), "RecordInt64": lambda col, frame: frame[col].astype("int64")}
+test_dict_by_name = {
+    "RecordName": lambda col, frame: frame[col].astype("str"),
+    "RecordInt64": lambda col, frame: frame[col].astype("int64"),
+    "MissingType": lambda col, frame: frame[col].astype("str"),
+}
 test_dict_by_type = {"int": lambda col, frame: frame[col].astype("int32")}
 df = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_dict_by_type, converters_by_column_name=test_dict_by_name)

@ -52,6 +58,8 @@ assert type(df.iloc[0].RecordLong) is numpy.int64
 assert df.iloc[0].RecordLong == 92233720368
 assert type(df.iloc[0].RecordReal) is numpy.float64
 assert df.iloc[0].RecordReal == 3.14159
+assert type(df.iloc[0].RecordDouble) is numpy.float64
+assert df.iloc[0].RecordDouble == 7.89
 assert type(df.iloc[0].RecordDecimal) is numpy.float64
 assert df.iloc[0].RecordDecimal == 1.2

@ -90,10 +98,17 @@ assert df.iloc[6].RecordOffset == pandas.to_timedelta("1 days 01:01:01")

 # Testing int to float conversion
 test_int_to_float = {"int": "float64"}
-df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float)
+ignore_missing_type = {
+    "MissingType": lambda col, frame: frame[col].astype("str"),
+}
+df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float, converters_by_column_name=ignore_missing_type)
 assert type(df_int_to_float.iloc[0].RecordInt) is numpy.float64
 assert df.iloc[0].RecordInt == 5678

+# Testing missing type conversion
+with pytest.raises(Exception):
+    df_missing_type = dataframe_from_result_table(response.primary_results[0])
+

 def test_pandas_mixed_date():
    df = dataframe_from_result_table(