Fix logic for testing containment in dynamic types (#96)

* Fix logic for testing containment in dynamic types

* Remove code duplication in dynamic types, fix tests

Co-authored-by: Yonatan Most <>
This commit is contained in:
Yonatan Most 2020-06-15 15:09:26 +03:00 коммит произвёл GitHub
Родитель 89235ad79c
Коммит 63a4372752
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 68 добавлений и 44 удалений

Просмотреть файл

@ -107,12 +107,18 @@ class BaseExpression:
def __ne__(self, other: ExpressionType) -> 'BooleanExpression':
    """Return the boolean expression produced by combining this expression and `other` with the ' != ' operator."""
    inequality = BooleanExpression.binary_op(self, ' != ', other)
    return inequality
def is_in(self, other: ArrayType) -> 'BooleanExpression':
def is_in(self, other: DynamicType) -> 'BooleanExpression':
"""
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic#operators-and-functions-over-dynamic-types
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/inoperator
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datatypes-string-operators
"""
return BooleanExpression.binary_op(self, ' in ', other)
if isinstance(other, (List, Tuple)):
# For a literal array, we can use 'in'
# The following RHS is the only place where a literal list does not require being surrounded by 'dynamic()'
return BooleanExpression(KQL(f'{self.kql} in ({", ".join(map(to_kql, other))})'))
# Otherwise, for some reason Kusto does not accept 'in', and we need to use 'contains' as if 'other' was a string
return BooleanExpression.binary_op(other, ' contains ', self)
def is_null(self) -> 'BooleanExpression':
"""
@ -678,11 +684,34 @@ class TimespanExpression(BaseExpression):
return BooleanExpression(KQL(f'{self.kql} between ({_subexpr_to_kql(lower)} .. {_subexpr_to_kql(upper)})'))
@plain_expression(KustoType.ARRAY)
class ArrayExpression(BaseExpression):
def __getitem__(self, index: NumberType) -> 'AnyExpression':
class BaseDynamicExpression(BaseExpression):
    """
    Common base for expressions over dynamic values; it holds the indexing and containment logic that the
    array and mapping expression classes inherit. Not meant to be instantiated directly.
    """

    # Making the class abstract through 'abc' would be preferable, but that only takes effect when at least one
    # abstract method exists, and there is none here. Overriding __new__ is the next best solution.
    # NOTE: the guard is an 'assert', so it is not enforced when Python runs with assertions disabled (-O).
    def __new__(cls, *args, **kwargs) -> 'BaseDynamicExpression':
        """Allocate an instance of `cls`, refusing to do so when `cls` is this base class itself."""
        assert cls is not BaseDynamicExpression, "BaseDynamicExpression is abstract"
        instance = object.__new__(cls)
        return instance

    def __getitem__(self, index: Union[StringType, NumberType]) -> 'AnyExpression':
        """Return an expression that subscripts this one with `index`, rendered as `<this>[<index>]`."""
        subscript = to_kql(index)
        return AnyExpression(KQL(f'{self.kql}[{subscript}]'))

    def contains(self, other: ExpressionType) -> 'BooleanExpression':
        """
        Return a boolean expression testing whether this dynamic value contains `other`.

        https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic#operators-and-functions-over-dynamic-types
        https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/inoperator
        https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datatypes-string-operators
        """
        # Kusto does not accept 'in' here, so 'contains' is used as though this value were a string.
        if isinstance(other, (BaseExpression, str)):
            needle = other
        else:
            # Syntactically 'contains' only works on strings, so any other literal is first rendered to KQL
            # and then passed on as a string.
            needle = str(to_kql(other))
        return BooleanExpression.binary_op(self, ' contains ', needle)
@plain_expression(KustoType.ARRAY)
class ArrayExpression(BaseDynamicExpression):
def __getitem__(self, index: NumberType) -> 'AnyExpression':
return super().__getitem__(index)
# We would like to allow using len(), but Python requires it to return an int, so we can't
def array_length(self) -> NumberExpression:
"""
@ -694,17 +723,18 @@ class ArrayExpression(BaseExpression):
"""
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic#operators-and-functions-over-dynamic-types
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/inoperator
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datatypes-string-operators
"""
return BooleanExpression.binary_op(other, ' in ', self)
return self.contains(other)
def assign_to_multiple_columns(self, *columns: 'AnyTypeColumn') -> 'AssignmentToMultipleColumns':
return AssignmentToMultipleColumns(columns, self)
@plain_expression(KustoType.MAPPING)
class MappingExpression(BaseExpression):
class MappingExpression(BaseDynamicExpression):
def __getitem__(self, index: StringType) -> 'AnyExpression':
return AnyExpression(KQL(f'{self.kql}[{to_kql(index)}]'))
return super().__getitem__(index)
def __getattr__(self, name: str) -> 'AnyExpression':
return AnyExpression(KQL(f'{self.kql}.{name}'))
@ -715,10 +745,17 @@ class MappingExpression(BaseExpression):
"""
return ArrayExpression(KQL(f'bag_keys({self.kql})'))
def bag_contains(self, other: ExpressionType) -> 'BooleanExpression':
"""
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic#operators-and-functions-over-dynamic-types
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/inoperator
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datatypes-string-operators
"""
return self.contains(other)
class DynamicExpression(ArrayExpression, MappingExpression):
def __getitem__(self, index: Union[StringType, NumberType]) -> 'AnyExpression':
return AnyExpression(KQL(f'{self.kql}[{_subexpr_to_kql(index)}]'))
pass
class AnyExpression(

Просмотреть файл

@ -1,4 +1,3 @@
import json
from itertools import chain
from typing import Union
@ -18,6 +17,7 @@ class Functions:
Recommended import style:\n
`from pykusto.functions import Functions as f`
"""
# Scalar functions
@staticmethod
@ -894,18 +894,12 @@ class Functions:
raise ValueError("strcat requires at least two arguments")
return StringExpression(KQL(f"strcat({', '.join(to_kql(s) for s in strings)})"))
@staticmethod
def to_literal_dynamic(d: DynamicType) -> KQL:
    """
    Render `d` as KQL: an expression contributes its own `kql`, while any other value is serialized
    as JSON and wrapped in `dynamic(...)`.
    """
    if not isinstance(d, BaseExpression):
        serialized = json.dumps(d)
        return KQL(f'dynamic({serialized})')
    return d.kql
@staticmethod
def strcat_array(expr: ArrayType, delimiter: StringType) -> StringExpression:
"""
https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-arrayfunction
"""
return StringExpression(KQL(f'strcat_array({Functions.to_literal_dynamic(expr)}, {to_kql(delimiter)})'))
return StringExpression(KQL(f'strcat_array({to_kql(expr)}, {to_kql(delimiter)})'))
@staticmethod
def strcat_delim(delimiter: StringType, expr1: StringType, expr2: StringType, *expressions: StringType) -> StringExpression:

Просмотреть файл

@ -24,7 +24,7 @@ def timedelta_to_kql(td: timedelta) -> KQL:
@kql_converter(KustoType.ARRAY, KustoType.MAPPING)
def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL:
try:
query = list(json.dumps(d))
return KQL(f"dynamic({json.dumps(d)})")
except TypeError:
# Using exceptions as part of normal flow is not best practice, however in this case we have a good reason.
# The given object might contain a non-primitive object somewhere inside it, and the only way to find it is to go through the entire hierarchy, which is exactly
@ -32,29 +32,8 @@ def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL:
# Also keep in mind that a 'try' block in Python costs next to nothing as long as no exception is raised; only the (rare) fallback path pays the price of raising one.
return build_dynamic(d)
# Convert square brackets to round brackets (Issue #11)
counter = 0
prev = ""
for i, c in enumerate(query):
if counter == 0:
if c == "[":
query[i] = "("
elif c == "]":
query[i] = ")"
elif c in ['"', '\''] and prev != "\\":
counter += 1
elif counter > 0:
if c in ['"', '\''] and prev != "\\":
counter -= 1
prev = query[i]
assert counter == 0
return KQL("".join(query))
def build_dynamic(d: Union[Mapping, List, Tuple]) -> KQL:
from pykusto.expressions import BaseExpression
if isinstance(d, BaseExpression):
return d.kql
if isinstance(d, Mapping):
return KQL(f"pack({', '.join(map(build_dynamic, chain(*d.items())))})")
if isinstance(d, (List, Tuple)):

Просмотреть файл

@ -35,10 +35,16 @@ class TestExpressions(TestBase):
def test_array_contains(self):
self.assertEqual(
' | where true in arrayField',
' | where arrayField contains "true"',
Query().where(t.arrayField.array_contains(True)).render(),
)
def test_bag_contains(self):
    """bag_contains() with a numeric literal renders as 'contains' with the literal quoted as a string."""
    rendered = Query().where(t.mapField.bag_contains(2)).render()
    self.assertEqual(' | where mapField contains "2"', rendered)
def test_not_equals(self):
self.assertEqual(
' | where stringField != "bar"',
@ -333,7 +339,7 @@ class TestExpressions(TestBase):
def test_dynamic(self):
self.assertEqual(
' | where (mapField["foo"][0].bar[1][2][(tolower(stringField))]) > time(1.0:0:0.0)',
' | where (mapField["foo"][0].bar[1][2][tolower(stringField)]) > time(1.0:0:0.0)',
Query().where(t.mapField['foo'][0].bar[1][2][t.stringField.lower()] > timedelta(1)).render(),
)
@ -391,6 +397,12 @@ class TestExpressions(TestBase):
lambda: t.stringField in t.stringField2
)
def test_is_in_expression(self):
    """is_in() with an array column on the right renders as '<array> contains <value>'."""
    rendered = Query().where(t.stringField.is_in(t.arrayField)).render()
    self.assertEqual(' | where arrayField contains stringField', rendered)
def test_has(self):
self.assertEqual(
' | where stringField has "test"',

Просмотреть файл

@ -13,8 +13,8 @@ class TestUtils(TestBase):
"pets": ["Libby", "Panda", "]", "["]
}
self.assertEqual(
'{"name": "Alan", "age": 21, "address": ("NY", 36), '
'"pets": ("Libby", "Panda", "]", "[")}',
'dynamic({"name": "Alan", "age": 21, "address": ["NY", 36], '
'"pets": ["Libby", "Panda", "]", "["]})',
to_kql(test_dict)
)
@ -25,6 +25,7 @@ class TestUtils(TestBase):
def str_annotated(s: str) -> str:
return "response to " + s
# noinspection PyTypeChecker
self.assertEqual(
"response to test for_type",
test_annotation.for_type(str)("test for_type")
@ -49,6 +50,7 @@ class TestUtils(TestBase):
def str_annotated(s: str) -> str:
return "response to " + s
# noinspection PyTypeChecker
self.assertRaises(
ValueError("Test annotation: no registered callable for type bool"),
lambda: test_annotation.for_type(bool)("test for_type")