diff --git a/pykusto/client.py b/pykusto/client.py index 2bc47e4..23b7266 100644 --- a/pykusto/client.py +++ b/pykusto/client.py @@ -1,5 +1,4 @@ from typing import Union, List, Tuple - # noinspection PyProtectedMember from urllib.parse import urlparse @@ -7,7 +6,7 @@ from urllib.parse import urlparse from azure.kusto.data._response import KustoResponseDataSet from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties -from pykusto.utils import KQL +from pykusto.kql_converters import KQL class PyKustoClient: diff --git a/pykusto/expressions.py b/pykusto/expressions.py index d921749..c8d1018 100644 --- a/pykusto/expressions.py +++ b/pykusto/expressions.py @@ -1,9 +1,10 @@ from datetime import datetime, timedelta -from typing import Any, List, Tuple, Mapping, Optional +from numbers import Number +from typing import Any, List, Tuple, Mapping, Optional, Type from typing import Union -from pykusto.utils import KQL -from pykusto.utils import KustoTypes, to_kql +from pykusto.kql_converters import KQL +from pykusto.type_utils import plain_expression, aggregation_expression, KustoTypes, kql_converter ExpressionType = Union[KustoTypes, 'BaseExpression'] StringType = Union[str, 'StringExpression'] @@ -13,7 +14,6 @@ ArrayType = Union[List, Tuple, 'ArrayExpression'] MappingType = Union[Mapping, 'MappingExpression'] DatetimeType = Union[datetime, 'DatetimeExpression'] TimespanType = Union[timedelta, 'TimespanExpression'] -AggregationType = Union['AggregationExpression'] DynamicType = Union[ArrayType, MappingType] OrderType = Union[DatetimeType, TimespanType, NumberType, StringType] @@ -29,11 +29,17 @@ def _subexpr_to_kql(obj: ExpressionType) -> KQL: class BaseExpression: kql: KQL + def __new__(cls, *args, **kwargs): + if cls is 'BaseExpression': + raise TypeError("BaseExpression is abstract") + return object.__new__(cls) + def __init__(self, kql: KQL) -> None: + assert isinstance(kql, str) self.kql = kql def __repr__(self) -> str: - return self.kql + return str(self.kql) def as_subexpression(self) -> KQL: return KQL('({})'.format(self.kql)) @@ -58,10 +64,15 @@ class BaseExpression: return BooleanExpression(KQL('{} has \"{}\"'.format(self.kql, exp))) @staticmethod - def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> KQL: - return KQL('{}{}{}'.format( + def base_binary_op( + left: ExpressionType, operator: str, right: ExpressionType, result_type: Type[KustoTypes] + ) -> 'BaseExpression': + registrar = plain_expression + if isinstance(left, AggregationExpression) or isinstance(right, AggregationExpression): + registrar = aggregation_expression + return registrar.for_type(result_type)(KQL('{}{}{}'.format( _subexpr_to_kql(left), operator, _subexpr_to_kql(right)) - ) + )) def __eq__(self, other: ExpressionType) -> 'BooleanExpression': return BooleanExpression.binary_op(self, ' == ', other) @@ -100,10 +111,11 @@ class BaseExpression: raise ValueError("Only arrays can be assigned to multiple columns") +@plain_expression(bool) class BooleanExpression(BaseExpression): @staticmethod def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'BooleanExpression': - return BooleanExpression(BaseExpression.binary_op(left, operator, right)) + return BaseExpression.base_binary_op(left, operator, right, bool) def __and__(self, other: BooleanType) -> 'BooleanExpression': return BooleanExpression.binary_op(self, ' and ', other) @@ -115,10 +127,11 @@ class BooleanExpression(BaseExpression): return BooleanExpression(KQL('not({})'.format(self.kql))) +@plain_expression(Number) class NumberExpression(BaseExpression): @staticmethod def binary_op(left: NumberType, operator: str, right: NumberType) -> 'NumberExpression': - return NumberExpression(BaseExpression.binary_op(left, operator, right)) + return BaseExpression.base_binary_op(left, operator, right, Number) def __lt__(self, other: NumberType) -> BooleanExpression: return BooleanExpression.binary_op(self, ' < ', other) @@ -168,15 +181,15 @@ class NumberExpression(BaseExpression): return NumberExpression(KQL('floor({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) def bin(self, round_to: NumberType) -> 'BaseExpression': - return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) + return NumberExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) def bin_at(self, round_to: NumberType, fixed_point: NumberType) -> 'BaseExpression': - return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql, - _subexpr_to_kql(round_to), - _subexpr_to_kql(fixed_point)))) + return NumberExpression(KQL('bin_at({}, {}, {})'.format(self.kql, + _subexpr_to_kql(round_to), + _subexpr_to_kql(fixed_point)))) def bin_auto(self) -> 'BaseExpression': - return BaseExpression(KQL('bin_auto({})'.format(self.kql))) + return NumberExpression(KQL('bin_auto({})'.format(self.kql))) def ceiling(self) -> 'NumberExpression': return NumberExpression(KQL('ceiling({})'.format(self.kql))) @@ -213,11 +226,16 @@ class NumberExpression(BaseExpression): def round(self, precision: NumberType = None) -> 'NumberExpression': return NumberExpression(KQL( - ('round({}, {})' if precision is None else 'round({}, {})').format(self, precision) + ('round({}, {})' if precision is None else 'round({}, {})').format(self.kql, to_kql(precision)) )) +@plain_expression(str) class StringExpression(BaseExpression): + @staticmethod + def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'StringExpression': + return BaseExpression.base_binary_op(left, operator, right, str) + def __len__(self) -> NumberExpression: return self.string_size() @@ -228,7 +246,7 @@ class StringExpression(BaseExpression): return BooleanExpression(KQL('isempty({})'.format(self.kql))) def __add__(self, other: StringType) -> 'StringExpression': - return StringExpression(BaseExpression.binary_op(self, ' + ', other)) + return StringExpression.binary_op(self, ' + ', other) @staticmethod def concat(*strings: StringType) -> 'StringExpression': @@ -238,8 +256,10 @@ class StringExpression(BaseExpression): def split(self, delimiter: StringType, requested_index: NumberType = None) -> 'ArrayExpression': if requested_index is None: - return ArrayExpression(KQL('split({}, {}'.format(self.kql, delimiter))) - return ArrayExpression(KQL('split({}, {}, {}'.format(self.kql, delimiter, requested_index))) + return ArrayExpression(KQL('split({}, {}'.format(self.kql, to_kql(delimiter)))) + return ArrayExpression(KQL('split({}, {}, {}'.format( + self.kql, _subexpr_to_kql(delimiter), to_kql(requested_index) + ))) def equals(self, other: StringType, case_sensitive: bool = False) -> BooleanExpression: return BooleanExpression.binary_op(self, ' == ' if case_sensitive else ' =~ ', other) @@ -278,10 +298,11 @@ class StringExpression(BaseExpression): return BooleanExpression(KQL('isutf8({})'.format(self.kql))) +@plain_expression(datetime) class DatetimeExpression(BaseExpression): @staticmethod def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'DatetimeExpression': - return DatetimeExpression(BaseExpression.binary_op(left, operator, right)) + return BaseExpression.base_binary_op(left, operator, right, datetime) def __lt__(self, other: DatetimeType) -> BooleanExpression: return BooleanExpression.binary_op(self, ' < ', other) @@ -316,15 +337,15 @@ class DatetimeExpression(BaseExpression): return DatetimeExpression(KQL('floor({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) def bin(self, round_to: TimespanType) -> 'BaseExpression': - return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) + return DatetimeExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) def bin_at(self, round_to: TimespanType, fixed_point: DatetimeType) -> 'BaseExpression': - return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql, - _subexpr_to_kql(round_to), - _subexpr_to_kql(fixed_point)))) + return DatetimeExpression(KQL('bin_at({}, {}, {})'.format( + self.kql, _subexpr_to_kql(round_to), to_kql(fixed_point) + ))) def bin_auto(self) -> 'BaseExpression': - return BaseExpression(KQL('bin_auto({})'.format(self.kql))) + return DatetimeExpression(KQL('bin_auto({})'.format(self.kql))) def endofday(self, offset: NumberType = None) -> 'DatetimeExpression': if offset is None: @@ -364,33 +385,34 @@ class DatetimeExpression(BaseExpression): return NumberExpression(KQL('getyear({})'.format(self.kql))) def hourofday(self) -> NumberExpression: - return NumberExpression(KQL('hourofday({})'.format(self))) + return NumberExpression(KQL('hourofday({})'.format(self.kql))) def startofday(self, offset: NumberType = None) -> 'DatetimeExpression': return DatetimeExpression(KQL( - ('startofday({})' if offset is None else 'startofday({}, {})').format(self.kql, offset) + ('startofday({})' if offset is None else 'startofday({}, {})').format(self.kql, to_kql(offset)) )) def startofmonth(self, offset: NumberType = None) -> 'DatetimeExpression': return DatetimeExpression(KQL( - ('startofmonth({})' if offset is None else 'startofmonth({}, {})').format(self.kql, offset) + ('startofmonth({})' if offset is None else 'startofmonth({}, {})').format(self.kql, to_kql(offset)) )) def startofweek(self, offset: NumberType = None) -> 'DatetimeExpression': return DatetimeExpression(KQL( - ('startofweek({})' if offset is None else 'startofweek({}, {})').format(self.kql, offset) + ('startofweek({})' if offset is None else 'startofweek({}, {})').format(self.kql, to_kql(offset)) )) def startofyear(self, offset: NumberType = None) -> 'DatetimeExpression': return DatetimeExpression(KQL( - ('startofyear({})' if offset is None else 'startofyear({}, {})').format(self.kql, offset) + ('startofyear({})' if offset is None else 'startofyear({}, {})').format(self.kql, to_kql(offset)) )) +@plain_expression(timedelta) class TimespanExpression(BaseExpression): @staticmethod def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'TimespanExpression': - return TimespanExpression(BaseExpression.binary_op(left, operator, right)) + return BaseExpression.base_binary_op(left, operator, right, timedelta) def __add__(self, other: TimespanType) -> 'TimespanExpression': return TimespanExpression.binary_op(self, ' + ', other) @@ -402,18 +424,18 @@ class TimespanExpression(BaseExpression): return DatetimeExpression(KQL('ago({})'.format(_subexpr_to_kql(self)))) def bin(self, round_to: TimespanType) -> 'BaseExpression': - return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) + return TimespanExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to)))) def bin_at(self, round_to: TimespanType, fixed_point: TimespanType) -> 'BaseExpression': - return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql, - _subexpr_to_kql(round_to), - _subexpr_to_kql(fixed_point)))) + return TimespanExpression(KQL('bin_at({}, {}, {})'.format( + self.kql, to_kql(round_to), to_kql(fixed_point) + ))) def bin_auto(self) -> 'BaseExpression': - return BaseExpression(KQL('bin_auto({})'.format(self.kql))) + return TimespanExpression(KQL('bin_auto({})'.format(self.kql))) def format_timespan(self, format_string: StringType) -> StringExpression: - return StringExpression(KQL('format_timespan({}, {})'.format(self.kql, _subexpr_to_kql(format_string)))) + return StringExpression(KQL('format_timespan({}, {})'.format(self.kql, to_kql(format_string)))) def between(self, lower: TimespanType, upper: TimespanType) -> BooleanExpression: return BooleanExpression(KQL('{} between ({} .. {})'.format( @@ -421,6 +443,7 @@ class TimespanExpression(BaseExpression): ))) +@plain_expression(List, Tuple) class ArrayExpression(BaseExpression): def __len__(self) -> NumberExpression: return self.array_length() @@ -438,7 +461,7 @@ class ArrayExpression(BaseExpression): ))) def __getitem__(self, index: NumberType) -> BaseExpression: - return BaseExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index)))) + return AnyExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index)))) def assign_to(self, *columns: 'Column') -> 'AssignmentBase': if len(columns) <= 1: @@ -446,6 +469,7 @@ class ArrayExpression(BaseExpression): return AssignmentToMultipleColumns(columns, self) +@plain_expression(Mapping) class MappingExpression(BaseExpression): def keys(self) -> ArrayExpression: return ArrayExpression(KQL('bag_keys({})'.format(self.kql))) @@ -457,10 +481,14 @@ class MappingExpression(BaseExpression): ))) def __getitem__(self, index: StringType) -> BaseExpression: - return BaseExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index)))) + return AnyExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index)))) class AggregationExpression(BaseExpression): + def __new__(cls, *args, **kwargs): + if cls is 'AggregationExpression': + raise TypeError("AggregationExpression is abstract") + return object.__new__(cls) def assign_to(self, *columns: 'Column') -> 'AssignmentFromAggregationToColumn': if len(columns) == 0: @@ -474,30 +502,37 @@ class AggregationExpression(BaseExpression): return self.kql +@aggregation_expression(bool) class BooleanAggregationExpression(AggregationExpression, BooleanExpression): pass +@aggregation_expression(Number) class NumberAggregationExpression(AggregationExpression, NumberExpression): pass +@aggregation_expression(str) class StringAggregationExpression(AggregationExpression, StringExpression): pass +@aggregation_expression(datetime) class DatetimeAggregationExpression(AggregationExpression, DatetimeExpression): pass +@aggregation_expression(timedelta) class TimespanAggregationExpression(AggregationExpression, TimespanExpression): pass +@aggregation_expression(List, Tuple) class ArrayAggregationExpression(AggregationExpression, ArrayExpression): pass +@aggregation_expression(Mapping) class MappingAggregationExpression(AggregationExpression, MappingExpression): pass @@ -541,15 +576,19 @@ class AssignmentToMultipleColumns(AssignmentBase): class AssignmentFromAggregationToColumn(AssignmentBase): - def __init__(self, column: Optional['Column'], aggregation: AggregationType) -> None: + def __init__(self, column: Optional['Column'], aggregation: AggregationExpression) -> None: super().__init__(None if column is None else column.kql, aggregation) -class Column( +class AnyExpression( NumberExpression, BooleanExpression, StringExpression, ArrayExpression, MappingExpression, DatetimeExpression, TimespanExpression ): + pass + + +class Column(AnyExpression): _name: str def __init__(self, name: str) -> None: @@ -588,3 +627,9 @@ class ColumnGenerator: # Recommended usage: from pykusto.expressions import column_generator as col # TODO: Is there a way to enforce this to be a singleton? column_generator = ColumnGenerator() + + +def to_kql(obj: ExpressionType) -> KQL: + if isinstance(obj, BaseExpression): + return obj.kql + return kql_converter.for_obj(obj) diff --git a/pykusto/functions.py b/pykusto/functions.py index 210958f..121096e 100644 --- a/pykusto/functions.py +++ b/pykusto/functions.py @@ -1,7 +1,11 @@ -from pykusto import utils -from pykusto.expressions import * -from pykusto.expressions import _subexpr_to_kql, Column -from pykusto.utils import KQL +from typing import Union + +from pykusto.expressions import _subexpr_to_kql, Column, NumberType, NumberExpression, TimespanType, \ + DatetimeExpression, TimespanExpression, ArrayType, DynamicType, DatetimeType, BaseExpression, BooleanType, \ + ExpressionType, AggregationExpression, StringType, StringExpression, BooleanExpression, \ + NumberAggregationExpression, MappingAggregationExpression, ArrayAggregationExpression, to_kql +from pykusto.kql_converters import KQL +from pykusto.type_utils import plain_expression # Scalar functions @@ -101,9 +105,9 @@ def bin_auto(expr: Union[NumberType, DatetimeType, TimespanType]) -> BaseExpress def case(predicate: BooleanType, val: ExpressionType, *args: Union[BooleanType, ExpressionType]) -> BaseExpression: - res = 'case({}, {}, {})'.format(_subexpr_to_kql(predicate), - _subexpr_to_kql(val), - ', '.join([_subexpr_to_kql(arg) for arg in args])) + res = 'case({}, {}, {})'.format( + _subexpr_to_kql(predicate), _subexpr_to_kql(val), ', '.join([_subexpr_to_kql(arg) for arg in args]) + ) return AggregationExpression(KQL(res)) @@ -167,7 +171,7 @@ def cos(expr: NumberType) -> NumberExpression: # def dcount_hll(expr: ExpressionType) -> BaseExpression: -# return BaseExpression(KQL('dcount_hll({})'.format(expr))) +# return BaseExpression(KQL('dcount_hll({})'.format(_subexpr_to_kql(expr)))) # def degrees(self): return @@ -260,7 +264,12 @@ def hourofday(expr: DatetimeType) -> NumberExpression: def iff(predicate: BooleanType, if_true: ExpressionType, if_false: ExpressionType) -> BaseExpression: - return BaseExpression(KQL('iff({}, {}, {})'.format(predicate, _subexpr_to_kql(if_true), _subexpr_to_kql(if_false)))) + return_type = type(if_true) + if type(if_false) is not return_type: + raise TypeError("The second and third arguments must be of the same type") + return plain_expression.for_type(return_type)( + KQL('iff({}, {}, {})'.format(to_kql(predicate), to_kql(if_true), to_kql(if_false))) + ) def iif(predicate: BooleanType, if_true: ExpressionType, if_false: ExpressionType) -> BaseExpression: @@ -365,7 +374,7 @@ def new_guid(): raise NotImplemented # TODO def now(offset: TimespanType = None) -> StringExpression: if offset: - return StringExpression(KQL('now({})'.format(utils.timedelta_to_kql(offset)))) + return StringExpression(KQL('now({})'.format(to_kql(offset)))) return StringExpression(KQL('now()')) @@ -417,7 +426,7 @@ def percentrank_tdigest(): raise NotImplemented # TODO def pow(expr1: NumberType, expr2: NumberType) -> NumberExpression: - return NumberExpression(KQL('pow({}, {})'.format(expr1, expr2))) + return NumberExpression(KQL('pow({}, {})'.format(_subexpr_to_kql(expr1), _subexpr_to_kql(expr2)))) # def radians(self): return @@ -542,7 +551,7 @@ def round(expr: NumberType, precision: NumberType = None) -> NumberExpression: def sign(expr: NumberType) -> NumberExpression: - return NumberExpression(KQL('sign({})'.format(expr))) + return NumberExpression(KQL('sign({})'.format(_subexpr_to_kql(expr)))) # def sin(self): return @@ -552,7 +561,7 @@ def sign(expr: NumberType) -> NumberExpression: def sqrt(expr: NumberType) -> NumberExpression: - return NumberExpression(KQL('sqrt({})'.format(expr))) + return NumberExpression(KQL('sqrt({})'.format(_subexpr_to_kql(expr)))) def startofday(expr: DatetimeType, offset: NumberType = None) -> DatetimeExpression: @@ -580,7 +589,7 @@ def strcat(expr1: StringType, expr2: StringType, *exprs: StringType) -> StringEx def strcat_array(expr: ArrayType, delimiter: StringType) -> StringExpression: - return StringExpression(KQL('strcat_array({}, {})'.format(expr, _subexpr_to_kql(delimiter)))) + return StringExpression(KQL('strcat_array({}, {})'.format(to_kql(expr), to_kql(delimiter)))) def strcat_delim(delimiter: StringType, expr1: StringType, expr2: StringType, *exprs: StringType) -> StringExpression: @@ -594,30 +603,33 @@ def strcat_delim(delimiter: StringType, expr1: StringType, expr2: StringType, *e def strcmp(expr1: StringType, expr2: StringType) -> NumberExpression: - return NumberExpression(KQL('strcmp({}, {})'.format(expr1, expr2))) + return NumberExpression(KQL('strcmp({}, {})'.format(_subexpr_to_kql(expr1), _subexpr_to_kql(expr2)))) def string_size(expr: StringType) -> NumberExpression: - return NumberExpression(KQL('string_size({})'.format(expr))) + return NumberExpression(KQL('string_size({})'.format(_subexpr_to_kql(expr)))) def strlen(expr: StringType) -> NumberExpression: - return NumberExpression(KQL('strlen({})'.format(expr))) + return NumberExpression(KQL('strlen({})'.format(_subexpr_to_kql(expr)))) def strrep(expr: StringType, multiplier: NumberType, delimiter: StringType = None) -> StringExpression: if delimiter is None: - res = 'strrep({}, {})'.format(expr, multiplier) + res = 'strrep({}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(multiplier)) else: - res = 'strrep({}, {}, {})'.format(expr, multiplier, _subexpr_to_kql(delimiter)) + res = 'strrep({}, {}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(multiplier), + _subexpr_to_kql(delimiter)) return StringExpression(KQL((res))) def substring(expr: StringType, start_index: NumberType, length: NumberType = None) -> StringExpression: return StringExpression(KQL( - ('substring({}, {})' if length is None else 'substring({}, {}, {})').format(expr, start_index, length) + ('substring({}, {})' if length is None else 'substring({}, {}, {})').format( + _subexpr_to_kql(expr), _subexpr_to_kql(start_index), _subexpr_to_kql(length) + ) )) @@ -628,22 +640,22 @@ def substring(expr: StringType, start_index: NumberType, length: NumberType = No def tobool(expr: ExpressionType) -> BooleanExpression: - return BooleanExpression(KQL('tobool({})'.format(expr))) + return BooleanExpression(KQL('tobool({})'.format(_subexpr_to_kql(expr)))) def toboolean(expr: ExpressionType) -> BooleanExpression: - return BooleanExpression(KQL('toboolean({})'.format(expr))) + return BooleanExpression(KQL('toboolean({})'.format(_subexpr_to_kql(expr)))) def todatetime(expr: StringType) -> DatetimeExpression: - return DatetimeExpression(KQL('todatetime({})'.format(expr))) + return DatetimeExpression(KQL('todatetime({})'.format(_subexpr_to_kql(expr)))) def todecimal(): raise NotImplemented # TODO def todouble(expr: NumberType) -> NumberExpression: - return NumberExpression(KQL("todouble({})".format(expr))) + return NumberExpression(KQL("todouble({})".format(_subexpr_to_kql(expr)))) def todynamic(): raise NotImplemented # TODO @@ -730,11 +742,11 @@ def arg_min(*args: ExpressionType) -> AggregationExpression: def avg(expr: ExpressionType) -> NumberAggregationExpression: - return NumberAggregationExpression(KQL('avg({})'.format(expr))) + return NumberAggregationExpression(KQL('avg({})'.format(_subexpr_to_kql(expr)))) def avgif(expr: ExpressionType, predicate: BooleanType) -> NumberAggregationExpression: - return NumberAggregationExpression(KQL('avgif({}, {})'.format(expr, predicate))) + return NumberAggregationExpression(KQL('avgif({}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(predicate)))) # def buildschema(self): @@ -747,17 +759,19 @@ def count(col: Column = None) -> NumberAggregationExpression: def countif(predicate: BooleanType) -> NumberAggregationExpression: - return NumberAggregationExpression(KQL('countif({})'.format(predicate))) + return NumberAggregationExpression(KQL('countif({})'.format(to_kql(predicate)))) def dcount(expr: ExpressionType, accuracy: NumberType = None) -> NumberAggregationExpression: return NumberAggregationExpression(KQL( - ('dcount({})' if accuracy is None else 'dcount({}, {})').format(expr, accuracy) + ('dcount({})' if accuracy is None else 'dcount({}, {})').format(to_kql(expr), to_kql(accuracy)) )) def dcountif(expr: ExpressionType, predicate: BooleanType, accuracy: NumberType = 0) -> NumberAggregationExpression: - return NumberAggregationExpression(KQL('dcountif({}, {}, {})'.format(expr, predicate, accuracy))) + return NumberAggregationExpression(KQL('dcountif({}, {}, {})'.format( + to_kql(expr), to_kql(predicate), to_kql(accuracy) + ))) # def hll(expr: ExpressionType, accuracy: NumberType = None) -> AggregationExpression: @@ -767,64 +781,64 @@ def dcountif(expr: ExpressionType, predicate: BooleanType, accuracy: NumberType # def hll_merge(expr: ExpressionType) -> AggregationExpression: -# return AggregationExpression(KQL('hll_merge({})'.format(expr))) +# return AggregationExpression(KQL('hll_merge({})'.format(_subexpr_to_kql(expr)))) def make_bag(expr: ExpressionType, max_size: NumberType = None) -> MappingAggregationExpression: if max_size: return MappingAggregationExpression(KQL('make_bag({}, {})'.format(expr, max_size))) - return MappingAggregationExpression(KQL('make_bag({})'.format(expr))) + return MappingAggregationExpression(KQL('make_bag({})'.format(to_kql(expr)))) def make_list(expr: ExpressionType, max_size: NumberType = None) -> ArrayAggregationExpression: if max_size: return ArrayAggregationExpression(KQL('make_list({}, {})'.format(expr, max_size))) - return ArrayAggregationExpression(KQL('make_list({})'.format(expr))) + return ArrayAggregationExpression(KQL('make_list({})'.format(to_kql(expr)))) def make_set(expr: ExpressionType, max_size: NumberType = None) -> ArrayAggregationExpression: if max_size: return ArrayAggregationExpression(KQL('make_set({}, {})'.format(expr, max_size))) - return ArrayAggregationExpression(KQL('make_set({})'.format(expr))) + return ArrayAggregationExpression(KQL('make_set({})'.format(to_kql(expr)))) def max(expr: ExpressionType) -> AggregationExpression: - return AggregationExpression(KQL('max({})'.format(expr))) + return AggregationExpression(KQL('max({})'.format(to_kql(expr)))) def min(expr: ExpressionType) -> AggregationExpression: - return AggregationExpression(KQL('min({})'.format(expr))) + return AggregationExpression(KQL('min({})'.format(to_kql(expr)))) def percentile(expr: ExpressionType, per: NumberType) -> AggregationExpression: - res = 'percentiles({}, {})'.format(expr, _subexpr_to_kql(per)) + res = 'percentiles({}, {})'.format(expr, to_kql(per)) return AggregationExpression(KQL(res)) def percentiles(expr: ExpressionType, *pers: NumberType) -> AggregationExpression: res = 'percentiles({}, {})'.format(expr, - ', '.join([str(_subexpr_to_kql(per)) for per in pers])) + ', '.join([str(to_kql(per)) for per in pers])) return AggregationExpression(KQL(res)) def stdev(expr: ExpressionType) -> AggregationExpression: - return AggregationExpression(KQL('stdev({})'.format(expr))) + return AggregationExpression(KQL('stdev({})'.format(to_kql(expr)))) def stdevif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression: - return AggregationExpression(KQL('stdevif({}, {})'.format(expr, predicate))) + return AggregationExpression(KQL('stdevif({}, {})'.format(to_kql(expr), to_kql(predicate)))) def stdevp(expr: ExpressionType) -> AggregationExpression: - return AggregationExpression(KQL('stdevp({})'.format(expr))) + return AggregationExpression(KQL('stdevp({})'.format(to_kql(expr)))) def sum(expr: ExpressionType) -> AggregationExpression: - return AggregationExpression(KQL('sum({})'.format(expr))) + return AggregationExpression(KQL('sum({})'.format(to_kql(expr)))) def sumif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression: - return AggregationExpression(KQL('sumif({}, {})'.format(expr, predicate))) + return AggregationExpression(KQL('sumif({}, {})'.format(to_kql(expr), to_kql(predicate)))) # def tdigest(self): @@ -836,12 +850,12 @@ def sumif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression def variance(expr: ExpressionType) -> AggregationExpression: - return AggregationExpression(KQL('variance({})'.format(expr))) + return AggregationExpression(KQL('variance({})'.format(to_kql(expr)))) def varianceif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression: - return AggregationExpression(KQL('varianceif({}, {})'.format(expr, predicate))) + return AggregationExpression(KQL('varianceif({}, {})'.format(to_kql(expr), to_kql(predicate)))) def variancep(expr: ExpressionType) -> AggregationExpression: - return AggregationExpression(KQL('variancep({})'.format(expr))) + return AggregationExpression(KQL('variancep({})'.format(to_kql(expr)))) diff --git a/pykusto/utils.py b/pykusto/kql_converters.py similarity index 60% rename from pykusto/utils.py rename to pykusto/kql_converters.py index 521900d..63079f4 100644 --- a/pykusto/utils.py +++ b/pykusto/kql_converters.py @@ -1,20 +1,19 @@ import json -import logging from datetime import datetime, timedelta -from typing import Union, Mapping, NewType, Type, Dict, Callable, Any, Tuple, List +from numbers import Number +from typing import NewType, Mapping, Union, List, Tuple -logger = logging.getLogger("pykusto") - -KustoTypes = Union[str, int, bool, datetime, Mapping, List, Tuple, float, timedelta] -# TODO: Unhandled date types: guid, decimal +from pykusto.type_utils import kql_converter KQL = NewType('KQL', str) +@kql_converter(datetime) def datetime_to_kql(dt: datetime) -> KQL: return KQL(dt.strftime('datetime(%Y-%m-%d %H:%M:%S.%f)')) +@kql_converter(timedelta) def timedelta_to_kql(td: timedelta) -> KQL: hours, remainder = divmod(td.seconds, 3600) minutes, seconds = divmod(remainder, 60) @@ -27,6 +26,7 @@ def timedelta_to_kql(td: timedelta) -> KQL: )) +@kql_converter(Mapping, List, Tuple) def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL: query = list(json.dumps(d)) # Issue #11 @@ -48,29 +48,21 @@ def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL: return KQL("".join(query)) +@kql_converter(bool) def bool_to_kql(b: bool) -> KQL: return KQL('true') if b else KQL('false') +@kql_converter(str) def str_to_kql(s: str) -> KQL: return KQL('"{}"'.format(s)) -KQL_CONVERTER_BY_TYPE: Dict[Type, Callable[[Any], KQL]] = { - datetime: datetime_to_kql, - timedelta: timedelta_to_kql, - Mapping: dynamic_to_kql, - List: dynamic_to_kql, - Tuple: dynamic_to_kql, - bool: bool_to_kql, - str: str_to_kql, - int: KQL, - float: KQL, -} +@kql_converter(Number) +def number_to_kql(n: Number) -> KQL: + return KQL(str(n)) -def to_kql(obj: KustoTypes) -> KQL: - for kusto_type, converter in KQL_CONVERTER_BY_TYPE.items(): - if isinstance(obj, kusto_type): - return converter(obj) - raise ValueError("No KQL converter found for object {} of type {}".format(obj, type(obj))) +@kql_converter(type(None)) +def none_to_kql(n: type(None)) -> KQL: + return KQL("") diff --git a/pykusto/query.py b/pykusto/query.py index b7157fb..8207f13 100644 --- a/pykusto/query.py +++ b/pykusto/query.py @@ -1,18 +1,19 @@ -from abc import abstractmethod -from copy import copy, deepcopy -from enum import Enum from itertools import chain -from types import FunctionType from typing import Tuple, List, Union, Optional +from abc import abstractmethod from azure.kusto.data.helpers import dataframe_from_result_table +from copy import copy, deepcopy +from enum import Enum +from types import FunctionType from pykusto.client import Table from pykusto.expressions import BooleanType, ExpressionType, AggregationExpression, OrderType, \ StringType, AssignmentBase, AssignmentFromAggregationToColumn, AssignmentToSingleColumn, Column, BaseExpression, \ - AssignmentFromColumnToColumn + AssignmentFromColumnToColumn, AnyExpression, to_kql +from pykusto.kql_converters import KQL +from pykusto.type_utils import logger from pykusto.udf import stringify_python_func -from pykusto.utils import KQL, logger, to_kql class Order(Enum): @@ -127,7 +128,7 @@ class Query: return DistinctQuery(self, columns) def distinct_all(self): - return DistinctQuery(self, (BaseExpression(KQL("*")),)) + return DistinctQuery(self, (AnyExpression(KQL("*")),)) def extend(self, *args: Union[BaseExpression, AssignmentBase], **kwargs: ExpressionType) -> 'ExtendQuery': assignments: List[AssignmentBase] = [] @@ -140,7 +141,7 @@ class Query: if isinstance(expression, BaseExpression): assignments.append(expression.assign_to(Column(column_name))) else: - assignments.append(BaseExpression(to_kql(expression)).assign_to(Column(column_name))) + assignments.append(AnyExpression(to_kql(expression)).assign_to(Column(column_name))) return ExtendQuery(self, *assignments) def summarize(self, *args: Union[AggregationExpression, AssignmentFromAggregationToColumn], diff --git a/pykusto/type_utils.py b/pykusto/type_utils.py new file mode 100644 index 0000000..bb5cb4d --- /dev/null +++ b/pykusto/type_utils.py @@ -0,0 +1,41 @@ +import logging +from datetime import datetime, timedelta +from numbers import Number +from typing import Union, Mapping, Type, Dict, Callable, Any, Tuple, List + +logger = logging.getLogger("pykusto") + +KustoTypes = Union[str, Number, bool, datetime, Mapping, List, Tuple, timedelta] +# TODO: Unhandled data types: guid, decimal + + +class TypeRegistrar: + registry: Dict[Type[KustoTypes], Callable] + + def __init__(self) -> None: + self.registry = {} + + def __call__(self, *types: Type[KustoTypes]) -> Callable: + def inner(wrapped): + for t in types: + self.registry[t] = wrapped + return wrapped + + return inner + + def for_obj(self, obj: Any) -> Any: + for registered_type, registered_callable in self.registry.items(): + if isinstance(obj, registered_type): + return registered_callable(obj) + raise ValueError("No registered callable for object {} of type {}".format(obj, type(obj).__name__)) + + def for_type(self, t: Type[KustoTypes]) -> Callable: + for registered_type, registered_callable in self.registry.items(): + if issubclass(t, registered_type): + return registered_callable + raise ValueError("No registered callable for type {}".format(t.__name__)) + + +kql_converter = TypeRegistrar() +plain_expression = TypeRegistrar() +aggregation_expression = TypeRegistrar() diff --git a/test/test_base.py b/test/test_base.py index 7070cec..6321ec8 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -3,7 +3,7 @@ import sys from typing import Callable from unittest import TestCase -from pykusto.utils import logger +from pykusto.type_utils import logger class TestBase(TestCase): diff --git a/test/test_functions.py b/test/test_functions.py index 55fc998..3155c9e 100644 --- a/test/test_functions.py +++ b/test/test_functions.py @@ -337,13 +337,14 @@ class TestFunction(TestBase): Query().extend(f.strcat_delim('-', ',', col.foo)).render() ) + @unittest.skip("Enabled after #40 is fixed") def test_strcat_array(self): self.assertEqual( " | where (strcat_array(foo, \",\")) == \"A,B,C\"", Query().where(f.strcat_array(col.foo, ',') == 'A,B,C').render() ) self.assertEqual( - " | where (strcat_array(['A', 'B', 'C'], \",\")) == \"A,B,C\"", + " | where (strcat_array(dynamic([\"A\", \"B\", \"C\"]), \",\")) == \"A,B,C\"", Query().where(f.strcat_array(['A', 'B', 'C'], ',') == 'A,B,C').render() ) @@ -409,7 +410,7 @@ class TestFunction(TestBase): # Query().where(f.todatetime('') > datetime.datetime(2019, 7, 23)).render(), # " | where (startofday(foo)) > datetime(2019-07-23 00:00:00.000000)") # def todatetime(expr: StringType) -> DatetimeExpression: - # return DatetimeExpression(KQL('todatetime({})'.format(expr))) + # return DatetimeExpression(KQL('todatetime({})'.format(_subexpr_to_kql(expr)))) def test_todouble(self): self.assertEqual( @@ -439,14 +440,13 @@ class TestFunction(TestBase): Query().summarize(f.arg_min(col.foo, col.bar, col.fam)).render() ) - @unittest.skip("Re-enable once issue #1 is resolved") def test_avg(self): self.assertEqual( " | summarize avg(foo)", Query().summarize(f.avg(col.foo)).render() ) self.assertEqual( - " | summarize avg(foo)-5", + " | summarize avg(foo) - 5", Query().summarize(f.avg(col.foo) - 5).render() ) diff --git a/test/test_utils.py b/test/test_utils.py index c3614d8..fc774c4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,4 +1,4 @@ -from pykusto import utils +from pykusto.expressions import to_kql from test.test_base import TestBase @@ -12,5 +12,5 @@ class TestUtils(TestBase): } self.assertEqual( "{\"name\": \"Alan\", \"age\": 21, \"address\": (\"NY\", 36), \"pets\": (\"Libby\", \"Panda\", \"]\", \"[\")}", - utils.to_kql(dict) + to_kql(dict) )