Improve type inference and conversion

This commit is contained in:
Yonatan Most 2020-01-01 12:21:47 +02:00
Родитель 290b60fd7e
Коммит 9e5d250766
9 изменённых файлов: 217 добавлений и 125 удалений

Просмотреть файл

@ -1,5 +1,4 @@
from typing import Union, List, Tuple
# noinspection PyProtectedMember
from urllib.parse import urlparse
@ -7,7 +6,7 @@ from urllib.parse import urlparse
from azure.kusto.data._response import KustoResponseDataSet
from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties
from pykusto.utils import KQL
from pykusto.kql_converters import KQL
class PyKustoClient:

Просмотреть файл

@ -1,9 +1,10 @@
from datetime import datetime, timedelta
from typing import Any, List, Tuple, Mapping, Optional
from numbers import Number
from typing import Any, List, Tuple, Mapping, Optional, Type
from typing import Union
from pykusto.utils import KQL
from pykusto.utils import KustoTypes, to_kql
from pykusto.kql_converters import KQL
from pykusto.type_utils import plain_expression, aggregation_expression, KustoTypes, kql_converter
ExpressionType = Union[KustoTypes, 'BaseExpression']
StringType = Union[str, 'StringExpression']
@ -13,7 +14,6 @@ ArrayType = Union[List, Tuple, 'ArrayExpression']
MappingType = Union[Mapping, 'MappingExpression']
DatetimeType = Union[datetime, 'DatetimeExpression']
TimespanType = Union[timedelta, 'TimespanExpression']
AggregationType = Union['AggregationExpression']
DynamicType = Union[ArrayType, MappingType]
OrderType = Union[DatetimeType, TimespanType, NumberType, StringType]
@ -29,11 +29,17 @@ def _subexpr_to_kql(obj: ExpressionType) -> KQL:
class BaseExpression:
kql: KQL
def __new__(cls, *args, **kwargs):
if cls is 'BaseExpression':
raise TypeError("BaseExpression is abstract")
return object.__new__(cls)
def __init__(self, kql: KQL) -> None:
assert isinstance(kql, str)
self.kql = kql
def __repr__(self) -> str:
return self.kql
return str(self.kql)
def as_subexpression(self) -> KQL:
return KQL('({})'.format(self.kql))
@ -58,10 +64,15 @@ class BaseExpression:
return BooleanExpression(KQL('{} has \"{}\"'.format(self.kql, exp)))
@staticmethod
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> KQL:
return KQL('{}{}{}'.format(
def base_binary_op(
left: ExpressionType, operator: str, right: ExpressionType, result_type: Type[KustoTypes]
) -> 'BaseExpression':
registrar = plain_expression
if isinstance(left, AggregationExpression) or isinstance(right, AggregationExpression):
registrar = aggregation_expression
return registrar.for_type(result_type)(KQL('{}{}{}'.format(
_subexpr_to_kql(left), operator, _subexpr_to_kql(right))
)
))
def __eq__(self, other: ExpressionType) -> 'BooleanExpression':
return BooleanExpression.binary_op(self, ' == ', other)
@ -100,10 +111,11 @@ class BaseExpression:
raise ValueError("Only arrays can be assigned to multiple columns")
@plain_expression(bool)
class BooleanExpression(BaseExpression):
@staticmethod
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'BooleanExpression':
return BooleanExpression(BaseExpression.binary_op(left, operator, right))
return BaseExpression.base_binary_op(left, operator, right, bool)
def __and__(self, other: BooleanType) -> 'BooleanExpression':
return BooleanExpression.binary_op(self, ' and ', other)
@ -115,10 +127,11 @@ class BooleanExpression(BaseExpression):
return BooleanExpression(KQL('not({})'.format(self.kql)))
@plain_expression(Number)
class NumberExpression(BaseExpression):
@staticmethod
def binary_op(left: NumberType, operator: str, right: NumberType) -> 'NumberExpression':
return NumberExpression(BaseExpression.binary_op(left, operator, right))
return BaseExpression.base_binary_op(left, operator, right, Number)
def __lt__(self, other: NumberType) -> BooleanExpression:
return BooleanExpression.binary_op(self, ' < ', other)
@ -168,15 +181,15 @@ class NumberExpression(BaseExpression):
return NumberExpression(KQL('floor({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
def bin(self, round_to: NumberType) -> 'BaseExpression':
return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
return NumberExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
def bin_at(self, round_to: NumberType, fixed_point: NumberType) -> 'BaseExpression':
return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
_subexpr_to_kql(round_to),
_subexpr_to_kql(fixed_point))))
return NumberExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
_subexpr_to_kql(round_to),
_subexpr_to_kql(fixed_point))))
def bin_auto(self) -> 'BaseExpression':
return BaseExpression(KQL('bin_auto({})'.format(self.kql)))
return NumberExpression(KQL('bin_auto({})'.format(self.kql)))
def ceiling(self) -> 'NumberExpression':
return NumberExpression(KQL('ceiling({})'.format(self.kql)))
@ -213,11 +226,16 @@ class NumberExpression(BaseExpression):
def round(self, precision: NumberType = None) -> 'NumberExpression':
return NumberExpression(KQL(
('round({}, {})' if precision is None else 'round({}, {})').format(self, precision)
('round({}, {})' if precision is None else 'round({}, {})').format(self.kql, to_kql(precision))
))
@plain_expression(str)
class StringExpression(BaseExpression):
@staticmethod
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'StringExpression':
return BaseExpression.base_binary_op(left, operator, right, str)
def __len__(self) -> NumberExpression:
return self.string_size()
@ -228,7 +246,7 @@ class StringExpression(BaseExpression):
return BooleanExpression(KQL('isempty({})'.format(self.kql)))
def __add__(self, other: StringType) -> 'StringExpression':
return StringExpression(BaseExpression.binary_op(self, ' + ', other))
return StringExpression.binary_op(self, ' + ', other)
@staticmethod
def concat(*strings: StringType) -> 'StringExpression':
@ -238,8 +256,10 @@ class StringExpression(BaseExpression):
def split(self, delimiter: StringType, requested_index: NumberType = None) -> 'ArrayExpression':
if requested_index is None:
return ArrayExpression(KQL('split({}, {}'.format(self.kql, delimiter)))
return ArrayExpression(KQL('split({}, {}, {}'.format(self.kql, delimiter, requested_index)))
return ArrayExpression(KQL('split({}, {}'.format(self.kql, to_kql(delimiter))))
return ArrayExpression(KQL('split({}, {}, {}'.format(
self.kql, _subexpr_to_kql(delimiter), to_kql(requested_index)
)))
def equals(self, other: StringType, case_sensitive: bool = False) -> BooleanExpression:
return BooleanExpression.binary_op(self, ' == ' if case_sensitive else ' =~ ', other)
@ -278,10 +298,11 @@ class StringExpression(BaseExpression):
return BooleanExpression(KQL('isutf8({})'.format(self.kql)))
@plain_expression(datetime)
class DatetimeExpression(BaseExpression):
@staticmethod
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'DatetimeExpression':
return DatetimeExpression(BaseExpression.binary_op(left, operator, right))
return BaseExpression.base_binary_op(left, operator, right, datetime)
def __lt__(self, other: DatetimeType) -> BooleanExpression:
return BooleanExpression.binary_op(self, ' < ', other)
@ -316,15 +337,15 @@ class DatetimeExpression(BaseExpression):
return DatetimeExpression(KQL('floor({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
def bin(self, round_to: TimespanType) -> 'BaseExpression':
return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
return DatetimeExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
def bin_at(self, round_to: TimespanType, fixed_point: DatetimeType) -> 'BaseExpression':
return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
_subexpr_to_kql(round_to),
_subexpr_to_kql(fixed_point))))
return DatetimeExpression(KQL('bin_at({}, {}, {})'.format(
self.kql, _subexpr_to_kql(round_to), to_kql(fixed_point)
)))
def bin_auto(self) -> 'BaseExpression':
return BaseExpression(KQL('bin_auto({})'.format(self.kql)))
return DatetimeExpression(KQL('bin_auto({})'.format(self.kql)))
def endofday(self, offset: NumberType = None) -> 'DatetimeExpression':
if offset is None:
@ -364,33 +385,34 @@ class DatetimeExpression(BaseExpression):
return NumberExpression(KQL('getyear({})'.format(self.kql)))
def hourofday(self) -> NumberExpression:
return NumberExpression(KQL('hourofday({})'.format(self)))
return NumberExpression(KQL('hourofday({})'.format(self.kql)))
def startofday(self, offset: NumberType = None) -> 'DatetimeExpression':
return DatetimeExpression(KQL(
('startofday({})' if offset is None else 'startofday({}, {})').format(self.kql, offset)
('startofday({})' if offset is None else 'startofday({}, {})').format(self.kql, to_kql(offset))
))
def startofmonth(self, offset: NumberType = None) -> 'DatetimeExpression':
return DatetimeExpression(KQL(
('startofmonth({})' if offset is None else 'startofmonth({}, {})').format(self.kql, offset)
('startofmonth({})' if offset is None else 'startofmonth({}, {})').format(self.kql, to_kql(offset))
))
def startofweek(self, offset: NumberType = None) -> 'DatetimeExpression':
return DatetimeExpression(KQL(
('startofweek({})' if offset is None else 'startofweek({}, {})').format(self.kql, offset)
('startofweek({})' if offset is None else 'startofweek({}, {})').format(self.kql, to_kql(offset))
))
def startofyear(self, offset: NumberType = None) -> 'DatetimeExpression':
return DatetimeExpression(KQL(
('startofyear({})' if offset is None else 'startofyear({}, {})').format(self.kql, offset)
('startofyear({})' if offset is None else 'startofyear({}, {})').format(self.kql, to_kql(offset))
))
@plain_expression(timedelta)
class TimespanExpression(BaseExpression):
@staticmethod
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'TimespanExpression':
return TimespanExpression(BaseExpression.binary_op(left, operator, right))
return BaseExpression.base_binary_op(left, operator, right, timedelta)
def __add__(self, other: TimespanType) -> 'TimespanExpression':
return TimespanExpression.binary_op(self, ' + ', other)
@ -402,18 +424,18 @@ class TimespanExpression(BaseExpression):
return DatetimeExpression(KQL('ago({})'.format(_subexpr_to_kql(self))))
def bin(self, round_to: TimespanType) -> 'BaseExpression':
return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
return TimespanExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
def bin_at(self, round_to: TimespanType, fixed_point: TimespanType) -> 'BaseExpression':
return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
_subexpr_to_kql(round_to),
_subexpr_to_kql(fixed_point))))
return TimespanExpression(KQL('bin_at({}, {}, {})'.format(
self.kql, to_kql(round_to), to_kql(fixed_point)
)))
def bin_auto(self) -> 'BaseExpression':
return BaseExpression(KQL('bin_auto({})'.format(self.kql)))
return TimespanExpression(KQL('bin_auto({})'.format(self.kql)))
def format_timespan(self, format_string: StringType) -> StringExpression:
return StringExpression(KQL('format_timespan({}, {})'.format(self.kql, _subexpr_to_kql(format_string))))
return StringExpression(KQL('format_timespan({}, {})'.format(self.kql, to_kql(format_string))))
def between(self, lower: TimespanType, upper: TimespanType) -> BooleanExpression:
return BooleanExpression(KQL('{} between ({} .. {})'.format(
@ -421,6 +443,7 @@ class TimespanExpression(BaseExpression):
)))
@plain_expression(List, Tuple)
class ArrayExpression(BaseExpression):
def __len__(self) -> NumberExpression:
return self.array_length()
@ -438,7 +461,7 @@ class ArrayExpression(BaseExpression):
)))
def __getitem__(self, index: NumberType) -> BaseExpression:
return BaseExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
return AnyExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
def assign_to(self, *columns: 'Column') -> 'AssignmentBase':
if len(columns) <= 1:
@ -446,6 +469,7 @@ class ArrayExpression(BaseExpression):
return AssignmentToMultipleColumns(columns, self)
@plain_expression(Mapping)
class MappingExpression(BaseExpression):
def keys(self) -> ArrayExpression:
return ArrayExpression(KQL('bag_keys({})'.format(self.kql)))
@ -457,10 +481,14 @@ class MappingExpression(BaseExpression):
)))
def __getitem__(self, index: StringType) -> BaseExpression:
return BaseExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
return AnyExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
class AggregationExpression(BaseExpression):
def __new__(cls, *args, **kwargs):
if cls is 'AggregationExpression':
raise TypeError("AggregationExpression is abstract")
return object.__new__(cls)
def assign_to(self, *columns: 'Column') -> 'AssignmentFromAggregationToColumn':
if len(columns) == 0:
@ -474,30 +502,37 @@ class AggregationExpression(BaseExpression):
return self.kql
@aggregation_expression(bool)
class BooleanAggregationExpression(AggregationExpression, BooleanExpression):
pass
@aggregation_expression(Number)
class NumberAggregationExpression(AggregationExpression, NumberExpression):
pass
@aggregation_expression(str)
class StringAggregationExpression(AggregationExpression, StringExpression):
pass
@aggregation_expression(datetime)
class DatetimeAggregationExpression(AggregationExpression, DatetimeExpression):
pass
@aggregation_expression(timedelta)
class TimespanAggregationExpression(AggregationExpression, TimespanExpression):
pass
@aggregation_expression(List, Tuple)
class ArrayAggregationExpression(AggregationExpression, ArrayExpression):
pass
@aggregation_expression(Mapping)
class MappingAggregationExpression(AggregationExpression, MappingExpression):
pass
@ -541,15 +576,19 @@ class AssignmentToMultipleColumns(AssignmentBase):
class AssignmentFromAggregationToColumn(AssignmentBase):
def __init__(self, column: Optional['Column'], aggregation: AggregationType) -> None:
def __init__(self, column: Optional['Column'], aggregation: AggregationExpression) -> None:
super().__init__(None if column is None else column.kql, aggregation)
class Column(
class AnyExpression(
NumberExpression, BooleanExpression, StringExpression,
ArrayExpression, MappingExpression, DatetimeExpression,
TimespanExpression
):
pass
class Column(AnyExpression):
_name: str
def __init__(self, name: str) -> None:
@ -588,3 +627,9 @@ class ColumnGenerator:
# Recommended usage: from pykusto.expressions import column_generator as col
# TODO: Is there a way to enforce this to be a singleton?
column_generator = ColumnGenerator()
def to_kql(obj: ExpressionType) -> KQL:
if isinstance(obj, BaseExpression):
return obj.kql
return kql_converter.for_obj(obj)

Просмотреть файл

@ -1,7 +1,11 @@
from pykusto import utils
from pykusto.expressions import *
from pykusto.expressions import _subexpr_to_kql, Column
from pykusto.utils import KQL
from typing import Union
from pykusto.expressions import _subexpr_to_kql, Column, NumberType, NumberExpression, TimespanType, \
DatetimeExpression, TimespanExpression, ArrayType, DynamicType, DatetimeType, BaseExpression, BooleanType, \
ExpressionType, AggregationExpression, StringType, StringExpression, BooleanExpression, \
NumberAggregationExpression, MappingAggregationExpression, ArrayAggregationExpression, to_kql
from pykusto.kql_converters import KQL
from pykusto.type_utils import plain_expression
# Scalar functions
@ -101,9 +105,9 @@ def bin_auto(expr: Union[NumberType, DatetimeType, TimespanType]) -> BaseExpress
def case(predicate: BooleanType, val: ExpressionType, *args: Union[BooleanType, ExpressionType]) -> BaseExpression:
res = 'case({}, {}, {})'.format(_subexpr_to_kql(predicate),
_subexpr_to_kql(val),
', '.join([_subexpr_to_kql(arg) for arg in args]))
res = 'case({}, {}, {})'.format(
_subexpr_to_kql(predicate), _subexpr_to_kql(val), ', '.join([_subexpr_to_kql(arg) for arg in args])
)
return AggregationExpression(KQL(res))
@ -167,7 +171,7 @@ def cos(expr: NumberType) -> NumberExpression:
# def dcount_hll(expr: ExpressionType) -> BaseExpression:
# return BaseExpression(KQL('dcount_hll({})'.format(expr)))
# return BaseExpression(KQL('dcount_hll({})'.format(_subexpr_to_kql(expr))))
# def degrees(self): return
@ -260,7 +264,12 @@ def hourofday(expr: DatetimeType) -> NumberExpression:
def iff(predicate: BooleanType, if_true: ExpressionType, if_false: ExpressionType) -> BaseExpression:
return BaseExpression(KQL('iff({}, {}, {})'.format(predicate, _subexpr_to_kql(if_true), _subexpr_to_kql(if_false))))
return_type = type(if_true)
if type(if_false) is not return_type:
raise TypeError("The second and third arguments must be of the same type")
return plain_expression.for_type(return_type)(
KQL('iff({}, {}, {})'.format(to_kql(predicate), to_kql(if_true), to_kql(if_false)))
)
def iif(predicate: BooleanType, if_true: ExpressionType, if_false: ExpressionType) -> BaseExpression:
@ -365,7 +374,7 @@ def new_guid(): raise NotImplemented # TODO
def now(offset: TimespanType = None) -> StringExpression:
if offset:
return StringExpression(KQL('now({})'.format(utils.timedelta_to_kql(offset))))
return StringExpression(KQL('now({})'.format(to_kql(offset))))
return StringExpression(KQL('now()'))
@ -417,7 +426,7 @@ def percentrank_tdigest(): raise NotImplemented # TODO
def pow(expr1: NumberType, expr2: NumberType) -> NumberExpression:
return NumberExpression(KQL('pow({}, {})'.format(expr1, expr2)))
return NumberExpression(KQL('pow({}, {})'.format(_subexpr_to_kql(expr1), _subexpr_to_kql(expr2))))
# def radians(self): return
@ -542,7 +551,7 @@ def round(expr: NumberType, precision: NumberType = None) -> NumberExpression:
def sign(expr: NumberType) -> NumberExpression:
return NumberExpression(KQL('sign({})'.format(expr)))
return NumberExpression(KQL('sign({})'.format(_subexpr_to_kql(expr))))
# def sin(self): return
@ -552,7 +561,7 @@ def sign(expr: NumberType) -> NumberExpression:
def sqrt(expr: NumberType) -> NumberExpression:
return NumberExpression(KQL('sqrt({})'.format(expr)))
return NumberExpression(KQL('sqrt({})'.format(_subexpr_to_kql(expr))))
def startofday(expr: DatetimeType, offset: NumberType = None) -> DatetimeExpression:
@ -580,7 +589,7 @@ def strcat(expr1: StringType, expr2: StringType, *exprs: StringType) -> StringEx
def strcat_array(expr: ArrayType, delimiter: StringType) -> StringExpression:
return StringExpression(KQL('strcat_array({}, {})'.format(expr, _subexpr_to_kql(delimiter))))
return StringExpression(KQL('strcat_array({}, {})'.format(to_kql(expr), to_kql(delimiter))))
def strcat_delim(delimiter: StringType, expr1: StringType, expr2: StringType, *exprs: StringType) -> StringExpression:
@ -594,30 +603,33 @@ def strcat_delim(delimiter: StringType, expr1: StringType, expr2: StringType, *e
def strcmp(expr1: StringType, expr2: StringType) -> NumberExpression:
return NumberExpression(KQL('strcmp({}, {})'.format(expr1, expr2)))
return NumberExpression(KQL('strcmp({}, {})'.format(_subexpr_to_kql(expr1), _subexpr_to_kql(expr2))))
def string_size(expr: StringType) -> NumberExpression:
return NumberExpression(KQL('string_size({})'.format(expr)))
return NumberExpression(KQL('string_size({})'.format(_subexpr_to_kql(expr))))
def strlen(expr: StringType) -> NumberExpression:
return NumberExpression(KQL('strlen({})'.format(expr)))
return NumberExpression(KQL('strlen({})'.format(_subexpr_to_kql(expr))))
def strrep(expr: StringType,
multiplier: NumberType,
delimiter: StringType = None) -> StringExpression:
if delimiter is None:
res = 'strrep({}, {})'.format(expr, multiplier)
res = 'strrep({}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(multiplier))
else:
res = 'strrep({}, {}, {})'.format(expr, multiplier, _subexpr_to_kql(delimiter))
res = 'strrep({}, {}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(multiplier),
_subexpr_to_kql(delimiter))
return StringExpression(KQL((res)))
def substring(expr: StringType, start_index: NumberType, length: NumberType = None) -> StringExpression:
return StringExpression(KQL(
('substring({}, {})' if length is None else 'substring({}, {}, {})').format(expr, start_index, length)
('substring({}, {})' if length is None else 'substring({}, {}, {})').format(
_subexpr_to_kql(expr), _subexpr_to_kql(start_index), _subexpr_to_kql(length)
)
))
@ -628,22 +640,22 @@ def substring(expr: StringType, start_index: NumberType, length: NumberType = No
def tobool(expr: ExpressionType) -> BooleanExpression:
return BooleanExpression(KQL('tobool({})'.format(expr)))
return BooleanExpression(KQL('tobool({})'.format(_subexpr_to_kql(expr))))
def toboolean(expr: ExpressionType) -> BooleanExpression:
return BooleanExpression(KQL('toboolean({})'.format(expr)))
return BooleanExpression(KQL('toboolean({})'.format(_subexpr_to_kql(expr))))
def todatetime(expr: StringType) -> DatetimeExpression:
return DatetimeExpression(KQL('todatetime({})'.format(expr)))
return DatetimeExpression(KQL('todatetime({})'.format(_subexpr_to_kql(expr))))
def todecimal(): raise NotImplemented # TODO
def todouble(expr: NumberType) -> NumberExpression:
return NumberExpression(KQL("todouble({})".format(expr)))
return NumberExpression(KQL("todouble({})".format(_subexpr_to_kql(expr))))
def todynamic(): raise NotImplemented # TODO
@ -730,11 +742,11 @@ def arg_min(*args: ExpressionType) -> AggregationExpression:
def avg(expr: ExpressionType) -> NumberAggregationExpression:
return NumberAggregationExpression(KQL('avg({})'.format(expr)))
return NumberAggregationExpression(KQL('avg({})'.format(_subexpr_to_kql(expr))))
def avgif(expr: ExpressionType, predicate: BooleanType) -> NumberAggregationExpression:
return NumberAggregationExpression(KQL('avgif({}, {})'.format(expr, predicate)))
return NumberAggregationExpression(KQL('avgif({}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(predicate))))
# def buildschema(self):
@ -747,17 +759,19 @@ def count(col: Column = None) -> NumberAggregationExpression:
def countif(predicate: BooleanType) -> NumberAggregationExpression:
return NumberAggregationExpression(KQL('countif({})'.format(predicate)))
return NumberAggregationExpression(KQL('countif({})'.format(to_kql(predicate))))
def dcount(expr: ExpressionType, accuracy: NumberType = None) -> NumberAggregationExpression:
return NumberAggregationExpression(KQL(
('dcount({})' if accuracy is None else 'dcount({}, {})').format(expr, accuracy)
('dcount({})' if accuracy is None else 'dcount({}, {})').format(to_kql(expr), to_kql(accuracy))
))
def dcountif(expr: ExpressionType, predicate: BooleanType, accuracy: NumberType = 0) -> NumberAggregationExpression:
return NumberAggregationExpression(KQL('dcountif({}, {}, {})'.format(expr, predicate, accuracy)))
return NumberAggregationExpression(KQL('dcountif({}, {}, {})'.format(
to_kql(expr), to_kql(predicate), to_kql(accuracy)
)))
# def hll(expr: ExpressionType, accuracy: NumberType = None) -> AggregationExpression:
@ -767,64 +781,64 @@ def dcountif(expr: ExpressionType, predicate: BooleanType, accuracy: NumberType
# def hll_merge(expr: ExpressionType) -> AggregationExpression:
# return AggregationExpression(KQL('hll_merge({})'.format(expr)))
# return AggregationExpression(KQL('hll_merge({})'.format(_subexpr_to_kql(expr))))
def make_bag(expr: ExpressionType, max_size: NumberType = None) -> MappingAggregationExpression:
if max_size:
return MappingAggregationExpression(KQL('make_bag({}, {})'.format(expr, max_size)))
return MappingAggregationExpression(KQL('make_bag({})'.format(expr)))
return MappingAggregationExpression(KQL('make_bag({})'.format(to_kql(expr))))
def make_list(expr: ExpressionType, max_size: NumberType = None) -> ArrayAggregationExpression:
if max_size:
return ArrayAggregationExpression(KQL('make_list({}, {})'.format(expr, max_size)))
return ArrayAggregationExpression(KQL('make_list({})'.format(expr)))
return ArrayAggregationExpression(KQL('make_list({})'.format(to_kql(expr))))
def make_set(expr: ExpressionType, max_size: NumberType = None) -> ArrayAggregationExpression:
if max_size:
return ArrayAggregationExpression(KQL('make_set({}, {})'.format(expr, max_size)))
return ArrayAggregationExpression(KQL('make_set({})'.format(expr)))
return ArrayAggregationExpression(KQL('make_set({})'.format(to_kql(expr))))
def max(expr: ExpressionType) -> AggregationExpression:
return AggregationExpression(KQL('max({})'.format(expr)))
return AggregationExpression(KQL('max({})'.format(to_kql(expr))))
def min(expr: ExpressionType) -> AggregationExpression:
return AggregationExpression(KQL('min({})'.format(expr)))
return AggregationExpression(KQL('min({})'.format(to_kql(expr))))
def percentile(expr: ExpressionType, per: NumberType) -> AggregationExpression:
res = 'percentiles({}, {})'.format(expr, _subexpr_to_kql(per))
res = 'percentiles({}, {})'.format(expr, to_kql(per))
return AggregationExpression(KQL(res))
def percentiles(expr: ExpressionType, *pers: NumberType) -> AggregationExpression:
res = 'percentiles({}, {})'.format(expr,
', '.join([str(_subexpr_to_kql(per)) for per in pers]))
', '.join([str(to_kql(per)) for per in pers]))
return AggregationExpression(KQL(res))
def stdev(expr: ExpressionType) -> AggregationExpression:
return AggregationExpression(KQL('stdev({})'.format(expr)))
return AggregationExpression(KQL('stdev({})'.format(to_kql(expr))))
def stdevif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression:
return AggregationExpression(KQL('stdevif({}, {})'.format(expr, predicate)))
return AggregationExpression(KQL('stdevif({}, {})'.format(to_kql(expr), to_kql(predicate))))
def stdevp(expr: ExpressionType) -> AggregationExpression:
return AggregationExpression(KQL('stdevp({})'.format(expr)))
return AggregationExpression(KQL('stdevp({})'.format(to_kql(expr))))
def sum(expr: ExpressionType) -> AggregationExpression:
return AggregationExpression(KQL('sum({})'.format(expr)))
return AggregationExpression(KQL('sum({})'.format(to_kql(expr))))
def sumif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression:
return AggregationExpression(KQL('sumif({}, {})'.format(expr, predicate)))
return AggregationExpression(KQL('sumif({}, {})'.format(to_kql(expr), to_kql(predicate))))
# def tdigest(self):
@ -836,12 +850,12 @@ def sumif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression
def variance(expr: ExpressionType) -> AggregationExpression:
return AggregationExpression(KQL('variance({})'.format(expr)))
return AggregationExpression(KQL('variance({})'.format(to_kql(expr))))
def varianceif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression:
return AggregationExpression(KQL('varianceif({}, {})'.format(expr, predicate)))
return AggregationExpression(KQL('varianceif({}, {})'.format(to_kql(expr), to_kql(predicate))))
def variancep(expr: ExpressionType) -> AggregationExpression:
return AggregationExpression(KQL('variancep({})'.format(expr)))
return AggregationExpression(KQL('variancep({})'.format(to_kql(expr))))

Просмотреть файл

@ -1,20 +1,19 @@
import json
import logging
from datetime import datetime, timedelta
from typing import Union, Mapping, NewType, Type, Dict, Callable, Any, Tuple, List
from numbers import Number
from typing import NewType, Mapping, Union, List, Tuple
logger = logging.getLogger("pykusto")
KustoTypes = Union[str, int, bool, datetime, Mapping, List, Tuple, float, timedelta]
# TODO: Unhandled date types: guid, decimal
from pykusto.type_utils import kql_converter
KQL = NewType('KQL', str)
@kql_converter(datetime)
def datetime_to_kql(dt: datetime) -> KQL:
return KQL(dt.strftime('datetime(%Y-%m-%d %H:%M:%S.%f)'))
@kql_converter(timedelta)
def timedelta_to_kql(td: timedelta) -> KQL:
hours, remainder = divmod(td.seconds, 3600)
minutes, seconds = divmod(remainder, 60)
@ -27,6 +26,7 @@ def timedelta_to_kql(td: timedelta) -> KQL:
))
@kql_converter(Mapping, List, Tuple)
def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL:
query = list(json.dumps(d))
# Issue #11
@ -48,29 +48,21 @@ def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL:
return KQL("".join(query))
@kql_converter(bool)
def bool_to_kql(b: bool) -> KQL:
return KQL('true') if b else KQL('false')
@kql_converter(str)
def str_to_kql(s: str) -> KQL:
return KQL('"{}"'.format(s))
KQL_CONVERTER_BY_TYPE: Dict[Type, Callable[[Any], KQL]] = {
datetime: datetime_to_kql,
timedelta: timedelta_to_kql,
Mapping: dynamic_to_kql,
List: dynamic_to_kql,
Tuple: dynamic_to_kql,
bool: bool_to_kql,
str: str_to_kql,
int: KQL,
float: KQL,
}
@kql_converter(Number)
def number_to_kql(n: Number) -> KQL:
return KQL(str(n))
def to_kql(obj: KustoTypes) -> KQL:
for kusto_type, converter in KQL_CONVERTER_BY_TYPE.items():
if isinstance(obj, kusto_type):
return converter(obj)
raise ValueError("No KQL converter found for object {} of type {}".format(obj, type(obj)))
@kql_converter(type(None))
def none_to_kql(n: type(None)) -> KQL:
return KQL("")

Просмотреть файл

@ -1,18 +1,19 @@
from abc import abstractmethod
from copy import copy, deepcopy
from enum import Enum
from itertools import chain
from types import FunctionType
from typing import Tuple, List, Union, Optional
from abc import abstractmethod
from azure.kusto.data.helpers import dataframe_from_result_table
from copy import copy, deepcopy
from enum import Enum
from types import FunctionType
from pykusto.client import Table
from pykusto.expressions import BooleanType, ExpressionType, AggregationExpression, OrderType, \
StringType, AssignmentBase, AssignmentFromAggregationToColumn, AssignmentToSingleColumn, Column, BaseExpression, \
AssignmentFromColumnToColumn
AssignmentFromColumnToColumn, AnyExpression, to_kql
from pykusto.kql_converters import KQL
from pykusto.type_utils import logger
from pykusto.udf import stringify_python_func
from pykusto.utils import KQL, logger, to_kql
class Order(Enum):
@ -127,7 +128,7 @@ class Query:
return DistinctQuery(self, columns)
def distinct_all(self):
return DistinctQuery(self, (BaseExpression(KQL("*")),))
return DistinctQuery(self, (AnyExpression(KQL("*")),))
def extend(self, *args: Union[BaseExpression, AssignmentBase], **kwargs: ExpressionType) -> 'ExtendQuery':
assignments: List[AssignmentBase] = []
@ -140,7 +141,7 @@ class Query:
if isinstance(expression, BaseExpression):
assignments.append(expression.assign_to(Column(column_name)))
else:
assignments.append(BaseExpression(to_kql(expression)).assign_to(Column(column_name)))
assignments.append(AnyExpression(to_kql(expression)).assign_to(Column(column_name)))
return ExtendQuery(self, *assignments)
def summarize(self, *args: Union[AggregationExpression, AssignmentFromAggregationToColumn],

41
pykusto/type_utils.py Normal file
Просмотреть файл

@ -0,0 +1,41 @@
import logging
from datetime import datetime, timedelta
from numbers import Number
from typing import Union, Mapping, Type, Dict, Callable, Any, Tuple, List
logger = logging.getLogger("pykusto")
KustoTypes = Union[str, Number, bool, datetime, Mapping, List, Tuple, timedelta]
# TODO: Unhandled data types: guid, decimal
class TypeRegistrar:
registry: Dict[Type[KustoTypes], Callable]
def __init__(self) -> None:
self.registry = {}
def __call__(self, *types: Type[KustoTypes]) -> Callable:
def inner(wrapped):
for t in types:
self.registry[t] = wrapped
return wrapped
return inner
def for_obj(self, obj: Any) -> Any:
for registered_type, registered_callable in self.registry.items():
if isinstance(obj, registered_type):
return registered_callable(obj)
raise ValueError("No registered callable for object {} of type {}".format(obj, type(obj).__name__))
def for_type(self, t: Type[KustoTypes]) -> Callable:
for registered_type, registered_callable in self.registry.items():
if issubclass(t, registered_type):
return registered_callable
raise ValueError("No registered callable for type {}".format(t.__name__))
kql_converter = TypeRegistrar()
plain_expression = TypeRegistrar()
aggregation_expression = TypeRegistrar()

Просмотреть файл

@ -3,7 +3,7 @@ import sys
from typing import Callable
from unittest import TestCase
from pykusto.utils import logger
from pykusto.type_utils import logger
class TestBase(TestCase):

Просмотреть файл

@ -337,13 +337,14 @@ class TestFunction(TestBase):
Query().extend(f.strcat_delim('-', ',', col.foo)).render()
)
@unittest.skip("Enabled after #40 is fixed")
def test_strcat_array(self):
self.assertEqual(
" | where (strcat_array(foo, \",\")) == \"A,B,C\"",
Query().where(f.strcat_array(col.foo, ',') == 'A,B,C').render()
)
self.assertEqual(
" | where (strcat_array(['A', 'B', 'C'], \",\")) == \"A,B,C\"",
" | where (strcat_array(dynamic([\"A\", \"B\", \"C\"]), \",\")) == \"A,B,C\"",
Query().where(f.strcat_array(['A', 'B', 'C'], ',') == 'A,B,C').render()
)
@ -409,7 +410,7 @@ class TestFunction(TestBase):
# Query().where(f.todatetime('') > datetime.datetime(2019, 7, 23)).render(),
# " | where (startofday(foo)) > datetime(2019-07-23 00:00:00.000000)")
# def todatetime(expr: StringType) -> DatetimeExpression:
# return DatetimeExpression(KQL('todatetime({})'.format(expr)))
# return DatetimeExpression(KQL('todatetime({})'.format(_subexpr_to_kql(expr))))
def test_todouble(self):
self.assertEqual(
@ -439,14 +440,13 @@ class TestFunction(TestBase):
Query().summarize(f.arg_min(col.foo, col.bar, col.fam)).render()
)
@unittest.skip("Re-enable once issue #1 is resolved")
def test_avg(self):
self.assertEqual(
" | summarize avg(foo)",
Query().summarize(f.avg(col.foo)).render()
)
self.assertEqual(
" | summarize avg(foo)-5",
" | summarize avg(foo) - 5",
Query().summarize(f.avg(col.foo) - 5).render()
)

Просмотреть файл

@ -1,4 +1,4 @@
from pykusto import utils
from pykusto.expressions import to_kql
from test.test_base import TestBase
@ -12,5 +12,5 @@ class TestUtils(TestBase):
}
self.assertEqual(
"{\"name\": \"Alan\", \"age\": 21, \"address\": (\"NY\", 36), \"pets\": (\"Libby\", \"Panda\", \"]\", \"[\")}",
utils.to_kql(dict)
to_kql(dict)
)