зеркало из https://github.com/Azure/pykusto.git
Improve type inference and conversion
This commit is contained in:
Родитель
290b60fd7e
Коммит
9e5d250766
|
@ -1,5 +1,4 @@
|
|||
from typing import Union, List, Tuple
|
||||
|
||||
# noinspection PyProtectedMember
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
@ -7,7 +6,7 @@ from urllib.parse import urlparse
|
|||
from azure.kusto.data._response import KustoResponseDataSet
|
||||
from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties
|
||||
|
||||
from pykusto.utils import KQL
|
||||
from pykusto.kql_converters import KQL
|
||||
|
||||
|
||||
class PyKustoClient:
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
from datetime import datetime, timedelta
|
||||
from typing import Any, List, Tuple, Mapping, Optional
|
||||
from numbers import Number
|
||||
from typing import Any, List, Tuple, Mapping, Optional, Type
|
||||
from typing import Union
|
||||
|
||||
from pykusto.utils import KQL
|
||||
from pykusto.utils import KustoTypes, to_kql
|
||||
from pykusto.kql_converters import KQL
|
||||
from pykusto.type_utils import plain_expression, aggregation_expression, KustoTypes, kql_converter
|
||||
|
||||
ExpressionType = Union[KustoTypes, 'BaseExpression']
|
||||
StringType = Union[str, 'StringExpression']
|
||||
|
@ -13,7 +14,6 @@ ArrayType = Union[List, Tuple, 'ArrayExpression']
|
|||
MappingType = Union[Mapping, 'MappingExpression']
|
||||
DatetimeType = Union[datetime, 'DatetimeExpression']
|
||||
TimespanType = Union[timedelta, 'TimespanExpression']
|
||||
AggregationType = Union['AggregationExpression']
|
||||
DynamicType = Union[ArrayType, MappingType]
|
||||
OrderType = Union[DatetimeType, TimespanType, NumberType, StringType]
|
||||
|
||||
|
@ -29,11 +29,17 @@ def _subexpr_to_kql(obj: ExpressionType) -> KQL:
|
|||
class BaseExpression:
|
||||
kql: KQL
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if cls is 'BaseExpression':
|
||||
raise TypeError("BaseExpression is abstract")
|
||||
return object.__new__(cls)
|
||||
|
||||
def __init__(self, kql: KQL) -> None:
|
||||
assert isinstance(kql, str)
|
||||
self.kql = kql
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.kql
|
||||
return str(self.kql)
|
||||
|
||||
def as_subexpression(self) -> KQL:
|
||||
return KQL('({})'.format(self.kql))
|
||||
|
@ -58,10 +64,15 @@ class BaseExpression:
|
|||
return BooleanExpression(KQL('{} has \"{}\"'.format(self.kql, exp)))
|
||||
|
||||
@staticmethod
|
||||
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> KQL:
|
||||
return KQL('{}{}{}'.format(
|
||||
def base_binary_op(
|
||||
left: ExpressionType, operator: str, right: ExpressionType, result_type: Type[KustoTypes]
|
||||
) -> 'BaseExpression':
|
||||
registrar = plain_expression
|
||||
if isinstance(left, AggregationExpression) or isinstance(right, AggregationExpression):
|
||||
registrar = aggregation_expression
|
||||
return registrar.for_type(result_type)(KQL('{}{}{}'.format(
|
||||
_subexpr_to_kql(left), operator, _subexpr_to_kql(right))
|
||||
)
|
||||
))
|
||||
|
||||
def __eq__(self, other: ExpressionType) -> 'BooleanExpression':
|
||||
return BooleanExpression.binary_op(self, ' == ', other)
|
||||
|
@ -100,10 +111,11 @@ class BaseExpression:
|
|||
raise ValueError("Only arrays can be assigned to multiple columns")
|
||||
|
||||
|
||||
@plain_expression(bool)
|
||||
class BooleanExpression(BaseExpression):
|
||||
@staticmethod
|
||||
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'BooleanExpression':
|
||||
return BooleanExpression(BaseExpression.binary_op(left, operator, right))
|
||||
return BaseExpression.base_binary_op(left, operator, right, bool)
|
||||
|
||||
def __and__(self, other: BooleanType) -> 'BooleanExpression':
|
||||
return BooleanExpression.binary_op(self, ' and ', other)
|
||||
|
@ -115,10 +127,11 @@ class BooleanExpression(BaseExpression):
|
|||
return BooleanExpression(KQL('not({})'.format(self.kql)))
|
||||
|
||||
|
||||
@plain_expression(Number)
|
||||
class NumberExpression(BaseExpression):
|
||||
@staticmethod
|
||||
def binary_op(left: NumberType, operator: str, right: NumberType) -> 'NumberExpression':
|
||||
return NumberExpression(BaseExpression.binary_op(left, operator, right))
|
||||
return BaseExpression.base_binary_op(left, operator, right, Number)
|
||||
|
||||
def __lt__(self, other: NumberType) -> BooleanExpression:
|
||||
return BooleanExpression.binary_op(self, ' < ', other)
|
||||
|
@ -168,15 +181,15 @@ class NumberExpression(BaseExpression):
|
|||
return NumberExpression(KQL('floor({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
|
||||
def bin(self, round_to: NumberType) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
return NumberExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
|
||||
def bin_at(self, round_to: NumberType, fixed_point: NumberType) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
|
||||
_subexpr_to_kql(round_to),
|
||||
_subexpr_to_kql(fixed_point))))
|
||||
return NumberExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
|
||||
_subexpr_to_kql(round_to),
|
||||
_subexpr_to_kql(fixed_point))))
|
||||
|
||||
def bin_auto(self) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin_auto({})'.format(self.kql)))
|
||||
return NumberExpression(KQL('bin_auto({})'.format(self.kql)))
|
||||
|
||||
def ceiling(self) -> 'NumberExpression':
|
||||
return NumberExpression(KQL('ceiling({})'.format(self.kql)))
|
||||
|
@ -213,11 +226,16 @@ class NumberExpression(BaseExpression):
|
|||
|
||||
def round(self, precision: NumberType = None) -> 'NumberExpression':
|
||||
return NumberExpression(KQL(
|
||||
('round({}, {})' if precision is None else 'round({}, {})').format(self, precision)
|
||||
('round({}, {})' if precision is None else 'round({}, {})').format(self.kql, to_kql(precision))
|
||||
))
|
||||
|
||||
|
||||
@plain_expression(str)
|
||||
class StringExpression(BaseExpression):
|
||||
@staticmethod
|
||||
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'StringExpression':
|
||||
return BaseExpression.base_binary_op(left, operator, right, str)
|
||||
|
||||
def __len__(self) -> NumberExpression:
|
||||
return self.string_size()
|
||||
|
||||
|
@ -228,7 +246,7 @@ class StringExpression(BaseExpression):
|
|||
return BooleanExpression(KQL('isempty({})'.format(self.kql)))
|
||||
|
||||
def __add__(self, other: StringType) -> 'StringExpression':
|
||||
return StringExpression(BaseExpression.binary_op(self, ' + ', other))
|
||||
return StringExpression.binary_op(self, ' + ', other)
|
||||
|
||||
@staticmethod
|
||||
def concat(*strings: StringType) -> 'StringExpression':
|
||||
|
@ -238,8 +256,10 @@ class StringExpression(BaseExpression):
|
|||
|
||||
def split(self, delimiter: StringType, requested_index: NumberType = None) -> 'ArrayExpression':
|
||||
if requested_index is None:
|
||||
return ArrayExpression(KQL('split({}, {}'.format(self.kql, delimiter)))
|
||||
return ArrayExpression(KQL('split({}, {}, {}'.format(self.kql, delimiter, requested_index)))
|
||||
return ArrayExpression(KQL('split({}, {}'.format(self.kql, to_kql(delimiter))))
|
||||
return ArrayExpression(KQL('split({}, {}, {}'.format(
|
||||
self.kql, _subexpr_to_kql(delimiter), to_kql(requested_index)
|
||||
)))
|
||||
|
||||
def equals(self, other: StringType, case_sensitive: bool = False) -> BooleanExpression:
|
||||
return BooleanExpression.binary_op(self, ' == ' if case_sensitive else ' =~ ', other)
|
||||
|
@ -278,10 +298,11 @@ class StringExpression(BaseExpression):
|
|||
return BooleanExpression(KQL('isutf8({})'.format(self.kql)))
|
||||
|
||||
|
||||
@plain_expression(datetime)
|
||||
class DatetimeExpression(BaseExpression):
|
||||
@staticmethod
|
||||
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'DatetimeExpression':
|
||||
return DatetimeExpression(BaseExpression.binary_op(left, operator, right))
|
||||
return BaseExpression.base_binary_op(left, operator, right, datetime)
|
||||
|
||||
def __lt__(self, other: DatetimeType) -> BooleanExpression:
|
||||
return BooleanExpression.binary_op(self, ' < ', other)
|
||||
|
@ -316,15 +337,15 @@ class DatetimeExpression(BaseExpression):
|
|||
return DatetimeExpression(KQL('floor({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
|
||||
def bin(self, round_to: TimespanType) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
return DatetimeExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
|
||||
def bin_at(self, round_to: TimespanType, fixed_point: DatetimeType) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
|
||||
_subexpr_to_kql(round_to),
|
||||
_subexpr_to_kql(fixed_point))))
|
||||
return DatetimeExpression(KQL('bin_at({}, {}, {})'.format(
|
||||
self.kql, _subexpr_to_kql(round_to), to_kql(fixed_point)
|
||||
)))
|
||||
|
||||
def bin_auto(self) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin_auto({})'.format(self.kql)))
|
||||
return DatetimeExpression(KQL('bin_auto({})'.format(self.kql)))
|
||||
|
||||
def endofday(self, offset: NumberType = None) -> 'DatetimeExpression':
|
||||
if offset is None:
|
||||
|
@ -364,33 +385,34 @@ class DatetimeExpression(BaseExpression):
|
|||
return NumberExpression(KQL('getyear({})'.format(self.kql)))
|
||||
|
||||
def hourofday(self) -> NumberExpression:
|
||||
return NumberExpression(KQL('hourofday({})'.format(self)))
|
||||
return NumberExpression(KQL('hourofday({})'.format(self.kql)))
|
||||
|
||||
def startofday(self, offset: NumberType = None) -> 'DatetimeExpression':
|
||||
return DatetimeExpression(KQL(
|
||||
('startofday({})' if offset is None else 'startofday({}, {})').format(self.kql, offset)
|
||||
('startofday({})' if offset is None else 'startofday({}, {})').format(self.kql, to_kql(offset))
|
||||
))
|
||||
|
||||
def startofmonth(self, offset: NumberType = None) -> 'DatetimeExpression':
|
||||
return DatetimeExpression(KQL(
|
||||
('startofmonth({})' if offset is None else 'startofmonth({}, {})').format(self.kql, offset)
|
||||
('startofmonth({})' if offset is None else 'startofmonth({}, {})').format(self.kql, to_kql(offset))
|
||||
))
|
||||
|
||||
def startofweek(self, offset: NumberType = None) -> 'DatetimeExpression':
|
||||
return DatetimeExpression(KQL(
|
||||
('startofweek({})' if offset is None else 'startofweek({}, {})').format(self.kql, offset)
|
||||
('startofweek({})' if offset is None else 'startofweek({}, {})').format(self.kql, to_kql(offset))
|
||||
))
|
||||
|
||||
def startofyear(self, offset: NumberType = None) -> 'DatetimeExpression':
|
||||
return DatetimeExpression(KQL(
|
||||
('startofyear({})' if offset is None else 'startofyear({}, {})').format(self.kql, offset)
|
||||
('startofyear({})' if offset is None else 'startofyear({}, {})').format(self.kql, to_kql(offset))
|
||||
))
|
||||
|
||||
|
||||
@plain_expression(timedelta)
|
||||
class TimespanExpression(BaseExpression):
|
||||
@staticmethod
|
||||
def binary_op(left: ExpressionType, operator: str, right: ExpressionType) -> 'TimespanExpression':
|
||||
return TimespanExpression(BaseExpression.binary_op(left, operator, right))
|
||||
return BaseExpression.base_binary_op(left, operator, right, timedelta)
|
||||
|
||||
def __add__(self, other: TimespanType) -> 'TimespanExpression':
|
||||
return TimespanExpression.binary_op(self, ' + ', other)
|
||||
|
@ -402,18 +424,18 @@ class TimespanExpression(BaseExpression):
|
|||
return DatetimeExpression(KQL('ago({})'.format(_subexpr_to_kql(self))))
|
||||
|
||||
def bin(self, round_to: TimespanType) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
return TimespanExpression(KQL('bin({}, {})'.format(self.kql, _subexpr_to_kql(round_to))))
|
||||
|
||||
def bin_at(self, round_to: TimespanType, fixed_point: TimespanType) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin_at({}, {}, {})'.format(self.kql,
|
||||
_subexpr_to_kql(round_to),
|
||||
_subexpr_to_kql(fixed_point))))
|
||||
return TimespanExpression(KQL('bin_at({}, {}, {})'.format(
|
||||
self.kql, to_kql(round_to), to_kql(fixed_point)
|
||||
)))
|
||||
|
||||
def bin_auto(self) -> 'BaseExpression':
|
||||
return BaseExpression(KQL('bin_auto({})'.format(self.kql)))
|
||||
return TimespanExpression(KQL('bin_auto({})'.format(self.kql)))
|
||||
|
||||
def format_timespan(self, format_string: StringType) -> StringExpression:
|
||||
return StringExpression(KQL('format_timespan({}, {})'.format(self.kql, _subexpr_to_kql(format_string))))
|
||||
return StringExpression(KQL('format_timespan({}, {})'.format(self.kql, to_kql(format_string))))
|
||||
|
||||
def between(self, lower: TimespanType, upper: TimespanType) -> BooleanExpression:
|
||||
return BooleanExpression(KQL('{} between ({} .. {})'.format(
|
||||
|
@ -421,6 +443,7 @@ class TimespanExpression(BaseExpression):
|
|||
)))
|
||||
|
||||
|
||||
@plain_expression(List, Tuple)
|
||||
class ArrayExpression(BaseExpression):
|
||||
def __len__(self) -> NumberExpression:
|
||||
return self.array_length()
|
||||
|
@ -438,7 +461,7 @@ class ArrayExpression(BaseExpression):
|
|||
)))
|
||||
|
||||
def __getitem__(self, index: NumberType) -> BaseExpression:
|
||||
return BaseExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
|
||||
return AnyExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
|
||||
|
||||
def assign_to(self, *columns: 'Column') -> 'AssignmentBase':
|
||||
if len(columns) <= 1:
|
||||
|
@ -446,6 +469,7 @@ class ArrayExpression(BaseExpression):
|
|||
return AssignmentToMultipleColumns(columns, self)
|
||||
|
||||
|
||||
@plain_expression(Mapping)
|
||||
class MappingExpression(BaseExpression):
|
||||
def keys(self) -> ArrayExpression:
|
||||
return ArrayExpression(KQL('bag_keys({})'.format(self.kql)))
|
||||
|
@ -457,10 +481,14 @@ class MappingExpression(BaseExpression):
|
|||
)))
|
||||
|
||||
def __getitem__(self, index: StringType) -> BaseExpression:
|
||||
return BaseExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
|
||||
return AnyExpression(KQL('{}[{}]'.format(self.kql, _subexpr_to_kql(index))))
|
||||
|
||||
|
||||
class AggregationExpression(BaseExpression):
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if cls is 'AggregationExpression':
|
||||
raise TypeError("AggregationExpression is abstract")
|
||||
return object.__new__(cls)
|
||||
|
||||
def assign_to(self, *columns: 'Column') -> 'AssignmentFromAggregationToColumn':
|
||||
if len(columns) == 0:
|
||||
|
@ -474,30 +502,37 @@ class AggregationExpression(BaseExpression):
|
|||
return self.kql
|
||||
|
||||
|
||||
@aggregation_expression(bool)
|
||||
class BooleanAggregationExpression(AggregationExpression, BooleanExpression):
|
||||
pass
|
||||
|
||||
|
||||
@aggregation_expression(Number)
|
||||
class NumberAggregationExpression(AggregationExpression, NumberExpression):
|
||||
pass
|
||||
|
||||
|
||||
@aggregation_expression(str)
|
||||
class StringAggregationExpression(AggregationExpression, StringExpression):
|
||||
pass
|
||||
|
||||
|
||||
@aggregation_expression(datetime)
|
||||
class DatetimeAggregationExpression(AggregationExpression, DatetimeExpression):
|
||||
pass
|
||||
|
||||
|
||||
@aggregation_expression(timedelta)
|
||||
class TimespanAggregationExpression(AggregationExpression, TimespanExpression):
|
||||
pass
|
||||
|
||||
|
||||
@aggregation_expression(List, Tuple)
|
||||
class ArrayAggregationExpression(AggregationExpression, ArrayExpression):
|
||||
pass
|
||||
|
||||
|
||||
@aggregation_expression(Mapping)
|
||||
class MappingAggregationExpression(AggregationExpression, MappingExpression):
|
||||
pass
|
||||
|
||||
|
@ -541,15 +576,19 @@ class AssignmentToMultipleColumns(AssignmentBase):
|
|||
|
||||
|
||||
class AssignmentFromAggregationToColumn(AssignmentBase):
|
||||
def __init__(self, column: Optional['Column'], aggregation: AggregationType) -> None:
|
||||
def __init__(self, column: Optional['Column'], aggregation: AggregationExpression) -> None:
|
||||
super().__init__(None if column is None else column.kql, aggregation)
|
||||
|
||||
|
||||
class Column(
|
||||
class AnyExpression(
|
||||
NumberExpression, BooleanExpression, StringExpression,
|
||||
ArrayExpression, MappingExpression, DatetimeExpression,
|
||||
TimespanExpression
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
class Column(AnyExpression):
|
||||
_name: str
|
||||
|
||||
def __init__(self, name: str) -> None:
|
||||
|
@ -588,3 +627,9 @@ class ColumnGenerator:
|
|||
# Recommended usage: from pykusto.expressions import column_generator as col
|
||||
# TODO: Is there a way to enforce this to be a singleton?
|
||||
column_generator = ColumnGenerator()
|
||||
|
||||
|
||||
def to_kql(obj: ExpressionType) -> KQL:
|
||||
if isinstance(obj, BaseExpression):
|
||||
return obj.kql
|
||||
return kql_converter.for_obj(obj)
|
||||
|
|
|
@ -1,7 +1,11 @@
|
|||
from pykusto import utils
|
||||
from pykusto.expressions import *
|
||||
from pykusto.expressions import _subexpr_to_kql, Column
|
||||
from pykusto.utils import KQL
|
||||
from typing import Union
|
||||
|
||||
from pykusto.expressions import _subexpr_to_kql, Column, NumberType, NumberExpression, TimespanType, \
|
||||
DatetimeExpression, TimespanExpression, ArrayType, DynamicType, DatetimeType, BaseExpression, BooleanType, \
|
||||
ExpressionType, AggregationExpression, StringType, StringExpression, BooleanExpression, \
|
||||
NumberAggregationExpression, MappingAggregationExpression, ArrayAggregationExpression, to_kql
|
||||
from pykusto.kql_converters import KQL
|
||||
from pykusto.type_utils import plain_expression
|
||||
|
||||
|
||||
# Scalar functions
|
||||
|
@ -101,9 +105,9 @@ def bin_auto(expr: Union[NumberType, DatetimeType, TimespanType]) -> BaseExpress
|
|||
|
||||
|
||||
def case(predicate: BooleanType, val: ExpressionType, *args: Union[BooleanType, ExpressionType]) -> BaseExpression:
|
||||
res = 'case({}, {}, {})'.format(_subexpr_to_kql(predicate),
|
||||
_subexpr_to_kql(val),
|
||||
', '.join([_subexpr_to_kql(arg) for arg in args]))
|
||||
res = 'case({}, {}, {})'.format(
|
||||
_subexpr_to_kql(predicate), _subexpr_to_kql(val), ', '.join([_subexpr_to_kql(arg) for arg in args])
|
||||
)
|
||||
return AggregationExpression(KQL(res))
|
||||
|
||||
|
||||
|
@ -167,7 +171,7 @@ def cos(expr: NumberType) -> NumberExpression:
|
|||
|
||||
|
||||
# def dcount_hll(expr: ExpressionType) -> BaseExpression:
|
||||
# return BaseExpression(KQL('dcount_hll({})'.format(expr)))
|
||||
# return BaseExpression(KQL('dcount_hll({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
# def degrees(self): return
|
||||
|
@ -260,7 +264,12 @@ def hourofday(expr: DatetimeType) -> NumberExpression:
|
|||
|
||||
|
||||
def iff(predicate: BooleanType, if_true: ExpressionType, if_false: ExpressionType) -> BaseExpression:
|
||||
return BaseExpression(KQL('iff({}, {}, {})'.format(predicate, _subexpr_to_kql(if_true), _subexpr_to_kql(if_false))))
|
||||
return_type = type(if_true)
|
||||
if type(if_false) is not return_type:
|
||||
raise TypeError("The second and third arguments must be of the same type")
|
||||
return plain_expression.for_type(return_type)(
|
||||
KQL('iff({}, {}, {})'.format(to_kql(predicate), to_kql(if_true), to_kql(if_false)))
|
||||
)
|
||||
|
||||
|
||||
def iif(predicate: BooleanType, if_true: ExpressionType, if_false: ExpressionType) -> BaseExpression:
|
||||
|
@ -365,7 +374,7 @@ def new_guid(): raise NotImplemented # TODO
|
|||
|
||||
def now(offset: TimespanType = None) -> StringExpression:
|
||||
if offset:
|
||||
return StringExpression(KQL('now({})'.format(utils.timedelta_to_kql(offset))))
|
||||
return StringExpression(KQL('now({})'.format(to_kql(offset))))
|
||||
return StringExpression(KQL('now()'))
|
||||
|
||||
|
||||
|
@ -417,7 +426,7 @@ def percentrank_tdigest(): raise NotImplemented # TODO
|
|||
|
||||
|
||||
def pow(expr1: NumberType, expr2: NumberType) -> NumberExpression:
|
||||
return NumberExpression(KQL('pow({}, {})'.format(expr1, expr2)))
|
||||
return NumberExpression(KQL('pow({}, {})'.format(_subexpr_to_kql(expr1), _subexpr_to_kql(expr2))))
|
||||
|
||||
|
||||
# def radians(self): return
|
||||
|
@ -542,7 +551,7 @@ def round(expr: NumberType, precision: NumberType = None) -> NumberExpression:
|
|||
|
||||
|
||||
def sign(expr: NumberType) -> NumberExpression:
|
||||
return NumberExpression(KQL('sign({})'.format(expr)))
|
||||
return NumberExpression(KQL('sign({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
# def sin(self): return
|
||||
|
@ -552,7 +561,7 @@ def sign(expr: NumberType) -> NumberExpression:
|
|||
|
||||
|
||||
def sqrt(expr: NumberType) -> NumberExpression:
|
||||
return NumberExpression(KQL('sqrt({})'.format(expr)))
|
||||
return NumberExpression(KQL('sqrt({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def startofday(expr: DatetimeType, offset: NumberType = None) -> DatetimeExpression:
|
||||
|
@ -580,7 +589,7 @@ def strcat(expr1: StringType, expr2: StringType, *exprs: StringType) -> StringEx
|
|||
|
||||
|
||||
def strcat_array(expr: ArrayType, delimiter: StringType) -> StringExpression:
|
||||
return StringExpression(KQL('strcat_array({}, {})'.format(expr, _subexpr_to_kql(delimiter))))
|
||||
return StringExpression(KQL('strcat_array({}, {})'.format(to_kql(expr), to_kql(delimiter))))
|
||||
|
||||
|
||||
def strcat_delim(delimiter: StringType, expr1: StringType, expr2: StringType, *exprs: StringType) -> StringExpression:
|
||||
|
@ -594,30 +603,33 @@ def strcat_delim(delimiter: StringType, expr1: StringType, expr2: StringType, *e
|
|||
|
||||
|
||||
def strcmp(expr1: StringType, expr2: StringType) -> NumberExpression:
|
||||
return NumberExpression(KQL('strcmp({}, {})'.format(expr1, expr2)))
|
||||
return NumberExpression(KQL('strcmp({}, {})'.format(_subexpr_to_kql(expr1), _subexpr_to_kql(expr2))))
|
||||
|
||||
|
||||
def string_size(expr: StringType) -> NumberExpression:
|
||||
return NumberExpression(KQL('string_size({})'.format(expr)))
|
||||
return NumberExpression(KQL('string_size({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def strlen(expr: StringType) -> NumberExpression:
|
||||
return NumberExpression(KQL('strlen({})'.format(expr)))
|
||||
return NumberExpression(KQL('strlen({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def strrep(expr: StringType,
|
||||
multiplier: NumberType,
|
||||
delimiter: StringType = None) -> StringExpression:
|
||||
if delimiter is None:
|
||||
res = 'strrep({}, {})'.format(expr, multiplier)
|
||||
res = 'strrep({}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(multiplier))
|
||||
else:
|
||||
res = 'strrep({}, {}, {})'.format(expr, multiplier, _subexpr_to_kql(delimiter))
|
||||
res = 'strrep({}, {}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(multiplier),
|
||||
_subexpr_to_kql(delimiter))
|
||||
return StringExpression(KQL((res)))
|
||||
|
||||
|
||||
def substring(expr: StringType, start_index: NumberType, length: NumberType = None) -> StringExpression:
|
||||
return StringExpression(KQL(
|
||||
('substring({}, {})' if length is None else 'substring({}, {}, {})').format(expr, start_index, length)
|
||||
('substring({}, {})' if length is None else 'substring({}, {}, {})').format(
|
||||
_subexpr_to_kql(expr), _subexpr_to_kql(start_index), _subexpr_to_kql(length)
|
||||
)
|
||||
))
|
||||
|
||||
|
||||
|
@ -628,22 +640,22 @@ def substring(expr: StringType, start_index: NumberType, length: NumberType = No
|
|||
|
||||
|
||||
def tobool(expr: ExpressionType) -> BooleanExpression:
|
||||
return BooleanExpression(KQL('tobool({})'.format(expr)))
|
||||
return BooleanExpression(KQL('tobool({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def toboolean(expr: ExpressionType) -> BooleanExpression:
|
||||
return BooleanExpression(KQL('toboolean({})'.format(expr)))
|
||||
return BooleanExpression(KQL('toboolean({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def todatetime(expr: StringType) -> DatetimeExpression:
|
||||
return DatetimeExpression(KQL('todatetime({})'.format(expr)))
|
||||
return DatetimeExpression(KQL('todatetime({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def todecimal(): raise NotImplemented # TODO
|
||||
|
||||
|
||||
def todouble(expr: NumberType) -> NumberExpression:
|
||||
return NumberExpression(KQL("todouble({})".format(expr)))
|
||||
return NumberExpression(KQL("todouble({})".format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def todynamic(): raise NotImplemented # TODO
|
||||
|
@ -730,11 +742,11 @@ def arg_min(*args: ExpressionType) -> AggregationExpression:
|
|||
|
||||
|
||||
def avg(expr: ExpressionType) -> NumberAggregationExpression:
|
||||
return NumberAggregationExpression(KQL('avg({})'.format(expr)))
|
||||
return NumberAggregationExpression(KQL('avg({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def avgif(expr: ExpressionType, predicate: BooleanType) -> NumberAggregationExpression:
|
||||
return NumberAggregationExpression(KQL('avgif({}, {})'.format(expr, predicate)))
|
||||
return NumberAggregationExpression(KQL('avgif({}, {})'.format(_subexpr_to_kql(expr), _subexpr_to_kql(predicate))))
|
||||
|
||||
|
||||
# def buildschema(self):
|
||||
|
@ -747,17 +759,19 @@ def count(col: Column = None) -> NumberAggregationExpression:
|
|||
|
||||
|
||||
def countif(predicate: BooleanType) -> NumberAggregationExpression:
|
||||
return NumberAggregationExpression(KQL('countif({})'.format(predicate)))
|
||||
return NumberAggregationExpression(KQL('countif({})'.format(to_kql(predicate))))
|
||||
|
||||
|
||||
def dcount(expr: ExpressionType, accuracy: NumberType = None) -> NumberAggregationExpression:
|
||||
return NumberAggregationExpression(KQL(
|
||||
('dcount({})' if accuracy is None else 'dcount({}, {})').format(expr, accuracy)
|
||||
('dcount({})' if accuracy is None else 'dcount({}, {})').format(to_kql(expr), to_kql(accuracy))
|
||||
))
|
||||
|
||||
|
||||
def dcountif(expr: ExpressionType, predicate: BooleanType, accuracy: NumberType = 0) -> NumberAggregationExpression:
|
||||
return NumberAggregationExpression(KQL('dcountif({}, {}, {})'.format(expr, predicate, accuracy)))
|
||||
return NumberAggregationExpression(KQL('dcountif({}, {}, {})'.format(
|
||||
to_kql(expr), to_kql(predicate), to_kql(accuracy)
|
||||
)))
|
||||
|
||||
|
||||
# def hll(expr: ExpressionType, accuracy: NumberType = None) -> AggregationExpression:
|
||||
|
@ -767,64 +781,64 @@ def dcountif(expr: ExpressionType, predicate: BooleanType, accuracy: NumberType
|
|||
|
||||
|
||||
# def hll_merge(expr: ExpressionType) -> AggregationExpression:
|
||||
# return AggregationExpression(KQL('hll_merge({})'.format(expr)))
|
||||
# return AggregationExpression(KQL('hll_merge({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
|
||||
def make_bag(expr: ExpressionType, max_size: NumberType = None) -> MappingAggregationExpression:
|
||||
if max_size:
|
||||
return MappingAggregationExpression(KQL('make_bag({}, {})'.format(expr, max_size)))
|
||||
return MappingAggregationExpression(KQL('make_bag({})'.format(expr)))
|
||||
return MappingAggregationExpression(KQL('make_bag({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def make_list(expr: ExpressionType, max_size: NumberType = None) -> ArrayAggregationExpression:
|
||||
if max_size:
|
||||
return ArrayAggregationExpression(KQL('make_list({}, {})'.format(expr, max_size)))
|
||||
return ArrayAggregationExpression(KQL('make_list({})'.format(expr)))
|
||||
return ArrayAggregationExpression(KQL('make_list({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def make_set(expr: ExpressionType, max_size: NumberType = None) -> ArrayAggregationExpression:
|
||||
if max_size:
|
||||
return ArrayAggregationExpression(KQL('make_set({}, {})'.format(expr, max_size)))
|
||||
return ArrayAggregationExpression(KQL('make_set({})'.format(expr)))
|
||||
return ArrayAggregationExpression(KQL('make_set({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def max(expr: ExpressionType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('max({})'.format(expr)))
|
||||
return AggregationExpression(KQL('max({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def min(expr: ExpressionType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('min({})'.format(expr)))
|
||||
return AggregationExpression(KQL('min({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def percentile(expr: ExpressionType, per: NumberType) -> AggregationExpression:
|
||||
res = 'percentiles({}, {})'.format(expr, _subexpr_to_kql(per))
|
||||
res = 'percentiles({}, {})'.format(expr, to_kql(per))
|
||||
return AggregationExpression(KQL(res))
|
||||
|
||||
|
||||
def percentiles(expr: ExpressionType, *pers: NumberType) -> AggregationExpression:
|
||||
res = 'percentiles({}, {})'.format(expr,
|
||||
', '.join([str(_subexpr_to_kql(per)) for per in pers]))
|
||||
', '.join([str(to_kql(per)) for per in pers]))
|
||||
return AggregationExpression(KQL(res))
|
||||
|
||||
|
||||
def stdev(expr: ExpressionType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('stdev({})'.format(expr)))
|
||||
return AggregationExpression(KQL('stdev({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def stdevif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('stdevif({}, {})'.format(expr, predicate)))
|
||||
return AggregationExpression(KQL('stdevif({}, {})'.format(to_kql(expr), to_kql(predicate))))
|
||||
|
||||
|
||||
def stdevp(expr: ExpressionType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('stdevp({})'.format(expr)))
|
||||
return AggregationExpression(KQL('stdevp({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def sum(expr: ExpressionType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('sum({})'.format(expr)))
|
||||
return AggregationExpression(KQL('sum({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def sumif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('sumif({}, {})'.format(expr, predicate)))
|
||||
return AggregationExpression(KQL('sumif({}, {})'.format(to_kql(expr), to_kql(predicate))))
|
||||
|
||||
|
||||
# def tdigest(self):
|
||||
|
@ -836,12 +850,12 @@ def sumif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression
|
|||
|
||||
|
||||
def variance(expr: ExpressionType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('variance({})'.format(expr)))
|
||||
return AggregationExpression(KQL('variance({})'.format(to_kql(expr))))
|
||||
|
||||
|
||||
def varianceif(expr: ExpressionType, predicate: BooleanType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('varianceif({}, {})'.format(expr, predicate)))
|
||||
return AggregationExpression(KQL('varianceif({}, {})'.format(to_kql(expr), to_kql(predicate))))
|
||||
|
||||
|
||||
def variancep(expr: ExpressionType) -> AggregationExpression:
|
||||
return AggregationExpression(KQL('variancep({})'.format(expr)))
|
||||
return AggregationExpression(KQL('variancep({})'.format(to_kql(expr))))
|
||||
|
|
|
@ -1,20 +1,19 @@
|
|||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Union, Mapping, NewType, Type, Dict, Callable, Any, Tuple, List
|
||||
from numbers import Number
|
||||
from typing import NewType, Mapping, Union, List, Tuple
|
||||
|
||||
logger = logging.getLogger("pykusto")
|
||||
|
||||
KustoTypes = Union[str, int, bool, datetime, Mapping, List, Tuple, float, timedelta]
|
||||
# TODO: Unhandled date types: guid, decimal
|
||||
from pykusto.type_utils import kql_converter
|
||||
|
||||
KQL = NewType('KQL', str)
|
||||
|
||||
|
||||
@kql_converter(datetime)
|
||||
def datetime_to_kql(dt: datetime) -> KQL:
|
||||
return KQL(dt.strftime('datetime(%Y-%m-%d %H:%M:%S.%f)'))
|
||||
|
||||
|
||||
@kql_converter(timedelta)
|
||||
def timedelta_to_kql(td: timedelta) -> KQL:
|
||||
hours, remainder = divmod(td.seconds, 3600)
|
||||
minutes, seconds = divmod(remainder, 60)
|
||||
|
@ -27,6 +26,7 @@ def timedelta_to_kql(td: timedelta) -> KQL:
|
|||
))
|
||||
|
||||
|
||||
@kql_converter(Mapping, List, Tuple)
|
||||
def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL:
|
||||
query = list(json.dumps(d))
|
||||
# Issue #11
|
||||
|
@ -48,29 +48,21 @@ def dynamic_to_kql(d: Union[Mapping, List, Tuple]) -> KQL:
|
|||
return KQL("".join(query))
|
||||
|
||||
|
||||
@kql_converter(bool)
|
||||
def bool_to_kql(b: bool) -> KQL:
|
||||
return KQL('true') if b else KQL('false')
|
||||
|
||||
|
||||
@kql_converter(str)
|
||||
def str_to_kql(s: str) -> KQL:
|
||||
return KQL('"{}"'.format(s))
|
||||
|
||||
|
||||
KQL_CONVERTER_BY_TYPE: Dict[Type, Callable[[Any], KQL]] = {
|
||||
datetime: datetime_to_kql,
|
||||
timedelta: timedelta_to_kql,
|
||||
Mapping: dynamic_to_kql,
|
||||
List: dynamic_to_kql,
|
||||
Tuple: dynamic_to_kql,
|
||||
bool: bool_to_kql,
|
||||
str: str_to_kql,
|
||||
int: KQL,
|
||||
float: KQL,
|
||||
}
|
||||
@kql_converter(Number)
|
||||
def number_to_kql(n: Number) -> KQL:
|
||||
return KQL(str(n))
|
||||
|
||||
|
||||
def to_kql(obj: KustoTypes) -> KQL:
|
||||
for kusto_type, converter in KQL_CONVERTER_BY_TYPE.items():
|
||||
if isinstance(obj, kusto_type):
|
||||
return converter(obj)
|
||||
raise ValueError("No KQL converter found for object {} of type {}".format(obj, type(obj)))
|
||||
@kql_converter(type(None))
|
||||
def none_to_kql(n: type(None)) -> KQL:
|
||||
return KQL("")
|
|
@ -1,18 +1,19 @@
|
|||
from abc import abstractmethod
|
||||
from copy import copy, deepcopy
|
||||
from enum import Enum
|
||||
from itertools import chain
|
||||
from types import FunctionType
|
||||
from typing import Tuple, List, Union, Optional
|
||||
|
||||
from abc import abstractmethod
|
||||
from azure.kusto.data.helpers import dataframe_from_result_table
|
||||
from copy import copy, deepcopy
|
||||
from enum import Enum
|
||||
from types import FunctionType
|
||||
|
||||
from pykusto.client import Table
|
||||
from pykusto.expressions import BooleanType, ExpressionType, AggregationExpression, OrderType, \
|
||||
StringType, AssignmentBase, AssignmentFromAggregationToColumn, AssignmentToSingleColumn, Column, BaseExpression, \
|
||||
AssignmentFromColumnToColumn
|
||||
AssignmentFromColumnToColumn, AnyExpression, to_kql
|
||||
from pykusto.kql_converters import KQL
|
||||
from pykusto.type_utils import logger
|
||||
from pykusto.udf import stringify_python_func
|
||||
from pykusto.utils import KQL, logger, to_kql
|
||||
|
||||
|
||||
class Order(Enum):
|
||||
|
@ -127,7 +128,7 @@ class Query:
|
|||
return DistinctQuery(self, columns)
|
||||
|
||||
def distinct_all(self):
|
||||
return DistinctQuery(self, (BaseExpression(KQL("*")),))
|
||||
return DistinctQuery(self, (AnyExpression(KQL("*")),))
|
||||
|
||||
def extend(self, *args: Union[BaseExpression, AssignmentBase], **kwargs: ExpressionType) -> 'ExtendQuery':
|
||||
assignments: List[AssignmentBase] = []
|
||||
|
@ -140,7 +141,7 @@ class Query:
|
|||
if isinstance(expression, BaseExpression):
|
||||
assignments.append(expression.assign_to(Column(column_name)))
|
||||
else:
|
||||
assignments.append(BaseExpression(to_kql(expression)).assign_to(Column(column_name)))
|
||||
assignments.append(AnyExpression(to_kql(expression)).assign_to(Column(column_name)))
|
||||
return ExtendQuery(self, *assignments)
|
||||
|
||||
def summarize(self, *args: Union[AggregationExpression, AssignmentFromAggregationToColumn],
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from numbers import Number
|
||||
from typing import Union, Mapping, Type, Dict, Callable, Any, Tuple, List
|
||||
|
||||
logger = logging.getLogger("pykusto")
|
||||
|
||||
KustoTypes = Union[str, Number, bool, datetime, Mapping, List, Tuple, timedelta]
|
||||
# TODO: Unhandled data types: guid, decimal
|
||||
|
||||
|
||||
class TypeRegistrar:
|
||||
registry: Dict[Type[KustoTypes], Callable]
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.registry = {}
|
||||
|
||||
def __call__(self, *types: Type[KustoTypes]) -> Callable:
|
||||
def inner(wrapped):
|
||||
for t in types:
|
||||
self.registry[t] = wrapped
|
||||
return wrapped
|
||||
|
||||
return inner
|
||||
|
||||
def for_obj(self, obj: Any) -> Any:
|
||||
for registered_type, registered_callable in self.registry.items():
|
||||
if isinstance(obj, registered_type):
|
||||
return registered_callable(obj)
|
||||
raise ValueError("No registered callable for object {} of type {}".format(obj, type(obj).__name__))
|
||||
|
||||
def for_type(self, t: Type[KustoTypes]) -> Callable:
|
||||
for registered_type, registered_callable in self.registry.items():
|
||||
if issubclass(t, registered_type):
|
||||
return registered_callable
|
||||
raise ValueError("No registered callable for type {}".format(t.__name__))
|
||||
|
||||
|
||||
kql_converter = TypeRegistrar()
|
||||
plain_expression = TypeRegistrar()
|
||||
aggregation_expression = TypeRegistrar()
|
|
@ -3,7 +3,7 @@ import sys
|
|||
from typing import Callable
|
||||
from unittest import TestCase
|
||||
|
||||
from pykusto.utils import logger
|
||||
from pykusto.type_utils import logger
|
||||
|
||||
|
||||
class TestBase(TestCase):
|
||||
|
|
|
@ -337,13 +337,14 @@ class TestFunction(TestBase):
|
|||
Query().extend(f.strcat_delim('-', ',', col.foo)).render()
|
||||
)
|
||||
|
||||
@unittest.skip("Enabled after #40 is fixed")
|
||||
def test_strcat_array(self):
|
||||
self.assertEqual(
|
||||
" | where (strcat_array(foo, \",\")) == \"A,B,C\"",
|
||||
Query().where(f.strcat_array(col.foo, ',') == 'A,B,C').render()
|
||||
)
|
||||
self.assertEqual(
|
||||
" | where (strcat_array(['A', 'B', 'C'], \",\")) == \"A,B,C\"",
|
||||
" | where (strcat_array(dynamic([\"A\", \"B\", \"C\"]), \",\")) == \"A,B,C\"",
|
||||
Query().where(f.strcat_array(['A', 'B', 'C'], ',') == 'A,B,C').render()
|
||||
)
|
||||
|
||||
|
@ -409,7 +410,7 @@ class TestFunction(TestBase):
|
|||
# Query().where(f.todatetime('') > datetime.datetime(2019, 7, 23)).render(),
|
||||
# " | where (startofday(foo)) > datetime(2019-07-23 00:00:00.000000)")
|
||||
# def todatetime(expr: StringType) -> DatetimeExpression:
|
||||
# return DatetimeExpression(KQL('todatetime({})'.format(expr)))
|
||||
# return DatetimeExpression(KQL('todatetime({})'.format(_subexpr_to_kql(expr))))
|
||||
|
||||
def test_todouble(self):
|
||||
self.assertEqual(
|
||||
|
@ -439,14 +440,13 @@ class TestFunction(TestBase):
|
|||
Query().summarize(f.arg_min(col.foo, col.bar, col.fam)).render()
|
||||
)
|
||||
|
||||
@unittest.skip("Re-enable once issue #1 is resolved")
|
||||
def test_avg(self):
|
||||
self.assertEqual(
|
||||
" | summarize avg(foo)",
|
||||
Query().summarize(f.avg(col.foo)).render()
|
||||
)
|
||||
self.assertEqual(
|
||||
" | summarize avg(foo)-5",
|
||||
" | summarize avg(foo) - 5",
|
||||
Query().summarize(f.avg(col.foo) - 5).render()
|
||||
)
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from pykusto import utils
|
||||
from pykusto.expressions import to_kql
|
||||
from test.test_base import TestBase
|
||||
|
||||
|
||||
|
@ -12,5 +12,5 @@ class TestUtils(TestBase):
|
|||
}
|
||||
self.assertEqual(
|
||||
"{\"name\": \"Alan\", \"age\": 21, \"address\": (\"NY\", 36), \"pets\": (\"Libby\", \"Panda\", \"]\", \"[\")}",
|
||||
utils.to_kql(dict)
|
||||
to_kql(dict)
|
||||
)
|
||||
|
|
Загрузка…
Ссылка в новой задаче