Kyle Lahnakoski 2019-01-15 17:26:49 -05:00
Parent ba6cec27f8
Commit a718a6db24
151 changed files with 8508 additions and 11515 deletions

88
vendor/jx_base/__init__.py vendored

@@ -7,14 +7,15 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from uuid import uuid4
from mo_dots import wrap, coalesce, listwrap
from mo_future import text_type
from jx_base.expressions import jx_expression
from jx_python.expressions import Literal, Python
from mo_dots import coalesce, listwrap, wrap
from mo_dots.datas import register_data
from mo_future import is_text, text_type
from mo_json import value2json
from mo_logs import Log
from mo_logs.strings import expand_template, quote
@@ -72,18 +73,26 @@ def DataClass(name, columns, constraint=None):
:return: The class that has been created
"""
from jx_python.expressions import jx_expression
columns = wrap([{"name": c, "required": True, "nulls": False, "type": object} if isinstance(c, text_type) else c for c in columns])
columns = wrap(
[
{"name": c, "required": True, "nulls": False, "type": object}
if is_text(c)
else c
for c in columns
]
)
slots = columns.name
required = wrap(filter(lambda c: c.required and not c.nulls and not c.default, columns)).name
required = wrap(
filter(lambda c: c.required and not c.nulls and not c.default, columns)
).name
nulls = wrap(filter(lambda c: c.nulls, columns)).name
defaults = {c.name: coalesce(c.default, None) for c in columns}
types = {c.name: coalesce(c.type, object) for c in columns}
types = {c.name: coalesce(c.jx_type, object) for c in columns}
code = expand_template(
"""
"""
from __future__ import unicode_literals
from mo_future import is_text, is_binary
from collections import Mapping
meta = None
@@ -170,31 +179,32 @@ class {{class_name}}(Mapping):
"slots": "(" + (", ".join(quote(s) for s in slots)) + ")",
"required": "{" + (", ".join(quote(s) for s in required)) + "}",
"nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}",
"defaults": jx_expression({"literal": defaults}).to_python(),
"defaults": Literal(defaults).to_python(),
"len_slots": len(slots),
"dict": "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}",
"assign": "; ".join("_set(output, "+quote(s)+", self."+s+")" for s in slots),
"types": "{" + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + "}",
"constraint_expr": jx_expression(constraint).to_python(),
"constraint": value2json(constraint)
}
"assign": "; ".join(
"_set(output, " + quote(s) + ", self." + s + ")" for s in slots
),
"types": "{"
+ (",".join(quote(k) + ": " + v.__name__ for k, v in types.items()))
+ "}",
"constraint_expr": Python[jx_expression(constraint)].to_python(),
"constraint": value2json(constraint),
},
)
return _exec(code, name)
output = _exec(code, name)
register_data(output)
return output
class TableDesc(DataClass(
"Table",
[
"name",
"url",
"query_path",
"timestamp"
],
constraint={"and": [
{"eq": [{"last": "query_path"}, {"literal": "."}]}
]}
)):
class TableDesc(
DataClass(
"Table",
["name", "url", "query_path", "timestamp"],
constraint={"and": [{"eq": [{"last": "query_path"}, {"literal": "."}]}]},
)
):
@property
def columns(self):
raise NotImplementedError()
@@ -204,23 +214,25 @@ class TableDesc(DataClass(
Column = DataClass(
"Column",
[
# "table",
"names", # MAP FROM TABLE NAME TO COLUMN NAME (ONE COLUMN CAN HAVE MULTIPLE NAMES)
"name",
"es_column",
"es_index",
"es_type",
{"name": "jx_type", "nulls": True},
"jx_type",
{"name": "useSource", "default": False},
{"name": "nested_path", "nulls": True}, # AN ARRAY OF PATHS (FROM DEEPEST TO SHALLOWEST) INDICATING THE JSON SUB-ARRAYS
"nested_path", # AN ARRAY OF PATHS (FROM DEEPEST TO SHALLOWEST) INDICATING THE JSON SUB-ARRAYS
{"name": "count", "nulls": True},
{"name": "cardinality", "nulls": True},
{"name": "multi", "nulls": True},
{"name": "partitions", "nulls": True},
{"name": "last_updated", "nulls": True}
"last_updated",
],
constraint={"and": [
{"eq": [{"last": "nested_path"}, {"literal": "."}]}
]}
constraint={
"and": [
{"not": {"eq": {"es_column": "string"}}},
{"eq": [{"last": "nested_path"}, {"literal": "."}]},
]
},
)

15
vendor/jx_base/container.py vendored

@@ -7,15 +7,12 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from mo_future import is_text, is_binary
from copy import copy
from mo_dots import Data
from mo_dots import set_default, split_field, wrap, join_field
from mo_dots import Data, is_data, join_field, set_default, split_field, wrap, is_many
from mo_future import generator_types, text_type
from mo_logs import Log
@@ -67,9 +64,9 @@ class Container(object):
return frum
elif isinstance(frum, _Query):
return _run(frum)
elif isinstance(frum, (list, set) + generator_types):
elif is_many(frum):
return _ListContainer(frum)
elif isinstance(frum, text_type):
elif is_text(frum):
# USE DEFAULT STORAGE TO FIND Container
if not config.default.settings:
Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")
@@ -83,7 +80,7 @@
)
settings.type = None # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
return type2container["elasticsearch"](settings)
elif isinstance(frum, Mapping):
elif is_data(frum):
frum = wrap(frum)
if frum.type and type2container[frum.type]:
return type2container[frum.type](frum.settings)

23
vendor/jx_base/dimensions.py vendored

@@ -7,17 +7,12 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from __future__ import absolute_import, division, unicode_literals
from jx_base.domains import ALGEBRAIC, Domain, KNOWN
from mo_dots import Data, FlatList, Null, coalesce, is_data, is_list, join_field, listwrap, split_field, wrap
import mo_dots as dot
from jx_base.domains import Domain, ALGEBRAIC, KNOWN
from mo_dots import Null, coalesce, join_field, split_field, Data
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList
from mo_future import transpose
from mo_logs import Log
from mo_math import SUM
from mo_times.timer import Timer
@@ -56,7 +51,7 @@ class Dimension(object):
fields = coalesce(dim.field, dim.fields)
if not fields:
return # NO FIELDS TO SEARCH
elif isinstance(fields, Mapping):
elif is_data(fields):
self.fields = wrap(fields)
edges = wrap([{"name": k, "value": v, "allowNulls": False} for k, v in self.fields.items()])
else:
@@ -88,7 +83,7 @@ class Dimension(object):
temp = Data(partitions=[])
for i, count in enumerate(parts):
a = dim.path(d.getEnd(d.partitions[i]))
if not isinstance(a, list):
if not is_list(a):
Log.error("The path function on " + dim.name + " must return an ARRAY of parts")
addParts(
temp,
@@ -98,7 +93,7 @@
)
self.value = coalesce(dim.value, "name")
self.partitions = temp.partitions
elif isinstance(fields, Mapping):
elif is_data(fields):
self.value = "name" # USE THE "name" ATTRIBUTE OF PARTS
partitions = FlatList()
@@ -135,7 +130,7 @@ class Dimension(object):
array = parts.data.values()[0].cube # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END)
def edges2value(*values):
if isinstance(fields, Mapping):
if is_data(fields):
output = Data()
for e, v in transpose(edges, values):
output[e.name] = v
@@ -192,7 +187,7 @@ class Dimension(object):
def getDomain(self, **kwargs):
# kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
kwargs = wrap(kwargs)
kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if isinstance(self.fields, list) else None)
kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if is_list(self.fields) else None)
if not self.partitions and self.edges:
# USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP

40
vendor/jx_base/domains.py vendored

@@ -7,21 +7,15 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import itertools
from collections import Mapping
from numbers import Number
from mo_future import text_type
from jx_base.expressions import jx_expression
from mo_collections.unique_index import UniqueIndex
from mo_dots import coalesce, Data, set_default, Null, listwrap
from mo_dots import wrap
from mo_dots.lists import FlatList
from mo_dots import Data, FlatList, Null, coalesce, is_container, is_data, listwrap, set_default, unwrap, wrap
from mo_future import text_type
from mo_logs import Log
from mo_math import MAX, MIN
from mo_times.dates import Date
@@ -210,7 +204,12 @@ class SimpleSetDomain(Domain):
DOMAIN IS A LIST OF OBJECTS, EACH WITH A value PROPERTY
"""
__slots__ = ["NULL", "partitions", "map", "order"]
__slots__ = [
"NULL", # THE value FOR NULL
"partitions", # LIST OF {name, value, dataIndex} dicts
"map", # MAP FROM value TO name
"order" # MAP FROM value TO dataIndex
]
def __init__(self, **desc):
Domain.__init__(self, **desc)
@@ -246,15 +245,18 @@
if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
self.key = desc.key
self.map = UniqueIndex(keys=desc.dimension.fields)
elif desc.partitions and isinstance(desc.key, (list, set)):
elif desc.partitions and is_container(desc.key):
# TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
self.key = desc.key
self.map = UniqueIndex(keys=desc.key)
elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
elif desc.partitions and is_data(desc.partitions[0][desc.key]):
# LOOKS LIKE OBJECTS
# sorted = desc.partitions[desc.key]
self.key = desc.key
self.map = UniqueIndex(keys=desc.key)
# self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
# self.map = UniqueIndex(keys=self.key)
self.order = {p[self.key]: p.dataIndex for p in desc.partitions}
self.partitions = desc.partitions
elif len(desc.partitions) == 0:
# CREATE AN EMPTY DOMAIN
self.key = "value"
@@ -388,11 +390,11 @@ class SetDomain(Domain):
elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
self.key = desc.key
self.map = UniqueIndex(keys=desc.dimension.fields)
elif desc.partitions and isinstance(desc.key, (list, set)):
elif desc.partitions and is_container(desc.key):
# TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
self.key = desc.key
self.map = UniqueIndex(keys=desc.key)
elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
elif desc.partitions and is_data(desc.partitions[0][desc.key]):
self.key = desc.key
self.map = UniqueIndex(keys=desc.key)
# self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
@@ -663,7 +665,7 @@ class RangeDomain(Domain):
if not self.key:
Log.error("Must have a key value")
parts = list(listwrap(self.partitions))
parts = listwrap(self.partitions)
for i, p in enumerate(parts):
self.min = MIN([self.min, p.min])
self.max = MAX([self.max, p.max])
@@ -675,10 +677,10 @@
# VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
for p, q in itertools.product(parts, parts):
if p is not q and p.min <= q.min and q.min < p.max:
if p.min <= q.min and q.min < p.max and unwrap(p) is not unwrap(q):
Log.error("partitions overlap!")
self.partitions = parts
self.partitions = wrap(parts)
return
elif any([self.min == None, self.max == None, self.interval == None]):
Log.error("Can not handle missing parameter")

1791
vendor/jx_base/expressions.py vendored

File diff suppressed because it is too large

5
vendor/jx_base/facts.py vendored

@@ -7,11 +7,10 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
class Facts(object):
"""
REPRESENT A HIERARCHICAL DATASTORE: MULTIPLE TABLES IN A DATABASE ALONG

233
vendor/jx_base/language.py vendored Normal file

@@ -0,0 +1,233 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import, division, unicode_literals
from copy import copy
from math import isnan
from mo_dots import Data, data_types, listwrap
from mo_dots.lists import list_types
from mo_future import boolean_type, long, none_type, text_type
from mo_logs import Log
from mo_times import Date
builtin_tuple = tuple
Expression = None
expression_module = None
JX = None
_next_id = 0
def next_id():
global _next_id
try:
return _next_id
finally:
_next_id += 1
def all_bases(bases):
for b in bases:
yield b
for y in all_bases(b.__bases__):
yield y
# EVERY OPERATOR WILL HAVE lang WHICH POINTS TO LANGUAGE
class LanguageElement(type):
def __new__(cls, name, bases, dct):
x = type.__new__(cls, name, bases, dct)
x.lang = None
if x.__module__ == expression_module:
# ALL OPS IN expression_module ARE GIVEN AN ID, NO OTHERS
x.id = next_id()
return x
def __init__(cls, *args):
global Expression, expression_module
type.__init__(cls, *args)
if not expression_module and cls.__name__ == "Expression":
# THE expression_module IS DETERMINED BY THE LOCATION OF Expression CLASS
Expression = cls
expression_module = cls.__module__
BaseExpression = LanguageElement(str("BaseExpression"), (object,), {})
class Language(object):
def __init__(self, name):
self.name = name
self.ops = None
def __getitem__(self, item):
class_ = self.ops[item.id]
if class_.__name__ != item.__class__.__name__:
Log.error("programming error")
item.__class__ = class_
return item
def __str__(self):
return self.name
def define_language(lang_name, module_vars):
# LET ALL EXPRESSIONS POINT TO lang OBJECT WITH ALL EXPRESSIONS
# ENSURE THIS IS BELOW ALL SUB_CLASS DEFINITIONS SO var() CAPTURES ALL EXPRESSIONS
global JX
if lang_name:
language = Language(lang_name)
language.ops = copy(JX.ops)
else:
num_ops = 1 + max(
obj.id
for obj in module_vars.values() if isinstance(obj, type) and hasattr(obj, 'id')
)
language = JX = Language("JX")
language.ops = [None] * num_ops
for _, new_op in module_vars.items():
if isinstance(new_op, type) and hasattr(new_op, 'id'):
# EXPECT OPERATORS TO HAVE id
# EXPECT NEW DEFINED OPS IN THIS MODULE TO HAVE lang NOT SET
curr = getattr(new_op, "lang")
if not curr:
old_op = language.ops[new_op.id]
if old_op is not None and old_op.__name__ != new_op.__name__:
Log.error("Logic error")
language.ops[new_op.id] = new_op
setattr(new_op, "lang", language)
if lang_name:
# ENSURE THE ALL OPS ARE DEFINED ON THE NEW LANGUAGE
for base_op, new_op in list(zip(JX.ops, language.ops)):
if new_op is base_op:
# MISSED DEFINITION, ADD ONE
new_op = type(base_op.__name__, (base_op,), {})
language.ops[new_op.id] = new_op
setattr(new_op, "lang", language)
return language
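# Illustrative pattern only (names assumed from elsewhere in this commit):
# the base language is defined once, then dialects are derived from it and
# used to cast expressions, as in Python[jx_expression(constraint)]:
#
#     JX = define_language(None, vars())          # base language, at the bottom of jx_base.expressions
#     Python = define_language("Python", vars())  # dialect, at the bottom of jx_python.expressions
#     Python[some_expression].to_python()         # cast to the dialect, then emit its output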
def is_op(call, op):
"""
:param call: The specific operator instance (a method call)
:param op: The operator we are testing against
:return: isinstance(call, op), but faster
"""
try:
return call.id == op.id
except Exception as e:
return False
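# For example (matching usage in jx_base/query.py later in this commit):
#     if not is_op(value, Variable):
#         Log.error("`*` over general expression not supported yet")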
def is_expression(call):
try:
output = getattr(call, 'id', None) != None
except Exception:
output = False
if output != isinstance(call, Expression):
Log.error("programmer error")
return output
def value_compare(left, right, ordering=1):
"""
SORT VALUES, NULL IS THE LEAST VALUE
:param left: LHS
:param right: RHS
:param ordering: (-1, 0, 1) TO AFFECT SORT ORDER
:return: The return value is negative if x < y, zero if x == y and strictly positive if x > y.
"""
try:
ltype = left.__class__
rtype = right.__class__
if ltype in list_types or rtype in list_types:
if left == None:
return ordering
elif right == None:
return - ordering
left = listwrap(left)
right = listwrap(right)
for a, b in zip(left, right):
c = value_compare(a, b) * ordering
if c != 0:
return c
if len(left) < len(right):
return - ordering
elif len(left) > len(right):
return ordering
else:
return 0
if ltype is float and isnan(left):
left = None
ltype = none_type
if rtype is float and isnan(right):
right = None
rtype = none_type
null_order = ordering*10
ltype_num = TYPE_ORDER.get(ltype, null_order)
rtype_num = TYPE_ORDER.get(rtype, null_order)
type_diff = ltype_num - rtype_num
if type_diff != 0:
return ordering if type_diff > 0 else -ordering
if ltype_num == null_order:
return 0
elif ltype is builtin_tuple:
for a, b in zip(left, right):
c = value_compare(a, b)
if c != 0:
return c * ordering
return 0
elif ltype in data_types:
for k in sorted(set(left.keys()) | set(right.keys())):
c = value_compare(left.get(k), right.get(k)) * ordering
if c != 0:
return c
return 0
elif left > right:
return ordering
elif left < right:
return -ordering
else:
return 0
except Exception as e:
Log.error("Can not compare values {{left}} to {{right}}", left=left, right=right, cause=e)
TYPE_ORDER = {
boolean_type: 0,
int: 1,
float: 1,
Date: 1,
long: 1,
text_type: 2,
list: 3,
builtin_tuple: 3,
dict: 4,
Data: 4
}
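# Illustrative use only (not part of this module): value_compare is an
# old-style comparator returning -1/0/1, so sorting on Python 3 goes
# through functools.cmp_to_key:
#
#     from functools import cmp_to_key
#     sorted(values, key=cmp_to_key(value_compare))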

12
vendor/jx_base/namespace.py vendored

@@ -7,13 +7,11 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from jx_base.query import QueryOp
from mo_dots import is_data
class Namespace(object):
@@ -32,7 +30,7 @@ class Namespace(object):
raise NotImplementedError()
def _convert_query(self, query):
output = QueryOp("from", None)
output = QueryOp(None)
output.select = self._convert_clause(query.select)
output.where = self.convert(query.where)
output["from"] = self._convert_from(query["from"])
@@ -60,7 +58,7 @@
def convert_list(operator, operand):
if operand==None:
return None
elif isinstance(operand, Mapping):
elif is_data(operand):
return operator(operand)
else:
return map(operator, operand)

8
vendor/jx_base/queries.py vendored

@@ -5,14 +5,12 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import re
from mo_future import text_type
from mo_logs import Log
keyword_pattern = re.compile(r"(\w|[\\.,$-])+(?:\.(\w|[\\.,$-])+)*")
@@ -23,7 +21,7 @@ def is_variable_name(value):
Log.warning("not expected")
return True
if not value or not isinstance(value, text_type):
if not value or not is_text(value):
return False # _a._b
value = value.lstrip(".")
if not value:

142
vendor/jx_base/query.py vendored

@@ -7,26 +7,24 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from copy import copy
import jx_base
from jx_base.dimensions import Dimension
from jx_base.domains import Domain, SetDomain, DefaultDomain
from jx_base.expressions import jx_expression, Expression, Variable, LeavesOp, ScriptOp, OffsetOp, TRUE, FALSE
from jx_base.queries import is_variable_name
from mo_dots import Data, relative_field, concat_field
from mo_dots import coalesce, Null, set_default, unwraplist, literal_field
from mo_dots import wrap, unwrap, listwrap
from mo_dots.lists import FlatList
from mo_future import text_type
from mo_json.typed_encoder import untype_path, STRUCT
from jx_base.domains import DefaultDomain, Domain, SetDomain
from jx_base.expressions import Expression, FALSE, LeavesOp, QueryOp as QueryOp_, ScriptOp, TRUE, Variable, jx_expression
from jx_base.utils import is_variable_name
from jx_base.language import is_expression, is_op
from mo_dots import Data, FlatList, Null, coalesce, concat_field, is_container, is_data, is_list, listwrap, literal_field, relative_field, set_default, unwrap, unwraplist, wrap
from mo_future import is_text, text_type
from mo_json import STRUCT
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_math import AND, UNION, Math
import mo_math
from mo_math import AND, UNION, is_number
DEFAULT_LIMIT = 10
MAX_LIMIT = 10000
@@ -47,8 +45,7 @@ def _late_import():
_ = _Column
class QueryOp(Expression):
class QueryOp(QueryOp_):
__slots__ = ["frum", "select", "edges", "groupby", "where", "window", "sort", "limit", "having", "format", "isLean"]
# def __new__(cls, op=None, frum=None, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None):
@@ -57,11 +54,11 @@ class QueryOp(Expression):
# setattr(output, s, None)
# return output
def __init__(self, op, frum, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None):
def __init__(self, frum, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None):
if isinstance(frum, jx_base.Table):
pass
else:
Expression.__init__(self, op, frum)
Expression.__init__(self, frum)
self.frum = frum
self.select = select
self.edges = edges
@@ -74,7 +71,7 @@ class QueryOp(Expression):
def __data__(self):
def select___data__():
if isinstance(self.select, list):
if is_list(self.select):
return [s.__data__() for s in self.select]
else:
return self.select.__data__()
@@ -103,16 +100,15 @@ class QueryOp(Expression):
format=copy(self.format)
)
def vars(self, exclude_where=False, exclude_select=False):
"""
:return: variables in query
"""
def edges_get_all_vars(e):
output = set()
if isinstance(e.value, text_type):
if is_text(e.value):
output.add(e.value)
if isinstance(e.value, Expression):
if is_expression(e.value):
output |= e.value.vars()
if e.domain.key:
output.add(e.domain.key)
@@ -180,13 +176,12 @@ class QueryOp(Expression):
edge.range.max = e.range.max.map(map_)
return edge
if isinstance(self.select, list):
if is_list(self.select):
select = wrap([map_select(s, map_) for s in self.select])
else:
select = map_select(self.select, map_)
return QueryOp(
"from",
frum=self.frum.map(map_),
select=select,
edges=wrap([map_edge(e, map_) for e in self.edges]),
@@ -206,17 +201,16 @@ class QueryOp(Expression):
"""
NORMALIZE QUERY SO IT CAN STILL BE JSON
"""
if isinstance(query, QueryOp) or query == None:
if is_op(query, QueryOp) or query == None:
return query
query = wrap(query)
table = container.get_table(query['from'])
schema = table.schema
output = QueryOp(
op="from",
frum=table,
format=query.format,
limit=Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
limit=mo_math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
)
if query.select or isinstance(query.select, (Mapping, list)):
@@ -243,7 +237,7 @@ class QueryOp(Expression):
output.window = [_normalize_window(w) for w in listwrap(query.window)]
output.having = None
output.sort = _normalize_sort(query.sort)
if not Math.is_integer(output.limit) or output.limit < 0:
if not mo_math.is_integer(output.limit) or output.limit < 0:
Log.error("Expecting limit >= 0")
output.isLean = query.isLean
@@ -263,7 +257,6 @@ class QueryOp(Expression):
def column_names(self):
return listwrap(self.select).name + self.edges.name + self.groupby.name
def __getitem__(self, item):
if item == "from":
return self.frum
@@ -281,6 +274,7 @@ class QueryOp(Expression):
canonical_aggregates = wrap({
"cardinality": {"name":"cardinality", "default": 0},
"count": {"name": "count", "default": 0},
"min": {"name": "minimum"},
"max": {"name": "maximum"},
@@ -292,14 +286,14 @@ canonical_aggregates = wrap({
def _normalize_selects(selects, frum, schema=None, ):
if frum == None or isinstance(frum, (list, set, text_type)):
if isinstance(selects, list):
if is_list(selects):
if len(selects) == 0:
return Null
else:
output = [_normalize_select_no_context(s, schema=schema) for s in selects]
else:
return _normalize_select_no_context(selects, schema=schema)
elif isinstance(selects, list):
elif is_list(selects):
output = [ss for s in selects for ss in _normalize_select(s, frum=frum, schema=schema)]
else:
output = _normalize_select(selects, frum, schema=schema)
@@ -322,7 +316,7 @@ def _normalize_select(select, frum, schema=None):
if not _Column:
_late_import()
if isinstance(select, text_type):
if is_text(select):
canonical = select = Data(value=select)
else:
select = wrap(select)
@@ -346,16 +340,16 @@
)
for c in frum.get_leaves()
])
elif isinstance(select.value, text_type):
elif is_text(select.value):
if select.value.endswith(".*"):
canonical.name = coalesce(select.name, ".")
value = jx_expression(select[:-2], schema=schema)
if not isinstance(value, Variable):
if not is_op(value, Variable):
Log.error("`*` over general expression not supported yet")
output.append([
set_default(
{
"value": LeavesOp("leaves", value, prefix=select.prefix),
"value": LeavesOp(value, prefix=select.prefix),
"format": "dict" # MARKUP FOR DECODING
},
canonical
@@ -383,7 +377,7 @@ def _normalize_select_no_context(select, schema=None):
if not _Column:
_late_import()
if isinstance(select, text_type):
if is_text(select):
select = Data(value=select)
else:
select = wrap(select)
@@ -395,24 +389,24 @@
output.value = jx_expression(".", schema=schema)
else:
return Null
elif isinstance(select.value, text_type):
elif is_text(select.value):
if select.value.endswith(".*"):
name = select.value[:-2]
name = select.value[:-2].lstrip(".")
output.name = coalesce(select.name, name)
output.value = LeavesOp("leaves", Variable(name), prefix=coalesce(select.prefix, name))
output.value = LeavesOp(Variable(name), prefix=coalesce(select.prefix, name))
else:
if select.value == ".":
output.name = coalesce(select.name, select.aggregate, ".")
output.value = jx_expression(select.value, schema=schema)
elif select.value == "*":
output.name = coalesce(select.name, select.aggregate, ".")
output.value = LeavesOp("leaves", Variable("."))
output.value = LeavesOp(Variable("."))
else:
output.name = coalesce(select.name, select.value, select.aggregate)
output.name = coalesce(select.name, select.value.lstrip("."), select.aggregate)
output.value = jx_expression(select.value, schema=schema)
elif isinstance(select.value, (int, float)):
elif is_number(output.value):
if not output.name:
output.name = text_type(select.value)
output.name = text_type(output.value)
output.value = jx_expression(select.value, schema=schema)
else:
output.value = jx_expression(select.value, schema=schema)
@@ -441,18 +435,19 @@ def _normalize_edge(edge, dim_index, limit, schema=None):
if not _Column:
_late_import()
if edge == None:
if not edge:
Log.error("Edge has no value, or expression is empty")
elif isinstance(edge, text_type):
elif is_text(edge):
if schema:
leaves = unwraplist(list(schema.leaves(edge)))
if not leaves or isinstance(leaves, (list, set)):
if not leaves or is_container(leaves):
return [
Data(
name=edge,
value=jx_expression(edge, schema=schema),
allowNulls=True,
dim=dim_index
dim=dim_index,
domain=_normalize_domain(None, limit)
)
]
elif isinstance(leaves, _Column):
@@ -463,7 +458,7 @@ def _normalize_edge(edge, dim_index, limit, schema=None):
dim=dim_index,
domain=_normalize_domain(domain=leaves, limit=limit, schema=schema)
)]
elif isinstance(leaves.fields, list) and len(leaves.fields) == 1:
elif is_list(leaves.fields) and len(leaves.fields) == 1:
return [Data(
name=leaves.name,
value=jx_expression(leaves.fields[0], schema=schema),
@@ -490,10 +485,10 @@ def _normalize_edge(edge, dim_index, limit, schema=None):
]
else:
edge = wrap(edge)
if not edge.name and not isinstance(edge.value, text_type):
if not edge.name and not is_text(edge.value):
Log.error("You must name compound and complex edges: {{edge}}", edge=edge)
if isinstance(edge.value, (list, set)) and not edge.domain:
if is_container(edge.value) and not edge.domain:
# COMPLEX EDGE IS SHORT HAND
domain = _normalize_domain(schema=schema)
domain.dimension = Data(fields=edge.value)
@@ -521,8 +516,10 @@
def _normalize_groupby(groupby, limit, schema=None):
if groupby == None:
return None
output = wrap([n for ie, e in enumerate(listwrap(groupby)) for n in _normalize_group(e, ie, limit, schema=schema) ])
if any(o==None for o in output):
output = wrap([n for e in listwrap(groupby) for n in _normalize_group(e, None, limit, schema=schema)])
for i, o in enumerate(output):
o.dim = i
if any(o == None for o in output):
Log.error("not expected")
return output
@@ -534,14 +531,14 @@ def _normalize_group(edge, dim_index, limit, schema=None):
:param schema: for context
:return: a normalized groupby
"""
if isinstance(edge, text_type):
if is_text(edge):
if edge.endswith(".*"):
prefix = edge[:-2]
if schema:
output = wrap([
{
"name": concat_field(prefix, literal_field(relative_field(untype_path(c.names["."]), prefix))),
"put": {"name": literal_field(untype_path(c.names["."]))},
{ # BECAUSE THIS IS A GROUPBY, EARLY SPLIT INTO LEAVES WORKS JUST FINE
"name": concat_field(prefix, literal_field(relative_field(untype_path(c.name), prefix))),
"put": {"name": literal_field(untype_path(c.name))},
"value": jx_expression(c.es_column, schema=schema),
"allowNulls": True,
"domain": {"type": "default"}
@@ -553,7 +550,7 @@ def _normalize_group(edge, dim_index, limit, schema=None):
return wrap([{
"name": untype_path(prefix),
"put": {"name": literal_field(untype_path(prefix))},
"value": jx_expression(prefix, schema=schema),
"value": LeavesOp(Variable(prefix)),
"allowNulls": True,
"dim":dim_index,
"domain": {"type": "default"}
@@ -571,7 +568,7 @@ def _normalize_group(edge, dim_index, limit, schema=None):
if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
Log.error("groupby does not accept complicated domains")
if not edge.name and not isinstance(edge.value, text_type):
if not edge.name and not is_text(edge.value):
Log.error("You must name compound edges: {{edge}}", edge= edge)
return wrap([{
@@ -593,7 +590,7 @@ def _normalize_domain(domain=None, limit=None, schema=None):
return DefaultDomain(type="default", limit=limit)
elif isinstance(domain, Dimension):
return domain.getDomain()
elif schema and isinstance(domain, text_type) and schema[domain]:
elif schema and is_text(domain) and schema[domain]:
return schema[domain].getDomain()
elif isinstance(domain, Domain):
return domain
@@ -613,7 +610,7 @@ def _normalize_window(window, schema=None):
if hasattr(v, "__call__"):
expr = v
else:
expr = ScriptOp("script", v)
expr = ScriptOp(v)
return Data(
name=coalesce(window.name, window.value),
@@ -653,7 +650,7 @@ def _map_term_using_schema(master, path, term, schema_edges):
if isinstance(dimension, Dimension):
domain = dimension.getDomain()
if dimension.fields:
if isinstance(dimension.fields, Mapping):
if is_data(dimension.fields):
# EXPECTING A TUPLE
for local_field, es_field in dimension.fields.items():
local_value = v[local_field]
@@ -696,7 +693,7 @@ def _map_term_using_schema(master, path, term, schema_edges):
continue
else:
Log.error("not expected")
elif isinstance(v, Mapping):
elif is_data(v):
sub = _map_term_using_schema(master, path + [k], v, schema_edges[k])
output.append(sub)
continue
@@ -710,7 +707,7 @@ def _where_terms(master, where, schema):
USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
"""
if isinstance(where, Mapping):
if is_data(where):
if where.term:
# MAP TERM
try:
@@ -722,13 +719,13 @@ def _where_terms(master, where, schema):
# MAP TERM
output = FlatList()
for k, v in where.terms.items():
if not isinstance(v, (list, set)):
if not is_container(v):
Log.error("terms filter expects list of values")
edge = schema.edges[k]
if not edge:
output.append({"terms": {k: v}})
else:
if isinstance(edge, text_type):
if is_text(edge):
# DIRECT FIELD REFERENCE
return {"terms": {edge: v}}
try:
@@ -736,7 +733,7 @@ def _where_terms(master, where, schema):
except Exception as e:
Log.error("programmer error", e)
fields = domain.dimension.fields
if isinstance(fields, Mapping):
if is_data(fields):
or_agg = []
for vv in v:
and_agg = []
@ -746,7 +743,7 @@ def _where_terms(master, where, schema):
and_agg.append({"term": {es_field: vvv}})
or_agg.append({"and": and_agg})
output.append({"or": or_agg})
elif isinstance(fields, list) and len(fields) == 1 and is_variable_name(fields[0]):
elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
output.append({"terms": {fields[0]: v}})
elif domain.partitions:
output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
@@ -770,19 +767,19 @@ def _normalize_sort(sort=None):
output = FlatList()
for s in listwrap(sort):
if isinstance(s, text_type):
if is_text(s):
output.append({"value": jx_expression(s), "sort": 1})
elif isinstance(s, Expression):
elif is_expression(s):
output.append({"value": s, "sort": 1})
elif Math.is_integer(s):
output.append({"value": OffsetOp("offset", s), "sort": 1})
elif mo_math.is_integer(s):
output.append({"value": jx_expression({"offset": s}), "sort": 1})
elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
for v, d in s.items():
output.append({"value": jx_expression(v), "sort": sort_direction[d]})
elif not s.sort and not s.value:
Log.error("`sort` clause must have a `value` property")
else:
output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": coalesce(sort_direction[s.sort], 1)})
output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": sort_direction[s.sort]})
return output
@@ -795,8 +792,7 @@ sort_direction = {
1: 1,
0: 0,
-1: -1,
None: 1,
Null: 1
None: 1
}

25
vendor/jx_base/schema.py vendored

@@ -7,14 +7,14 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from copy import copy
from mo_dots import Null, startswith_field, set_default, wrap
from mo_json.typed_encoder import unnest_path, untype_path, STRUCT, EXISTS, OBJECT, NESTED
from mo_dots import Null, relative_field, set_default, startswith_field, wrap
from mo_json import EXISTS, NESTED, OBJECT, STRUCT
from mo_json.typed_encoder import unnest_path, untype_path
from mo_logs import Log
@@ -56,7 +56,7 @@ class Schema(object):
:param column:
:return: NAME OF column
"""
return column.names[self.query_path]
return relative_field(column.name, self.query_path)
def values(self, name):
"""
@@ -86,13 +86,13 @@ class Schema(object):
full_name = self.query_path
return set_default(
{
c.names[full_name]: c.es_column
relative_field(c.name, full_name): c.es_column
for k, cs in self.lookup.items()
# if startswith_field(k, full_name)
for c in cs if c.jx_type not in STRUCT
},
{
c.names["."]: c.es_column
c.name: c.es_column
for k, cs in self.lookup.items()
# if startswith_field(k, full_name)
for c in cs if c.jx_type not in STRUCT
@@ -104,14 +104,13 @@ class Schema(object):
return copy(self._columns)
def _indexer(columns, query_path):
all_names = set(unnest_path(n) for c in columns for n in c.names.values()) | {"."}
all_names = set(unnest_path(c.name) for c in columns) | {"."}
lookup_leaves = {} # ALL LEAF VARIABLES
for full_name in all_names:
for c in columns:
cname = c.names[query_path]
cname = relative_field(c.name, query_path)
nfp = unnest_path(cname)
if (
startswith_field(nfp, full_name) and
@@ -126,7 +125,7 @@ def _indexer(columns, query_path):
lookup_variables = {} # ALL NOT-NESTED VARIABLES
for full_name in all_names:
for c in columns:
cname = c.names[query_path]
cname = relative_field(c.name, query_path)
nfp = unnest_path(cname)
if (
startswith_field(nfp, full_name) and
@@ -142,7 +141,7 @@ def _indexer(columns, query_path):
relative_lookup = {}
for c in columns:
try:
cname = c.names[query_path]
cname = relative_field(c.name, query_path)
cs = relative_lookup.setdefault(cname, set())
cs.add(c)

5
vendor/jx_base/snowflake.py vendored

@@ -7,11 +7,10 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
class Snowflake(object):
"""
REPRESENT ONE ALIAS, AND ITS NESTED ARRAYS

5
vendor/jx_base/table.py vendored

@@ -7,11 +7,10 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
class Table(object):
def __init__(self, full_name):

56
vendor/jx_base/utils.py vendored Normal file

@@ -0,0 +1,56 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
from __future__ import absolute_import, division, unicode_literals
import re
from mo_future import is_text
from mo_logs import Log
keyword_pattern = re.compile(r"(\w|[\\.,$-])+(?:\.(\w|[\\.,$-])+)*")
def is_variable_name(value):
if value.__class__.__name__ == "Variable":
Log.warning("not expected")
return True
if not value or not is_text(value):
return False # _a._b
value = value.lstrip(".")
if not value:
return True
match = keyword_pattern.match(value)
if not match:
return False
return match.group(0) == value
def dequote(s):
"""
If a string has single or double quotes around it, remove them.
Make sure the pair of quotes match.
If a matching pair of quotes is not found, return the string unchanged.
"""
if (s[0] == s[-1]) and s.startswith(("'", '"')):
return s[1:-1]
return s
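# Illustrative examples (not part of this module):
#     dequote('"hello"')   # -> 'hello'
#     dequote("'hello'")   # -> 'hello'
#     dequote('"hello')    # mismatched quotes, returned unchanged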
def is_column_name(col):
if re.match(r"(\$|\w|\\\.)+(?:\.(\$|\w|\\\.)+)*\.\$\w{6}$", col):
return True
else:
return False
def get_property_name(s):
if s == ".":
return s
else:
return s.lstrip(".")

16
vendor/jx_elasticsearch/README.md vendored Normal file

@@ -0,0 +1,16 @@
# `jx_elasticsearch`
This library implements [JSON Query Expressions]() atop an Elasticsearch cluster.
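For orientation, here is a minimal sketch of such a query; the index and field names are hypothetical, not part of this library:

```python
# A JSON query expression is plain data (dicts and lists).
# "task", "run.duration", and "build.branch" are illustrative names only.
query = {
    "from": "task",                                        # the index (or alias) to query
    "select": {"value": "run.duration", "aggregate": "average"},
    "where": {"eq": {"build.branch": "mozilla-central"}},  # filter clause
    "limit": 1000,
}
```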
## Contribution
New (or old) versions of Elasticsearch should be added by copying the `es52` subdirectory and altering the implementation to deal with the differences.
There are two directories in the git history that may help with old versions:
1. `es09` is for Elasticsearch version 0.9.x (with MVEL scripting)
2. `es14` is for any 1.x version of Elasticsearch (with Groovy scripting)
Both of these directories are too old to be used directly, but they do have code templates for their respective scripting language, and they do have other hints about how to construct queries with the limitations of the older versions.

5
vendor/jx_elasticsearch/__init__.py vendored

@@ -7,10 +7,9 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from jx_base.container import type2container
from mo_files.url import URL
from mo_kwargs import override

0
vendor/jx_elasticsearch/es09/__init__.py vendored

106
vendor/jx_elasticsearch/es09/aggop.py vendored

@@ -1,106 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base.expressions import Variable
from jx_base.queries import is_variable_name
from jx_elasticsearch import es09
from jx_elasticsearch.es09.util import aggregates, fix_es_stats, build_es_query
from jx_elasticsearch import post as es_post
# from jx_elasticsearch.es52.expressions import Variable
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import listwrap, unwrap, literal_field
from mo_math import AND
def is_aggop(query):
if not query.edges:
return True
return False
def es_aggop(es, mvel, query):
select = listwrap(query.select)
FromES = build_es_query(query)
isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
if isSimple:
return es_countop(es, query) # SIMPLE, USE TERMS FACET INSTEAD
value2facet = dict() # ONLY ONE FACET NEEDED PER
name2facet = dict() # MAP name TO FACET WITH STATS
for s in select:
if s.value not in value2facet:
if isinstance(s.value, Variable):
unwrap(FromES.facets)[s.name] = {
"statistical": {
"field": s.value.var
},
"facet_filter": query.where.to_esfilter()
}
else:
unwrap(FromES.facets)[s.name] = {
"statistical": {
"script": jx_expression_to_function(s.value)
},
"facet_filter": query.where.to_es_filter()
}
value2facet[s.value] = s.name
name2facet[s.name] = value2facet[s.value]
data = es_post(es, FromES, query.limit)
matricies = {s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]]) for s in select}
cube = Cube(query.select, [], matricies)
cube.frum = query
return cube
def es_countop(es, mvel, query):
"""
RETURN SINGLE COUNT
"""
select = listwrap(query.select)
FromES = build_es_query(query)
for s in select:
if is_variable_name(s.value):
FromES.facets[s.name] = {
"terms": {
"field": s.value,
"size": query.limit,
},
"facet_filter":{"exists":{"field":s.value}}
}
else:
# COMPLICATED value IS PROBABLY A SCRIPT, USE IT
FromES.facets[s.name] = {
"terms": {
"script_field": es09.expressions.compile_expression(s.value, query),
"size": 200000
}
}
data = es_post(es, FromES, query.limit)
matricies = {}
for s in select:
matricies[s.name] = Matrix(value=data.hits.facets[s.name].total)
cube = Cube(query.select, query.edges, matricies)
cube.frum = query
return cube

730
vendor/jx_elasticsearch/es09/expressions.py vendored

@@ -1,730 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from collections import Mapping
from datetime import datetime
import re
from jx_base.queries import keyword_pattern
from mo_future import text_type
from pyLibrary import convert
from mo_collections import reverse
from mo_logs import Log
from mo_logs.strings import quote
from mo_math import Math
from mo_dots import split_field, Data, Null, join_field, coalesce, listwrap
from mo_times.durations import Duration
class _MVEL(object):
def __init__(self, fromData, isLean=False):
self.fromData = fromData
self.isLean = isLean
self.prefixMap = []
self.functions = {}
def code(self, query):
"""
RETURN THE MVEL THAT WILL FILTER USING query.where AND TERM-PACK THE query.select CLAUSE
"""
selectList = listwrap(query.select)
fromPath = query.frum.name # FIRST NAME IS THE INDEX
sourceVar = "__sourcedoc__"
whereClause = query.where
# PARSE THE fromPath
code = self.frum(fromPath, sourceVar, "__loop")
select = self.select(selectList, fromPath, "output", sourceVar)
body = "var output = \"\";\n" + \
code.replace(
"<CODE>",
"if (" + _where(whereClause, lambda v: self._translate(v)) + "){\n" +
select.body +
"}\n"
) + \
"output\n"
# ADD REFERENCED CONTEXT VARIABLES
context = self.getFrameVariables(body)
func = UID()
predef = addFunctions(select.head+context+body).head
param = "_source" if body.find(sourceVar) else ""
output = predef + \
select.head + \
context + \
'var ' + func + ' = function('+sourceVar+'){\n' + \
body + \
'};\n' + \
func + '('+param+')\n'
return Compiled(output)
def frum(self, fromPath, sourceVar, loopVariablePrefix):
"""
indexName NAME USED TO REFER TO HIGH LEVEL DOCUMENT
loopVariablePrefix PREFIX FOR LOOP VARIABLES
"""
loopCode = "if (<PATH> != null){ for(<VAR> : <PATH>){\n<CODE>\n}}\n"
self.prefixMap = []
code = "<CODE>"
path = split_field(fromPath)
# ADD LOCAL VARIABLES
columns = INDEX_CACHE[path[0]].columns
for i, c in enumerate(columns):
if c.name.find("\\.") >= 0:
self.prefixMap.insert(0, {
"path": c.name,
"variable": "get(" + sourceVar + ", \"" + c.name.replace("\\.", ".") + "\")"
})
else:
self.prefixMap.insert(0, {
"path": c.name,
"variable": sourceVar + ".?" + c.name
})
# ADD LOOP VARIABLES
currPath = []
# self.prefixMap.insert(0, {"path": path[0], "variable": path[0]})
for i, step in enumerate(path[1::]):
loopVariable = loopVariablePrefix + str(i)
currPath.append(step)
pathi = ".".join(currPath)
shortPath = self._translate(pathi)
self.prefixMap.insert(0, {"path": pathi, "variable": loopVariable})
loop = loopCode.replace("<VAR>", loopVariable).replace("<PATH>", shortPath)
code = code.replace("<CODE>", loop)
return code
def _translate(self, variableName):
shortForm = variableName
for p in self.prefixMap:
prefix = p["path"]
if shortForm == prefix:
shortForm = p["variable"]
else:
shortForm = replacePrefix(shortForm, prefix + ".", p["variable"] + ".?") # ADD NULL CHECK
shortForm = replacePrefix(shortForm, prefix + "[", p["variable"] + "[")
return shortForm
# CREATE A PIPE DELIMITED RESULT SET
def select(self, selectList, fromPath, varName, sourceVar):
path = split_field(fromPath)
is_deep = len(path) > 1
heads = []
list = []
for s in selectList:
if is_deep:
if s.value and is_variable_name(s.value):
shortForm = self._translate(s.value)
list.append("Value2Pipe(" + shortForm + ")\n")
else:
Log.error("do not know how to handle yet")
else:
if s.value and is_variable_name(s.value):
list.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n")
elif s.value:
shortForm = self._translate(s.value)
list.append("Value2Pipe(" + shortForm + ")\n")
else:
code, decode = self.Parts2Term(s.domain)
heads.append(code.head)
list.append("Value2Pipe(" + code.body + ")\n")
if len(split_field(fromPath)) > 1:
output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + '+"|"+'.join(["Value2Pipe("+v+")\n" for v in list]) + ';\n'
else:
output = varName + ' = ' + '+"|"+'.join(["Value2Pipe("+v+")\n" for v in list]) + ';\n'
return Data(
head="".join(heads),
body=output
)
def Parts2Term(self, domain):
"""
TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)
CONVERT AN ARRAY OF PARTS{name, esfilter} TO AN MVEL EXPRESSION
RETURN expression, function PAIR, WHERE
expression - MVEL EXPRESSION
function - TAKES RESULT OF expression AND RETURNS PART
"""
fields = domain.dimension.fields
term = []
if len(split_field(self.fromData.name)) == 1 and fields:
if isinstance(fields, Mapping):
# CONVERT UNORDERED FIELD DEFS
jx_fields, es_fields = transpose(*[(k, fields[k]) for k in sorted(fields.keys())])
else:
jx_fields, es_fields = transpose(*[(i, e) for i, e in enumerate(fields)])
# NO LOOPS BECAUSE QUERY IS SHALLOW
# DOMAIN IS FROM A DIMENSION, USE IT'S FIELD DEFS TO PULL
if len(es_fields) == 1:
def fromTerm(term):
return domain.getPartByKey(term)
return Data(
head="",
body='getDocValue('+quote(domain.dimension.fields[0])+')'
), fromTerm
else:
def fromTerm(term):
terms = [convert.pipe2value(t) for t in convert.pipe2value(term).split("|")]
candidate = dict(zip(jx_fields, terms))
for p in domain.partitions:
for k, t in candidate.items():
if p.value[k] != t:
break
else:
return p
if domain.type in ["uid", "default"]:
part = {"value": candidate}
domain.partitions.append(part)
return part
else:
return Null
for f in es_fields:
term.append('Value2Pipe(getDocValue('+quote(f)+'))')
return Data(
head="",
body='Value2Pipe('+('+"|"+'.join(term))+')'
), fromTerm
else:
for v in domain.partitions:
term.append("if (" + _where(v.esfilter, lambda x: self._translate(x)) + ") " + value2MVEL(domain.getKey(v)) + "; else ")
term.append(value2MVEL(domain.getKey(domain.NULL)))
func_name = "_temp"+UID()
return self.register_function("+\"|\"+".join(term))
def Parts2TermScript(self, domain):
code, decode = self.Parts2Term(domain)
func = addFunctions(code.head + code.body)
return func.head + code.head + code.body, decode
def getFrameVariables(self, body):
contextVariables = []
columns = self.fromData.columns
parentVarNames = set() # ALL PARENTS OF VARIABLES WITH "." IN NAME
body = body.replace(".?", ".")
for i, c in enumerate(columns):
j = body.find(c.name, 0)
while j >= 0:
s = j
j = body.find(c.name, s + 1)
test0 = body[s - 1: s + len(c.name) + 1:]
test3 = body[s - 8: s + len(c.name):]
if test0[:-1] == "\"" + c.name:
continue
if test3 == "_source." + c.name:
continue
def defParent(name):
# DO NOT MAKE THE SAME PARENT TWICE
if name in parentVarNames:
return
parentVarNames.add(name)
if len(split_field(name)) == 1:
contextVariables.append("Map " + name + " = new HashMap();\n")
else:
defParent(join_field(split_field(name)[0:-1]))
contextVariables.append(name + " = new HashMap();\n")
body = body.replace(c.name, "-"*len(c.name))
if self.isLean or c.useSource:
if len(split_field(c.name)) > 1:
defParent(join_field(split_field(c.name)[0:-1]))
contextVariables.append(c.name + " = getSourceValue(\"" + c.name + "\");\n")
else:
contextVariables.append(c.name + " = _source[\"" + c.name + "\"];\n")
else:
if len(split_field(c.name)) > 1:
defParent(join_field(split_field(c.name)[0:-1]))
contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
else:
contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
break
return "".join(contextVariables)
def compile_expression(self, expression, constants=None):
# EXPAND EXPRESSION WITH ANY CONSTANTS
expression = setValues(expression, constants)
fromPath = self.fromData.name # FIRST NAME IS THE INDEX
indexName = join_field(split_field(fromPath)[:1:])
context = self.getFrameVariables(expression)
if context == "":
return addFunctions(expression).head+expression
func = UID()
code = addFunctions(context+expression)
output = code.head + \
'var ' + func + ' = function(' + indexName + '){\n' + \
context + \
expression + ";\n" + \
'};\n' + \
func + '(_source)\n'
return Compiled(output)
def register_function(self, code):
for n, c in self.functions.items():
if c == code:
break
else:
n = "_temp" + UID()
self.functions[n] = code
return Data(
head='var ' + n + ' = function(){\n' + code + '\n};\n',
body=n + '()\n'
)
class Compiled(object):
def __init__(self, code):
self.code=code
def __str__(self):
return self.code
def __data__(self):
return self.code
__UID__ = 1000
def UID():
output = "_" + str(__UID__)
globals()["__UID__"] += 1
return output
def setValues(expression, constants):
if not constants:
return expression
constants = constants.copy()
# EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES)
for c in constants:
value = c.value
n = c.name
if len(split_field(n)) >= 3:
continue # DO NOT GO TOO DEEP
if isinstance(value, list):
continue # DO NOT MESS WITH ARRAYS
if isinstance(value, Mapping):
for k, v in value.items():
constants.append({"name": n + "." + k, "value": v})
for c in reverse(constants):# REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST
s = 0
while True:
s = expression.find(c.name, s)
if s == -1:
break
if re.match(r"\w", expression[s - 1]):
break
if re.match(r"\w", expression[s + len(c.name)]):
break
v = value2MVEL(c.value)
expression = expression[:s:] + "" + v + expression[:s + len(c.name):]
return expression
def unpack_terms(facet, selects):
# INTERPRET THE TERM-PACKED ES RESULTS AND RETURN DATA CUBE
# ASSUME THE .term IS JSON OBJECT WITH ARRAY OF RESULT OBJECTS
mod = len(selects)
output = []
for t in facet.terms:
if t.term == "":
continue # NO DATA
value = []
for i, v in enumerate(t.term.split("|")):
value.append(convert.pipe2value(v))
if ((i + 1) % mod) == 0:
value.append(t.count)
output.append(value)
value = []
return output
# PASS esFilter SIMPLIFIED ElasticSearch FILTER OBJECT
# RETURN MVEL EXPRESSION
def _where(esFilter, _translate):
if not esFilter or esFilter is True:
return "true"
keys = esFilter.keys()
if len(keys) != 1:
Log.error("Expecting only one filter aggregate")
op = keys[0]
if op == "and":
list = esFilter[op]
if not (list):
return "true"
if len(list) == 1:
return _where(list[0], _translate)
output = "(" + " && ".join(_where(l, _translate) for l in list) + ")"
return output
elif op == "or":
list = esFilter[op]
if not list:
return "false"
if len(list) == 1:
return _where(list[0], _translate)
output = "(" + " || ".join(_where(l, _translate) for l in list) + ")"
return output
elif op == "not":
return "!(" + _where(esFilter[op, _translate]) + ")"
elif op == "term":
pair = esFilter[op]
if len(pair.keys()) == 1:
return [_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()][0]
else:
return "(" + " && ".join(_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()) + ")"
elif op == "terms":
output = []
for variableName, valueList in esFilter[op].items():
if not valueList:
Log.error("Expecting something in 'terms' array")
if len(valueList) == 1:
output.append(_translate(variableName) + "==" + value2MVEL(valueList[0]))
else:
output.append("(" + " || ".join(_translate(variableName) + "==" + value2MVEL(v) for v in valueList) + ")")
return " && ".join(output)
elif op == "exists":
# "exists":{"field":"myField"}
pair = esFilter[op]
variableName = pair.field
return "(" + _translate(variableName) + "!=null)"
elif op == "missing":
fieldName = _translate(esFilter[op].field)
testExistence = coalesce(esFilter[op].existence, True)
testNull = coalesce(esFilter[op].null_value, True)
output = []
if testExistence and not testNull:
output.append("(" + fieldName.replace(".?", ".") + " == empty)") # REMOVE THE .? SO WE REFER TO THE FIELD, NOT GET THE VALUE
if testNull:
output.append("(" + fieldName + "==null)")
return " || ".join(output)
elif op == "range":
pair = esFilter[op]
ranges = []
for variableName, r in pair.items():
if r.gte:
ranges.append(value2MVEL(r.gte) + "<=" + _translate(variableName))
elif r.gt:
ranges.append(value2MVEL(r.gt) + "<" + _translate(variableName))
elif r["from"]:
if r.include_lower == None or r.include_lower:
ranges.append(value2MVEL(r["from"]) + "<=" + _translate(variableName))
else:
ranges.append(value2MVEL(r["from"]) + "<" + _translate(variableName))
if r.lte:
ranges.append(value2MVEL(r.lte) + ">=" + _translate(variableName))
elif r.lt:
ranges.append(value2MVEL(r.lt) + ">" + _translate(variableName))
elif r["from"]:
if r.include_lower == None or r.include_lower:
ranges.append(value2MVEL(r["from"]) + ">=" + _translate(variableName))
else:
ranges.append(value2MVEL(r["from"]) + ">" + _translate(variableName))
return "("+" && ".join(ranges)+")"
elif op == "script":
script = esFilter[op].script
return _translate(script)
elif op == "prefix":
pair = esFilter[op]
variableName, value = pair.items()[0]
return _translate(variableName) + ".startsWith(" + quote(value) + ")"
elif op == "match_all":
return "true"
else:
Log.error("'" + op + "' is an unknown aggregate")
return ""
VAR_CHAR = "abcdefghijklmnopqurstvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.\""
def value2MVEL(value):
"""
FROM PYTHON VALUE TO MVEL EQUIVALENT
"""
if isinstance(value, datetime):
return str(convert.datetime2milli(value)) + " /*" + value.format("yyNNNdd HHmmss") + "*/" # TIME
if isinstance(value, Duration):
return str(convert.timedelta2milli(value)) + " /*" + str(value) + "*/" # DURATION
if Math.is_number(value):
return str(value)
return quote(value)
# FROM PYTHON VALUE TO ES QUERY EQUIVALENT
def value2query(value):
if isinstance(value, datetime):
return convert.datetime2milli(value)
if isinstance(value, Duration):
return value.milli
if Math.is_number(value):
return value
return quote(value)
def value2value(value):
"""
CONVERT FROM PYTHON VALUE TO ES EQUIVALENT
"""
if isinstance(value, datetime):
return convert.datetime2milli(value)
if isinstance(value, Duration):
return value.milli # DURATION
return value
def addFunctions(mvel):
"""
PREPEND THE REQUIRED MVEL FUNCTIONS TO THE CODE
"""
isAdded = Data() # SOME FUNCTIONS DEPEND ON OTHERS
head=[]
body=mvel
keepAdding = True
while keepAdding:
keepAdding = False
for func_name, func_code in FUNCTIONS.items():
if isAdded[func_name]:
continue
if mvel.find(func_name) == -1:
continue
keepAdding = True
isAdded[func_name] = func_code
head.append(func_code)
mvel = func_code + mvel
return Data(
head="".join(head),
body=body
)
FUNCTIONS = {
"String2Quote":
"var String2Quote = function(str){\n" +
"if (!(str is String)){ str; }else{\n" + # LAST VALUE IS RETURNED. "return" STOPS EXECUTION COMPLETELY!
"" + value2MVEL("\"") + "+" +
"str.replace(" + value2MVEL("\\") + "," + value2MVEL("\\\\") +
").replace(" + value2MVEL("\"") + "," + value2MVEL("\\\"") +
").replace(" + value2MVEL("\'") + "," + value2MVEL("\\\'") + ")+" +
value2MVEL("\"") + ";\n" +
"}};\n",
"Value2Pipe":
'var Value2Pipe = function(value){\n' +  # SPACES AROUND "=" ARE IMPORTANT
"if (value==null){ \"0\" }else " +
"if (value is ArrayList || value is org.elasticsearch.common.mvel2.util.FastList){" +
"var out = \"\";\n" +
"foreach (v : value) out = (out==\"\") ? v : out + \"|\" + Value2Pipe(v);\n" +
"'a'+Value2Pipe(out);\n" +
"}else \n" +
"if (value is Long || value is Integer || value is Double){ 'n'+value; }else \n" +
"if (!(value is String)){ 's'+value.getClass().getName(); }else \n" +
'"s"+value.replace("\\\\", "\\\\\\\\").replace("|", "\\\\p");' + # CAN NOT value TO MAKE NUMBER A STRING (OR EVEN TO PREPEND A STRING!)
"};\n",
# "replaceAll":
# "var replaceAll = function(output, find, replace){\n" +
# "if (output.length()==0) return output;\n"+
# "s = output.indexOf(find, 0);\n" +
# "while(s>=0){\n" +
# "output=output.replace(find, replace);\n" +
# "s=s-find.length()+replace.length();\n" +
# "s = output.indexOf(find, s);\n" +
# "}\n"+
# "output;\n"+
# '};\n',
"floorDay":
"var floorDay = function(value){ Math.floor(value/86400000))*86400000;};\n",
"floorInterval":
"var floorInterval = function(value, interval){ Math.floor((double)value/(double)interval)*interval;};\n",
"maximum": # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
"var maximum = function(a, b){if (a==null) b; else if (b==null) a; else if (a>b) a; else b;\n};\n",
"minimum": # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
"var minimum = function(a, b){if (a==null) b; else if (b==null) a; else if (a<b) a; else b;\n};\n",
"coalesce": # PICK FIRST NOT-NULL VALUE
"var coalesce = function(a, b){if (a==null) b; else a; \n};\n",
"zero2null": # ES MAKES IT DIFFICULT TO DETECT NULL/MISSING VALUES, BUT WHEN DEALING WITH NUMBERS, ES DEFAULTS TO RETURNING ZERO FOR missing VALUES!!
"var zero2null = function(a){if (a==0) null; else a; \n};\n",
"get": # MY OWN PERSONAL *FU* TO THE TWISTED MVEL PROPERTY ACCESS
"var get = function(hash, key){\n" +
"if (hash==null) null; else hash[key];\n" +
"};\n",
"isNumeric":
"var isNumeric = function(value){\n" +
"value = value + \"\";\n" +
# "try{ value-0; }catch(e){ 0; }"+
"var isNum = value.length()>0;\n" +
"for (v : value.toCharArray()){\n" +
"if (\"0123456789\".indexOf(v)==-1) isNum = false;\n" +
"};\n" +
"isNum;\n" +
"};\n",
"alpha2zero":
"var alpha2zero = function(value){\n" +
"var output = 0;\n" +
"if (isNumeric(value)) output = value-0;\n" +
"return output;" +
"};\n",
"concat":
"var concat = function(array){\n" +
"if (array==null) \"\"; else {\n" +
"var output = \"\";\n" +
"for (v : array){ output = output+\"|\"+v+\"|\"; };\n" +
"output;\n" +
"}};\n",
# "contains":
# "var contains = function(array, value){\n"+
# "if (array==null) false; else {\n"+
# "var good = false;\n"+
# "for (v : array){ if (v==value) good=true; };\n"+
# "good;\n"+
# "}};\n",
"getFlagValue": # SPECIFICALLY FOR cf_* FLAGS: CONCATENATE THE ATTRIBUTE NAME WITH ATTRIBUTE VALUE, IF EXISTS
"var getFlagValue = function(name){\n" +
"if (_source[name]!=null)" +
"\" \"+name+_source[name];\n" +
"else \n" +
"\"\";\n" +
"};\n",
"getDocValue":
"var getDocValue = function(name){\n" +
"var out = [];\n" +
"var v = doc[name];\n" +
# "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();=n" +
"if (v==null || v.value==null) { null; } else\n" +
"if (v.values.size()<=1){ v.value; } else\n" + # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
"{for(k : v.values) out.add(k); out;}" +
"};\n",
"getSourceValue":
"var getSourceValue = function(name){\n" +
"var out = [];\n" +
"var v = _source[name];\n" +
# "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();=n" +
"if (v==null) { null; } else\n" +
"if (v[\"values\"]==null || v.values.size()<=1){ v.value; } else {\n" + # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
"for(k : v) out.add(k); out;\n" + # .size() MUST BE USED INSTEAD OF .length, THE LATTER WILL CRASH IF JITTED (https://github.com/elasticsearch/elasticsearch/issues/3094)
"}};\n",
"getDocArray":
"var getDocArray = function(name){\n" +
"var out = [];\n" +
"var v = doc[name];\n" +
"if (v!=null && v.value!=null) for(k : v.values) out.add(k);" +
"out;" +
"};\n",
"milli2Month":
"var milli2Month = function(value, milliOffset){\n" +
"g=new java.util.GregorianCalendar(new java.util.SimpleTimeZone(0, \"GMT\"));\n" +
"g.setTimeInMillis(value);\n" +
"g.add(java.util.GregorianCalendar.MILLISECOND, -milliOffset);\n" +
"m = g.get(java.util.GregorianCalendar.MONTH);\n" +
"output = \"\"+g.get(java.util.GregorianCalendar.YEAR)+(m>9?\"\":\"0\")+m;\n" +
"output;\n" +
"};\n",
"between":
"var between = function(value, prefix, suffix){\n" +
"if (value==null){ null; }else{\n" +
"var start = value.indexOf(prefix, 0);\n" +
"if (start==-1){ null; }else{\n" +
"var end = value.indexOf(suffix, start+prefix.length());\n" +
"if (end==-1){ null; }else{\n" +
"value.substring(start+prefix.length(), end);\n" +
"}}}\n" +
"};\n"
}
def replacePrefix(value, prefix, new_prefix):
try:
if value.startswith(prefix):
return new_prefix+value[len(prefix)::]
return value
except Exception as e:
Log.error("can not replace prefix", e)

248
vendor/jx_elasticsearch/es09/setop.py vendored

@ -1,248 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from jx_base import domains
from jx_base.expressions import TRUE, jx_expression, Variable, LeavesOp
from jx_base.queries import is_variable_name
from jx_elasticsearch import es09
from jx_elasticsearch.es09.expressions import unpack_terms
from jx_elasticsearch.es09.util import aggregates
from jx_elasticsearch import post as es_post
from jx_python.containers.cube import Cube
from mo_collections.matrix import Matrix
from mo_dots import coalesce, split_field, Data, wrap
from mo_dots import listwrap, unwrap
from mo_dots.lists import FlatList
from mo_logs import Log
from mo_math import AND, SUM, OR
def is_fieldop(query):
# THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)
select = listwrap(query.select)
if not query.edges:
isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
isSimple = AND(s.value != None and (s.value == "*" or is_variable_name(s.value)) for s in select)
noAgg = AND(s.aggregate == "none" for s in select)
if not isDeep and isSimple and noAgg:
return True
else:
isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
if isSmooth:
return True
return False
def es_fieldop(es, query):
FromES = es09.util.build_es_query(query)
select = listwrap(query.select)
FromES.query = {
"bool": {
"query": {
"match_all": {}
},
"filter": jx_expression(query.where).to_esfilter()
}
}
FromES.size = coalesce(query.limit, 200000)
FromES.fields = FlatList()
for s in select.value:
if s == "*":
FromES.fields = None
elif isinstance(s, list):
FromES.fields.extend(s)
elif isinstance(s, Mapping):
FromES.fields.extend(s.values())
else:
FromES.fields.append(s)
FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]
data = es_post(es, FromES, query.limit)
T = data.hits.hits
matricies = {}
for s in select:
if s.value == "*":
matricies[s.name] = Matrix.wrap([t._source for t in T])
elif isinstance(s.value, Mapping):
# for k, v in s.value.items():
# matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}for t in T])
elif isinstance(s.value, list):
matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T])
elif not s.value:
matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
else:
try:
matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
except Exception as e:
Log.error("", e)
cube = Cube(query.select, query.edges, matricies, frum=query)
cube.frum = query
return cube
def is_setop(query):
select = listwrap(query.select)
if not query.edges:
isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
simpleAgg = AND([s.aggregate in ("count", "none") for s in select]) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
# NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
if simpleAgg or isDeep:
return True
else:
isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
if isSmooth:
return True
return False
def es_setop(es, mvel, query):
FromES = es09.util.build_es_query(query)
select = listwrap(query.select)
isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select]) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
if not isDeep and not isComplex:
if len(select) == 1 and isinstance(select[0].value, LeavesOp):
FromES = wrap({
"query": {"bool": {
"query": {"match_all": {}},
"filter": query.where.to_esfilter()
}},
"sort": query.sort,
"size": 0
})
elif all(isinstance(v, Variable) for v in select.value):
FromES = wrap({
"query": {"bool": {
"query": {"match_all": {}},
"filter": query.where.to_esfilter()
}},
"fields": select.value,
"sort": query.sort,
"size": coalesce(query.limit, 200000)
})
elif not isDeep:
simple_query = query.copy()
simple_query.where = TRUE # THE FACET FILTER IS FASTER
FromES.facets.mvel = {
"terms": {
"script_field": mvel.code(simple_query),
"size": coalesce(simple_query.limit, 200000)
},
"facet_filter": jx_expression(query.where).to_esfilter()
}
else:
FromES.facets.mvel = {
"terms": {
"script_field": mvel.code(query),
"size": coalesce(query.limit, 200000)
},
"facet_filter": jx_expression(query.where).to_esfilter()
}
data = es_post(es, FromES, query.limit)
if len(select) == 1 and isinstance(select[0].value, LeavesOp):
# SPECIAL CASE FOR SINGLE COUNT
cube = wrap(data).hits.hits._source
elif isinstance(select[0].value, Variable):
# SPECIAL CASE FOR SINGLE TERM
cube = wrap(data).hits.hits.fields
else:
data_list = unpack_terms(data.facets.mvel, select)
if not data_list:
cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
else:
output = list(zip(*data_list))  # TRANSPOSE
cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})
return Data(
meta={"esquery": FromES},
data=cube
)
def is_deep(query):
select = listwrap(query.select)
if len(select) > 1:
return False
if aggregates[select[0].aggregate] not in ("none", "count"):
return False
if len(query.edges)<=1:
return False
isDeep = len(split_field(query["from"].name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
if not isDeep:
return False # BETTER TO USE TERM QUERY
return True
def es_deepop(es, mvel, query):
FromES = es09.util.build_es_query(query)
select = query.edges
temp_query = query.copy()
temp_query.select = select
temp_query.edges = FlatList()
FromES.facets.mvel = {
"terms": {
"script_field": mvel.code(temp_query),
"size": query.limit
},
"facet_filter": jx_expression(query.where).to_esfilter()
}
data = es_post(es, FromES, query.limit)
rows = unpack_terms(data.facets.mvel, query.edges)
terms = list(zip(*rows))  # TRANSPOSE
# NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
edges = query.edges
for f, e in enumerate(edges):
for r in terms[f]:
e.domain.getPartByKey(r)
e.index = f
for p, part in enumerate(e.domain.partitions):
part.dataIndex = p
e.domain.NULL.dataIndex = len(e.domain.partitions)
# MAKE CUBE
dims = [len(e.domain.partitions) for e in query.edges]
output = Matrix(*dims)
# FILL CUBE
for r in rows:
term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
output[term_coord] = SUM(output[term_coord], r[-1])
cube = Cube(query.select, query.edges, {query.select.name: output})
cube.frum = query
return cube

152
vendor/jx_elasticsearch/es09/terms.py vendored

@ -1,152 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_elasticsearch.es09.util import aggregates, build_es_query, compileEdges2Term
from jx_elasticsearch import post as es_post
from jx_python import jx
from jx_python.expressions import simplify_esfilter
from jx_python.containers.cube import Cube
from mo_collections.matrix import Matrix
from mo_dots import coalesce
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList
from mo_math import AND
def is_terms(query):
select = listwrap(query.select)
isSimple = not query.select or AND(aggregates[s.aggregate] in ("none", "count") for s in select)
if isSimple:
return True
return False
def es_terms(es, mvel, query):
"""
RETURN LIST OF ALL EDGE QUERIES
EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION
"""
if len(query.edges) == 2:
return _es_terms2(es, mvel, query)
select = listwrap(query.select)
FromES = build_es_query(query)
packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
for s in select:
FromES.facets[s.name] = {
"terms": {
"field": packed_term.field,
"script_field": packed_term.expression,
"size": coalesce(query.limit, 200000)
},
"facet_filter": simplify_esfilter(query.where)
}
term2Parts = packed_term.term2parts
data = es_post(es, FromES, query.limit)
# GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
# BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
for k, f in data.facets.items():
for t in f.terms:
term2Parts(t.term)
# NUMBER ALL EDGES FOR jx INDEXING
for f, e in enumerate(query.edges):
e.index = f
if e.domain.type in ["uid", "default"]:
# e.domain.partitions = jx.sort(e.domain.partitions, "value")
for p, part in enumerate(e.domain.partitions):
part.dataIndex = p
e.domain.NULL.dataIndex = len(e.domain.partitions)
# MAKE CUBE
output = {}
dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
for s in select:
output[s.name] = Matrix(*dims)
# FILL CUBE
# EXPECTING ONLY SELECT CLAUSE FACETS
for facetName, facet in data.facets.items():
for term in facet.terms:
term_coord = term2Parts(term.term).dataIndex
for s in select:
try:
output[s.name][term_coord] = term[aggregates[s.aggregate]]
except Exception as e:
# USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
pass
cube = Cube(query.select, query.edges, output)
cube.query = query
return cube
def _es_terms2(es, mvel, query):
"""
WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value
"""
# REQUEST VALUES IN FIRST DIMENSION
q1 = query.copy()
q1.edges = query.edges[0:1:]
values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value
select = listwrap(query.select)
FromES = build_es_query(query)
for s in select:
for i, v in enumerate(values1):
FromES.facets[s.name + "," + str(i)] = {
"terms": {
"field": query.edges[1].value,
"size": coalesce(query.limit, 200000)
},
"facet_filter": simplify_esfilter({"and": [
query.where,
{"term": {query.edges[0].value: v}}
]})
}
data = es_post(es, FromES, query.limit)
# UNION ALL TERMS FROM SECOND DIMENSION
values2 = set()
for k, f in data.facets.items():
values2.update(f.terms.term)
values2 = jx.sort(values2)
term2index = {v: i for i, v in enumerate(values2)}
query.edges[1].domain.partitions = FlatList([{"name": v, "value": v} for v in values2])
# MAKE CUBE
output = {}
dims = [len(values1), len(values2)]
for s in select:
output[s.name] = Matrix(*dims)
# FILL CUBE
# EXPECTING ONLY SELECT CLAUSE FACETS
for facetName, facet in data.facets.items():
coord = facetName.split(",")
s = [s for s in select if s.name == coord[0]][0]
i1 = int(coord[1])
for term in facet.terms:
i2 = term2index[term.term]
output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]
cube = Cube(query.select, query.edges, output)
cube.query = query
return cube

337
vendor/jx_elasticsearch/es09/terms_stats.py vendored

@ -1,337 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base.queries import is_variable_name
from jx_elasticsearch.es09.expressions import UID
from jx_elasticsearch.es09.util import aggregates, build_es_query, compileEdges2Term
from jx_python import domains
from jx_elasticsearch import es09
from jx_elasticsearch import post as es_post
from jx_python.containers.cube import Cube
from jx_python.expressions import simplify_esfilter
from mo_collections.matrix import Matrix
from mo_dots import literal_field, coalesce
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList
from mo_logs import Log
from mo_math import COUNT, PRODUCT
def is_terms_stats(query):
# ONLY ALLOWED ONE UNKNOWN DOMAIN
num_unknown = COUNT(1 for e in query.edges if e.domain.type not in domains.KNOWN)
if num_unknown <= 1:
if query.sort:
Log.error("terms_stats can not be sorted")
return True
return False
def es_terms_stats(esq, mvel, query):
select = listwrap(query.select)
facetEdges = [] # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
termsEdges = FlatList()
specialEdge = None
special_index = -1
# A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
# FIND THE specialEdge, IF ONE
for f, tedge in enumerate(query.edges):
if tedge.domain.type in domains.KNOWN:
for p, part in enumerate(tedge.domain.partitions):
part.dataIndex = p
# FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
# OR IF WE ARE NOT SIMPLY COUNTING
# OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
# OR IF WE JUST WANT TO FORCE IT :)
# OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM
facetEdges.append(tedge)
else:
if specialEdge:
Log.error("There is more than one open-ended edge: self can not be handled")
specialEdge = tedge
special_index = f
termsEdges.append(tedge)
if not specialEdge:
# WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
# THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
num_parts = 0
special_index = -1
for i, e in enumerate(facetEdges):
l = len(e.domain.partitions)
if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
num_parts = l
specialEdge = e
special_index = i
facetEdges.pop(special_index)
termsEdges.append(specialEdge)
total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
if total_facets > 100:
# WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
counts = esq.query({
"from": query.frum,
"select": {"aggregate": "count"},
"edges": facetEdges,
"where": query.where,
"limit": query.limit
})
esFacets = []
def add_facet(value, parts, cube):
if value:
esFacets.append(parts)
counts["count"].forall(add_facet)
Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", real_count= len(esFacets), theory_count=total_facets)
if not esFacets:
# MAKE EMPTY CUBE
matricies = {}
dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
for s in select:
matricies[s.name] = Matrix(*dims)
cube = Cube(query.select, query.edges, matricies)
cube.frum = query
return cube
else:
# GENERATE ALL COMBOS
esFacets = getAllEdges(facetEdges)
calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
term2parts = calcTerm.term2parts
if len(esFacets) * len(select) > 1000:
Log.error("not implemented yet") # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
pass
FromES = build_es_query(query)
for s in select:
for parts in esFacets:
condition = FlatList()
constants = FlatList()
name = [literal_field(s.name)]
for f, fedge in enumerate(facetEdges):
name.append(str(parts[f].dataIndex))
condition.append(buildCondition(mvel, fedge, parts[f]))
constants.append({"name": fedge.domain.name, "value": parts[f]})
condition.append(query.where)
name = ",".join(name)
FromES.facets[name] = {
"terms_stats": {
"key_field": calcTerm.field,
"value_field": s.value if is_variable_name(s.value) else None,
"value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None,
"size": coalesce(query.limit, 200000)
}
}
if condition:
FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})
data = es_post(esq.es, FromES, query.limit)
if specialEdge.domain.type not in domains.KNOWN:
# WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
partitions = FlatList()
part_map = {}
for facetName, parts in data.facets.items():
for stats in parts.terms:
if stats not in part_map:
part = {"value": stats, "name": stats}
partitions.append(part)
part_map[stats] = part
partitions.sort(specialEdge.domain.compare)
for p, part in enumerate(partitions):
part.dataIndex = p
specialEdge.domain.map = part_map
specialEdge.domain.partitions = partitions
# MAKE CUBE
matricies = {}
dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
for s in select:
matricies[s.name] = Matrix(*dims)
name2agg = {s.name: aggregates[s.aggregate] for s in select}
# FILL CUBE
for edgeName, parts in data.facets.items():
temp = edgeName.split(",")
pre_coord = tuple(int(c) for c in temp[1:])
sname = temp[0]
for stats in parts.terms:
if specialEdge:
special = term2parts(stats.term)[0]
coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:]
else:
coord = pre_coord
matricies[sname][coord] = stats[name2agg[sname]]
cube = Cube(query.select, query.edges, matricies)
cube.frum = query
return cube
def register_script_field(FromES, code):
if not FromES.script_fields:
FromES.script_fields = {}
# IF CODE IS IDENTICAL, THEN USE THE EXISTING SCRIPT
for n, c in FromES.script_fields.items():
if c.script == code:
return n
name = "script" + UID()
FromES.script_fields[name].script = code
return name
def getAllEdges(facetEdges):
if not facetEdges:
return [()]
return _getAllEdges(facetEdges, 0)
def _getAllEdges(facetEdges, edgeDepth):
"""
RETURN ALL PARTITION COMBINATIONS: A LIST OF ORDERED TUPLES
"""
if edgeDepth == len(facetEdges):
return [()]
edge = facetEdges[edgeDepth]
deeper = _getAllEdges(facetEdges, edgeDepth + 1)
output = FlatList()
partitions = edge.domain.partitions
for part in partitions:
for deep in deeper:
output.append((part,) + deep)
return output
def buildCondition(mvel, edge, partition):
"""
RETURN AN ES FILTER OBJECT
"""
output = {}
if edge.domain.isFacet:
# MUST USE THIS' esFacet
condition = wrap(coalesce(partition.where, {"and": []}))
if partition.min and partition.max and is_variable_name(edge.value):
condition["and"].append({
"range": {edge.value: {"gte": partition.min, "lt": partition.max}}
})
# ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
return simplify_esfilter(condition)
elif edge.range:
# THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
# USE MVEL CODE
if edge.domain.type in domains.ALGEBRAIC:
output = {"and": []}
if edge.range.mode and edge.range.mode == "inclusive":
# IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
if is_variable_name(edge.range.min):
output["and"].append({"range": {edge.range.min: {"lt": es09.expressions.value2value(partition.max)}}})
else:
# WHOA!! SUPER SLOW!!
output["and"].append({"script": {"script": mvel.compile_expression(
edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
)}})
if is_variable_name(edge.range.max):
output["and"].append({"or": [
{"missing": {"field": edge.range.max}},
{"range": {edge.range.max, {"gt": es09.expressions.value2value(partition.min)}}}
]})
else:
# WHOA!! SUPER SLOW!!
output["and"].append({"script": {"script": mvel.compile_expression(
edge.range.max + " > " + es09.expressions.value2MVEL(partition.min))}})
else:
# SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
if is_variable_name(edge.range.min):
output["and"].append({"range": {edge.range.min: {"lte": es09.expressions.value2value(partition.min)}}})
else:
# WHOA!! SUPER SLOW!!
output["and"].append({"script": {"script": mvel.compile_expression(
edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
)}})
if is_variable_name(edge.range.max):
output["and"].append({"or": [
{"missing": {"field": edge.range.max}},
{"range": {edge.range.max, {"gte": es09.expressions.value2value(partition.min)}}}
]})
else:
# WHOA!! SUPER SLOW!!
output["and"].append({"script": {"script": mvel.compile_expression(
es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
)}})
return output
else:
Log.error("Do not know how to handle range query on non-continuous domain")
elif not edge.value:
# MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
return partition.esfilter
elif is_variable_name(edge.value):
# USE FAST ES SYNTAX
if edge.domain.type in domains.ALGEBRAIC:
output.range = {}
output.range[edge.value] = {"gte": es09.expressions.value2query(partition.min), "lt": es09.expressions.value2query(partition.max)}
elif edge.domain.type == "set":
if partition.value:
if partition.value != edge.domain.getKey(partition):
Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
# DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
output.term = {edge.value: partition.value}
else:
output.term = {edge.value: edge.domain.getKey(partition)}
elif edge.domain.type == "default":
output.term = dict()
output.term[edge.value] = partition.value
else:
Log.error("Edge \"" + edge.name + "\" is not supported")
return output
else:
# USE MVEL CODE
if edge.domain.type in domains.ALGEBRAIC:
output.script = {"script": edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max)}
else:
output.script = {"script": "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)}
code = es09.expressions.addFunctions(output.script.script)
output.script.script = code.head + code.body
return output

355
vendor/jx_elasticsearch/es09/util.py vendored

@ -1,355 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from datetime import datetime
from jx_base.queries import is_variable_name
from mo_logs.strings import quote
from mo_logs import Log, strings
from mo_dots import Data
from mo_dots import coalesce
from mo_dots import wrap
from mo_dots.lists import FlatList
from pyLibrary import convert
from mo_math import COUNT
from mo_math import Math
from mo_math import stats
from jx_base import domains
from jx_elasticsearch.es09.expressions import value2MVEL
from mo_times import durations
DEBUG = False
def build_es_query(query):
output = wrap({
"query": {"match_all": {}},
"from": 0,
"size": 100 if DEBUG else 0,
"sort": [],
"facets": {
}
})
if DEBUG:
# TO LIMIT RECORDS TO WHAT'S IN FACETS
output.query = {
"bool": {
"query": {
"match_all": {}
},
"filter": query.where.to_esfilter()
}
}
return output
def compileTime2Term(edge):
"""
RETURN MVEL CODE THAT MAPS TIME AND DURATION DOMAINS DOWN TO AN INTEGER,
AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
"""
if edge.esscript:
Log.error("edge script not supported yet")
# IS THERE A LIMIT ON THE DOMAIN?
numPartitions = len(edge.domain.partitions)
value = edge.value
if is_variable_name(value):
value = "doc[\"" + value + "\"].value"
nullTest = compileNullTest(edge)
ref = coalesce(edge.domain.min, edge.domain.max, datetime(2000, 1, 1))
if edge.domain.interval.month > 0:
offset = ref.subtract(ref.floorMonth(), durations.DAY).milli
if offset > durations.DAY.milli * 28:
offset = ref.subtract(ref.ceilingMonth(), durations.DAY).milli
partition2int = "milli2Month(" + value + ", " + value2MVEL(offset) + ")"
partition2int = "((" + nullTest + ") ? 0 : " + partition2int + ")"
def int2Partition(value):
if Math.round(value) == 0:
return edge.domain.NULL
d = datetime(int(str(value)[:4]), int(str(value)[-2:]), 1)
d = d.addMilli(offset)
return edge.domain.getPartByKey(d)
else:
partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + edge.domain.interval.milli + ")"
partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")"
def int2Partition(value):
if Math.round(value) == numPartitions:
return edge.domain.NULL
return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))
return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
# RETURN MVEL CODE THAT MAPS DURATION DOMAINS DOWN TO AN INTEGER,
# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
def compileDuration2Term(edge):
if edge.esscript:
Log.error("edge script not supported yet")
# IS THERE A LIMIT ON THE DOMAIN?
numPartitions = len(edge.domain.partitions)
value = edge.value
if is_variable_name(value):
value = "doc[\"" + value + "\"].value"
ref = coalesce(edge.domain.min, edge.domain.max, durations.ZERO)
nullTest = compileNullTest(edge)
ms = edge.domain.interval.milli
if edge.domain.interval.month > 0:
ms = durations.YEAR.milli / 12 * edge.domain.interval.month
partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + ms + ")"
partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")"
def int2Partition(value):
if Math.round(value) == numPartitions:
return edge.domain.NULL
return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))
return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
# RETURN MVEL CODE THAT MAPS THE numeric DOMAIN DOWN TO AN INTEGER,
# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
def compileNumeric2Term(edge):
if edge.script:
Log.error("edge script not supported yet")
if edge.domain.type != "numeric" and edge.domain.type != "count":
Log.error("can only translate numeric domains")
numPartitions = len(edge.domain.partitions)
value = edge.value
if is_variable_name(value):
value = "doc[\"" + value + "\"].value"
if not edge.domain.max:
if not edge.domain.min:
ref = 0
partition2int = "Math.floor(" + value + ")/" + value2MVEL(edge.domain.interval) + ")"
nullTest = "false"
else:
ref = value2MVEL(edge.domain.min)
partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")"
nullTest = "" + value + "<" + ref
elif not edge.domain.min:
ref = value2MVEL(edge.domain.max)
partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")"
nullTest = "" + value + ">=" + ref
else:
top = value2MVEL(edge.domain.max)
ref = value2MVEL(edge.domain.min)
partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")"
nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"
partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")"
offset = convert.value2int(ref)
def int2Partition(value):
if Math.round(value) == numPartitions:
return edge.domain.NULL
return edge.domain.getPartByKey((value * edge.domain.interval) + offset)
return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileString2Term(edge):
if edge.esscript:
Log.error("edge script not supported yet")
value = edge.value
if is_variable_name(value):
value = strings.expand_template("getDocValue({{path}})", {"path": quote(value)})
else:
Log.error("not handled")
def fromTerm(value):
return edge.domain.getPartByKey(value)
return Data(
toTerm={"head": "", "body": value},
fromTerm=fromTerm
)
def compileNullTest(edge):
"""
RETURN A MVEL EXPRESSION THAT WILL EVALUATE TO true FOR OUT-OF-BOUNDS
"""
if edge.domain.type not in domains.ALGEBRAIC:
Log.error("can only translate time and duration domains")
# IS THERE A LIMIT ON THE DOMAIN?
value = edge.value
if is_variable_name(value):
value = "doc[\"" + value + "\"].value"
if not edge.domain.max:
if not edge.domain.min:
return "false"
bot = value2MVEL(edge.domain.min)
nullTest = "" + value + "<" + bot
elif not edge.domain.min:
top = value2MVEL(edge.domain.max)
nullTest = "" + value + ">=" + top
else:
top = value2MVEL(edge.domain.max)
bot = value2MVEL(edge.domain.min)
nullTest = "(" + value + "<" + bot + ") or (" + value + ">=" + top + ")"
return nullTest
def compileEdges2Term(mvel_compiler, edges, constants):
"""
TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)
GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM
GIVE LAMBDA THAT WILL CONVERT THE TERM BACK INTO THE TUPLE
RETURNS TUPLE OBJECT WITH "type" and "value" ATTRIBUTES.
"type" CAN HAVE A VALUE OF "script", "field" OR "count"
CAN USE THE constants (name, value pairs)
"""
# IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING
edge0 = edges[0]
if len(edges) == 1 and edge0.domain.type in ["set", "default"]:
# THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET
def temp(term):
return FlatList([edge0.domain.getPartByKey(term)])
if edge0.value and is_variable_name(edge0.value):
return Data(
field=edge0.value,
term2parts=temp
)
elif COUNT(edge0.domain.dimension.fields) == 1:
return Data(
field=edge0.domain.dimension.fields[0],
term2parts=temp
)
elif not edge0.value and edge0.domain.partitions:
script = mvel_compiler.Parts2TermScript(edge0.domain)
return Data(
expression=script,
term2parts=temp
)
else:
return Data(
expression=mvel_compiler.compile_expression(edge0.value, constants),
term2parts=temp
)
mvel_terms = [] # FUNCTION TO PACK TERMS
fromTerm2Part = [] # UNPACK TERMS BACK TO PARTS
for e in edges:
domain = e.domain
fields = domain.dimension.fields
if not e.value and fields:
code, decode = mvel_compiler.Parts2Term(e.domain)
t = Data(
toTerm=code,
fromTerm=decode
)
elif fields:
Log.error("not expected")
elif e.domain.type == "time":
t = compileTime2Term(e)
elif e.domain.type == "duration":
t = compileDuration2Term(e)
elif e.domain.type in domains.ALGEBRAIC:
t = compileNumeric2Term(e)
elif e.domain.type == "set" and not fields:
def fromTerm(term):
return e.domain.getPartByKey(term)
code, decode = mvel_compiler.Parts2Term(e.domain)
t = Data(
toTerm=code,
fromTerm=decode
)
else:
t = compileString2Term(e)
if not t.toTerm.body:
mvel_compiler.Parts2Term(e.domain)
Log.unexpected("Can not compile edge to term")
fromTerm2Part.append(t.fromTerm)
mvel_terms.append(t.toTerm.body)
# REGISTER THE DECODE FUNCTION
def temp(term):
terms = term.split('|')
output = FlatList([t2p(t) for t, t2p in zip(terms, fromTerm2Part)])
return output
return Data(
expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants),
term2parts=temp
)
def fix_es_stats(s):
"""
ES RETURNS BAD DEFAULT VALUES FOR STATS
"""
s = wrap(s)
if s.count == 0:
return stats.zero
return s
# MAP NAME TO ES STATS PROPERTY
aggregates = {
"none": "none",
"one": "count",
"sum": "total",
"add": "total",
"count": "count",
"maximum": "max",
"minimum": "min",
"max": "max",
"min": "min",
"mean": "mean",
"average": "mean",
"avg": "mean",
"N": "count",
"X0": "count",
"X1": "total",
"X2": "sum_of_squares",
"std": "std_deviation",
"stddev": "std_deviation",
"var": "variance",
"variance": "variance"
}

238
vendor/jx_elasticsearch/es14/__init__.py vendored

@ -1,238 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from jx_base import container
from jx_base.container import Container
from jx_base.dimensions import Dimension
from jx_base.expressions import jx_expression
from jx_base.queries import is_variable_name
from jx_base.query import QueryOp
from jx_elasticsearch.es14.aggs import es_aggsop, is_aggsop
from jx_elasticsearch.es14.deep import is_deepop, es_deepop
from jx_elasticsearch.es14.setop import is_setop, es_setop
from jx_elasticsearch.es14.util import aggregates
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
from jx_python import jx
from mo_dots import Data, Null, unwrap, coalesce, split_field, literal_field, unwraplist, join_field, wrap, listwrap, FlatList
from mo_json import scrub, value2json
from mo_json.typed_encoder import TYPE_PREFIX, EXISTS_TYPE
from mo_kwargs import override
from mo_logs import Log, Except
from pyLibrary.env import elasticsearch, http
class ES14(Container):
"""
SEND jx QUERIES TO ElasticSearch
"""
def __new__(cls, *args, **kwargs):
if (len(args) == 1 and args[0].get("index") == "meta") or kwargs.get("index") == "meta":
output = ElasticsearchMetadata.__new__(ElasticsearchMetadata, *args, **kwargs)
output.__init__(*args, **kwargs)
return output
else:
return Container.__new__(cls)
@override
def __init__(
self,
host,
index,
type=None,
alias=None,
name=None,
port=9200,
read_only=True,
timeout=None, # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
wait_for_active_shards=1, # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
typed=None,
kwargs=None
):
Container.__init__(self)
if not container.config.default:
container.config.default = {
"type": "elasticsearch",
"settings": unwrap(kwargs)
}
self.settings = kwargs
self.name = name = coalesce(name, alias, index)
if read_only:
self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
else:
self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)
self._namespace = ElasticsearchMetadata(kwargs=kwargs)
self.settings.type = self.es.settings.type
self.edges = Data()
self.worker = None
columns = self._namespace.get_snowflake(self.es.settings.alias).columns # ABSOLUTE COLUMNS
is_typed = any(c.es_column == EXISTS_TYPE for c in columns)
if typed == None:
# SWITCH ON TYPED MODE
self.typed = is_typed
else:
if is_typed != typed:
Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed)
self.typed = typed
@property
def snowflake(self):
return self._namespace.get_snowflake(self.es.settings.alias)
@property
def namespace(self):
return self._namespace
def get_table(self, full_name):
return Table(full_name, self)
def get_schema(self, query_path):
return self._namespace.get_schema(query_path)
def __data__(self):
settings = self.settings.copy()
settings.settings = None
return settings
def __enter__(self):
Log.error("No longer used")
return self
def __exit__(self, type, value, traceback):
if not self.worker:
return
if isinstance(value, Exception):
self.worker.stop()
self.worker.join()
else:
self.worker.join()
@property
def query_path(self):
return join_field(split_field(self.name)[1:])
@property
def url(self):
return self.es.url
def query(self, _query):
try:
query = QueryOp.wrap(_query, container=self, namespace=self.namespace)
for s in listwrap(query.select):
if s.aggregate != None and not aggregates.get(s.aggregate):
Log.error(
"ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
name=s.name,
aggregate=s.aggregate
)
frum = query["from"]
if isinstance(frum, QueryOp):
result = self.query(frum)
q2 = query.copy()
q2.frum = result
return jx.run(q2)
if is_deepop(self.es, query):
return es_deepop(self.es, query)
if is_aggsop(self.es, query):
return es_aggsop(self.es, frum, query)
if is_setop(self.es, query):
return es_setop(self.es, query)
Log.error("Can not handle")
except Exception as e:
e = Except.wrap(e)
if "Data too large, data for" in e:
http.post(self.es.cluster.url / "_cache/clear")
Log.error("Problem (Tried to clear Elasticsearch cache)", e)
Log.error("problem", e)
def addDimension(self, dim):
if isinstance(dim, list):
Log.error("Expecting dimension to be a object, not a list:\n{{dim}}", dim= dim)
self._addDimension(dim, [])
def _addDimension(self, dim, path):
dim.full_name = dim.name
for e in dim.edges:
d = Dimension(e, dim, self)
self.edges[d.full_name] = d
def __getitem__(self, item):
c = self.get_columns(table_name=self.name, column_name=item)
if c:
if len(c) > 1:
Log.error("Do not know how to handle multipole matches")
return c[0]
e = self.edges[item]
if not c:
Log.warning("Column with name {{column|quote}} can not be found in {{table}}", column=item, table=self.name)
return e
def __getattr__(self, item):
return self.edges[item]
def update(self, command):
"""
EXPECTING command == {"set":term, "where":where}
THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
THE where CLAUSE IS AN ES FILTER
"""
command = wrap(command)
schema = self.es.get_properties()
# GET IDS OF DOCUMENTS
results = self.es.search({
"fields": listwrap(schema._routing.path),
"query": {"filtered": {
"filter": jx_expression(command.where).to_esfilter(Null)
}},
"size": 10000
})
# SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
scripts = FlatList()
for k, v in command.set.items():
if not is_variable_name(k):
Log.error("Only support simple paths for now")
if isinstance(v, Mapping) and v.doc:
scripts.append({"doc": v.doc})
else:
v = scrub(v)
scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})
if results.hits.hits:
updates = []
for h in results.hits.hits:
for s in scripts:
updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
updates.append(s)
content = ("\n".join(value2json(c) for c in updates) + "\n")
response = self.es.cluster.post(
self.es.path + "/_bulk",
data=content,
headers={"Content-Type": "application/json"},
timeout=self.settings.timeout,
params={"wait_for_active_shards": self.settings.wait_for_active_shards}
)
if response.errors:
Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])

469
vendor/jx_elasticsearch/es14/aggs.py vendored

@ -1,469 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base.domains import SetDomain
from jx_base.expressions import TupleOp, NULL
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es14.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder
from jx_elasticsearch.es14.expressions import split_expression_by_depth, AndOp, Variable, NullOp
from jx_elasticsearch.es14.setop import get_pull_stats
from jx_elasticsearch.es14.util import aggregates
from jx_python import jx
from jx_python.expressions import jx_expression_to_function
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import EXISTS
from mo_json.typed_encoder import encode_property
from mo_logs import Log
from mo_math import Math, MAX, UNION
from mo_times.timer import Timer
def is_aggsop(es, query):
if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate):
return True
return False
def get_decoders_by_depth(query):
"""
RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
"""
schema = query.frum.schema
output = FlatList()
if query.edges:
if query.sort and query.format != "cube":
# REORDER EDGES/GROUPBY TO MATCH THE SORT
query.edges = sort_edges(query, "edges")
elif query.groupby:
if query.sort and query.format != "cube":
query.groupby = sort_edges(query, "groupby")
for edge in wrap(coalesce(query.edges, query.groupby, [])):
limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
if edge.value != None and not isinstance(edge.value, NullOp):
edge = edge.copy()
vars_ = edge.value.vars()
for v in vars_:
if not schema.leaves(v.var):
Log.error("{{var}} does not exist in schema", var=v)
elif edge.range:
vars_ = edge.range.min.vars() | edge.range.max.vars()
for v in vars_:
if not schema[v.var]:
Log.error("{{var}} does not exist in schema", var=v)
elif edge.domain.dimension:
vars_ = edge.domain.dimension.fields
edge.domain.dimension = edge.domain.dimension.copy()
edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
elif all(edge.domain.partitions.where):
vars_ = set()
for p in edge.domain.partitions:
vars_ |= p.where.vars()
try:
vars_ |= edge.value.vars()
depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var))
if -1 in depths:
Log.error(
"Do not know of column {{column}}",
column=unwraplist([v for v in vars_ if schema[v] == None])
)
if len(depths) > 1:
Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
max_depth = MAX(depths)
while len(output) <= max_depth:
output.append([])
except Exception as e:
# USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
max_depth = 0
output.append([])
output[max_depth].append(AggsDecoder(edge, query, limit))
return output
def sort_edges(query, prop):
ordered_edges = []
remaining_edges = getattr(query, prop)
for s in query.sort:
for e in remaining_edges:
if e.value == s.value:
if isinstance(e.domain, SetDomain):
pass # ALREADY SORTED?
else:
e.domain.sort = s.sort
ordered_edges.append(e)
remaining_edges.remove(e)
break
else:
Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value)
ordered_edges.extend(remaining_edges)
return ordered_edges
def es_aggsop(es, frum, query):
query = query.copy() # WE WILL MARK UP THIS QUERY
schema = frum.schema
select = listwrap(query.select)
es_query = Data()
new_select = Data() # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
formula = []
for s in select:
if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
if schema.query_path == ".":
s.pull = jx_expression_to_function("doc_count")
else:
s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
elif isinstance(s.value, Variable):
if s.aggregate == "count":
new_select["count_"+literal_field(s.value.var)] += [s]
else:
new_select[literal_field(s.value.var)] += [s]
elif s.aggregate:
formula.append(s)
for canonical_name, many in new_select.items():
for s in many:
columns = frum.schema.values(s.value.var)
if s.aggregate == "count":
canonical_names = []
for column in columns:
cn = literal_field(column.es_column + "_count")
if column.jx_type == EXISTS:
canonical_names.append(cn + ".doc_count")
es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
else:
canonical_names.append(cn+ ".value")
es_query.aggs[cn].value_count.field = column.es_column
if len(canonical_names) == 1:
s.pull = jx_expression_to_function(canonical_names[0])
else:
s.pull = jx_expression_to_function({"add": canonical_names})
elif s.aggregate == "median":
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
# ES USES DIFFERENT METHOD FOR PERCENTILES
key = literal_field(canonical_name + " percentile")
es_query.aggs[key].percentiles.field = columns[0].es_column
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\\.0")
elif s.aggregate == "percentile":
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
# ES USES DIFFERENT METHOD FOR PERCENTILES
key = literal_field(canonical_name + " percentile")
if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile:
Log.error("Expecting percentile to be a float from 0.0 to 1.0")
percent = Math.round(s.percentile * 100, decimal=6)
es_query.aggs[key].percentiles.field = columns[0].es_column
es_query.aggs[key].percentiles.percents += [percent]
es_query.aggs[key].percentiles.compression = 2
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
elif s.aggregate == "cardinality":
canonical_names = []
for column in columns:
cn = literal_field(column.es_column + "_cardinality")
canonical_names.append(cn)
es_query.aggs[cn].cardinality.field = column.es_column
if len(columns) == 1:
s.pull = jx_expression_to_function(canonical_names[0] + ".value")
else:
s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
elif s.aggregate == "stats":
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
# REGULAR STATS
stats_name = literal_field(canonical_name)
es_query.aggs[stats_name].extended_stats.field = columns[0].es_column
# GET MEDIAN TOO!
median_name = literal_field(canonical_name + "_percentile")
es_query.aggs[median_name].percentiles.field = columns[0].es_column
es_query.aggs[median_name].percentiles.percents += [50]
s.pull = get_pull_stats(stats_name, median_name)
elif s.aggregate == "union":
pulls = []
for column in columns:
stats_name = encode_property(column.es_column)
if column.nested_path[0] == ".":
es_query.aggs[stats_name] = {"terms": {
"field": column.es_column,
"size": Math.min(s.limit, MAX_LIMIT)
}}
pulls.append(get_bucket_keys(stats_name))
else:
es_query.aggs[stats_name] = {
"nested": {"path": column.nested_path[0]},
"aggs": {"_nested": {"terms": {
"field": column.es_column,
"size": Math.min(s.limit, MAX_LIMIT)
}}}
}
pulls.append(get_bucket_keys(stats_name+"._nested"))
if len(pulls) == 0:
s.pull = NULL
elif len(pulls) == 1:
s.pull = pulls[0]
else:
s.pull = lambda row: UNION(
p(row)
for p in pulls
)
else:
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
# PULL VALUE OUT OF THE stats AGGREGATE
es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})
for i, s in enumerate(formula):
canonical_name = literal_field(s.name)
if isinstance(s.value, TupleOp):
if s.aggregate == "count":
# TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
s.pull = "doc_count"
else:
Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
elif s.aggregate == "count":
es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
elif s.aggregate == "median":
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
key = literal_field(canonical_name + " percentile")
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\\.0")
elif s.aggregate == "percentile":
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
key = literal_field(canonical_name + " percentile")
percent = Math.round(s.percentile * 100, decimal=6)
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
es_query.aggs[key].percentiles.percents += [percent]
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
elif s.aggregate == "cardinality":
# ES USES DIFFERENT METHOD FOR CARDINALITY
key = canonical_name + " cardinality"
es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
s.pull = jx_expression_to_function(key + ".value")
elif s.aggregate == "stats":
# REGULAR STATS
stats_name = literal_field(canonical_name)
es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
# GET MEDIAN TOO!
median_name = literal_field(canonical_name + " percentile")
es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
es_query.aggs[median_name].percentiles.percents += [50]
s.pull = get_pull_stats(stats_name, median_name)
elif s.aggregate=="union":
# USE TERMS AGGREGATE TO SIMULATE union
stats_name = literal_field(canonical_name)
es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
s.pull = jx_expression_to_function(stats_name + ".buckets.key")
else:
# PULL VALUE OUT OF THE stats AGGREGATE
s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
decoders = get_decoders_by_depth(query)
start = 0
#<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
split_where = split_expression_by_depth(query.where, schema=frum.schema)
if len(split_field(frum.name)) > 1:
if any(split_where[2::]):
Log.error("Where clause is too deep")
for d in decoders[1]:
es_query = d.append_query(es_query, start)
start += d.num_columns
if split_where[1]:
#TODO: INCLUDE FILTERS ON EDGES
filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
es_query = Data(
aggs={"_filter": set_default({"filter": filter_}, es_query)}
)
es_query = wrap({
"aggs": {"_nested": set_default(
{"nested": {"path": schema.query_path[0]}},
es_query
)}
})
else:
if any(split_where[1::]):
Log.error("Where clause is too deep")
if decoders:
for d in jx.reverse(decoders[0]):
es_query = d.append_query(es_query, start)
start += d.num_columns
if split_where[0]:
#TODO: INCLUDE FILTERS ON EDGES
filter_ = AndOp("and", split_where[0]).to_esfilter(schema)
es_query = Data(
aggs={"_filter": set_default({"filter": filter_}, es_query)}
)
# </TERRIBLE SECTION>
if not es_query:
es_query = wrap({"query": {"match_all": {}}})
es_query.size = 0
with Timer("ES query time") as es_duration:
result = es_post(es, es_query, query.limit)
try:
format_time = Timer("formatting")
with format_time:
decoders = [d for ds in decoders for d in ds]
result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) # IT APPEARS THE OLD doc_count IS GONE
formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
if query.edges:
output = formatter(decoders, result.aggregations, start, query, select)
elif query.groupby:
output = groupby_formatter(decoders, result.aggregations, start, query, select)
else:
output = aggop_formatter(decoders, result.aggregations, start, query, select)
output.meta.timing.formatting = format_time.duration
output.meta.timing.es_search = es_duration.duration
output.meta.content_type = mime_type
output.meta.es_query = es_query
return output
except Exception as e:
if query.format not in format_dispatch:
Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
Log.error("Some problem", cause=e)
EMPTY = {}
EMPTY_LIST = []
def get_bucket_keys(stats_name):
buckets = jx_expression_to_function(stats_name + ".buckets")
def output(row):
return [b['key'] for b in listwrap(buckets(row))]
return output
def drill(agg):
deeper = agg.get("_filter") or agg.get("_nested")
while deeper:
agg = deeper
deeper = agg.get("_filter") or agg.get("_nested")
return agg
def aggs_iterator(aggs, decoders, coord=True):
"""
DIG INTO ES'S RECURSIVE aggs DATA-STRUCTURE:
RETURN AN ITERATOR OVER THE EFFECTIVE ROWS OF THE RESULTS
:param aggs: ES AGGREGATE OBJECT
:param decoders:
:param coord: TURN ON LOCAL COORDINATE LOOKUP
"""
depth = max(d.start + d.num_columns for d in decoders)
def _aggs_iterator(agg, d):
agg = drill(agg)
if d > 0:
for k, v in agg.items():
if k == "_match":
v = drill(v)
for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
b["_index"] = i
for a, parts in _aggs_iterator(b, d - 1):
yield a, parts + (b,)
elif k == "_other":
for b in v.get("buckets", EMPTY_LIST):
for a, parts in _aggs_iterator(b, d - 1):
yield a, parts + (Null,)
elif k == "_missing":
b = drill(v)
for a, parts in _aggs_iterator(b, d - 1):
yield a, parts + (b,)
elif k.startswith("_join_"):
v["key"] = int(k[6:])
for a, parts in _aggs_iterator(v, d - 1):
yield a, parts + (v,)
else:
for k, v in agg.items():
if k == "_match":
v = drill(v)
for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
b["_index"] = i
yield b, (b,)
elif k == "_other":
for b in v.get("buckets", EMPTY_LIST):
yield b, (Null,)
elif k == "_missing":
b = drill(v)
yield b, (v,)
elif k.startswith("_join_"):
v["_index"] = int(k[6:])
yield v, (v,)
if coord:
for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
coord = tuple(d.get_index(parts) for d in decoders)
if any(c is None for c in coord):
continue
yield parts, coord, a
else:
for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
yield parts, None, a
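# A hedged sketch (hypothetical values) of the aggs shape _aggs_iterator
# walks: each decoder level contributes one of "_match" (terms buckets),
# "_other", "_missing", or "_join_<i>" keys, possibly behind "_filter" or
# "_nested" wrappers that drill() removes:
#
#     {"_match": {"buckets": [
#         {"key": "a", "doc_count": 2,
#          "_match": {"buckets": [{"key": "x", "doc_count": 2}]}}
#     ]}}
#
# Each yielded `parts` tuple lists the buckets innermost-first; the leaf
# bucket is also returned as the effective row's aggregate.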
def count_dim(aggs, decoders):
if any(isinstance(d, (DefaultDecoder, DimFieldListDecoder, ObjectDecoder)) for d in decoders):
# ENUMERATE THE DOMAINS, IF UNKNOWN AT QUERY TIME
for row, coord, agg in aggs_iterator(aggs, decoders, coord=False):
for d in decoders:
d.count(row)
for d in decoders:
d.done_count()
new_edges = wrap([d.edge for d in decoders])
return new_edges
format_dispatch = {}
from jx_elasticsearch.es14.format import format_cube
_ = format_cube

753
vendor/jx_elasticsearch/es14/decoders.py vendored
View File

@ -1,753 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from jx_base.dimensions import Dimension
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
from jx_base.expressions import TupleOp, TRUE
from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT
from jx_elasticsearch.es14.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE
from jx_python import jx
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import STRING, NUMBER, BOOLEAN
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_logs.strings import quote, expand_template
from mo_math import MAX, MIN, Math
from pyLibrary.convert import string2boolean
class AggsDecoder(object):
def __new__(cls, e=None, query=None, *args, **kwargs):
e.allowNulls = coalesce(e.allowNulls, True)
if e.value and e.domain.type == "default":
# if query.groupby:
# return object.__new__(DefaultDecoder, e)
if isinstance(e.value, text_type):
Log.error("Expecting Variable or Expression, not plain string")
if isinstance(e.value, LeavesOp):
return object.__new__(ObjectDecoder, e)
elif isinstance(e.value, TupleOp):
# THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
# JUST PULL THE FIELDS
if not all(isinstance(t, Variable) for t in e.value.terms):
Log.error("Can only handle variables in tuples")
e.domain = Data(
dimension={"fields": e.value.terms}
)
return object.__new__(DimFieldListDecoder, e)
elif isinstance(e.value, Variable):
schema = query.frum.schema
cols = schema.leaves(e.value.var)
if not cols:
return object.__new__(DefaultDecoder, e)
if len(cols) != 1:
return object.__new__(ObjectDecoder, e)
col = cols[0]
limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)
if col.partitions != None:
if col.multi > 1 and len(col.partitions) < 6:
return object.__new__(MultivalueDecoder)
partitions = col.partitions[:limit:]
if e.domain.sort == -1:
partitions = list(reversed(sorted(partitions)))
else:
partitions = sorted(partitions)
e.domain = SimpleSetDomain(partitions=partitions, limit=limit)
else:
e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
return object.__new__(DefaultDecoder, e)
else:
return object.__new__(DefaultDecoder, e)
if e.value and e.domain.type in PARTITION:
return object.__new__(SetDecoder, e)
if isinstance(e.domain.dimension, Dimension):
e.domain = e.domain.dimension.getDomain()
return object.__new__(SetDecoder, e)
if e.value and e.domain.type == "time":
return object.__new__(TimeDecoder, e)
if e.range:
return object.__new__(GeneralRangeDecoder, e)
if e.value and e.domain.type == "duration":
return object.__new__(DurationDecoder, e)
elif e.value and e.domain.type == "range":
return object.__new__(RangeDecoder, e)
elif not e.value and e.domain.dimension.fields:
# THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
# JUST PULL THE FIELDS
fields = e.domain.dimension.fields
if isinstance(fields, Mapping):
Log.error("No longer allowed: All objects are expressions")
else:
return object.__new__(DimFieldListDecoder, e)
elif not e.value and all(e.domain.partitions.where):
return object.__new__(GeneralSetDecoder, e)
else:
Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)
def __init__(self, edge, query, limit):
self.start = None
self.edge = edge
self.name = literal_field(self.edge.name)
self.query = query
self.limit = limit
self.schema = self.query.frum.schema
def append_query(self, es_query, start):
Log.error("Not supported")
def count(self, row):
pass
def done_count(self):
pass
def get_value_from_row(self, row):
raise NotImplementedError()
def get_value(self, index):
raise NotImplementedError()
def get_index(self, row):
raise NotImplementedError()
@property
def num_columns(self):
return 0
class SetDecoder(AggsDecoder):
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
domain = self.domain = edge.domain
self.sorted = None
self.pull = pull_functions[STRING]
# WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM
# self.sorted = {1: "asc", -1: "desc", None: None}[getattr(edge.domain, 'sort', None)]
edge_var = set(v.var for v in edge.value.vars())
if query.sort:
for s in query.sort:
if not edge_var - set(v.var for v in s.value.vars()):
self.sorted = {1: "asc", -1: "desc"}[s.sort]
parts = jx.sort(domain.partitions, {"value": domain.key, "sort": s.sort})
edge.domain = self.domain = SimpleSetDomain(key=domain.key, label=domain.label, partitions=parts)
def append_query(self, es_query, start):
self.start = start
domain = self.domain
domain_key = domain.key
include, text_include = transpose(*(
(
float(v) if isinstance(v, (int, float)) else v,
text_type(float(v)) if isinstance(v, (int, float)) else v
)
for v in (p[domain_key] for p in domain.partitions)
))
value = self.edge.value
exists = AndOp("and", [
value.exists(),
InOp("in", [value, Literal("literal", include)])
]).partial_eval()
limit = coalesce(self.limit, len(domain.partitions))
if isinstance(value, Variable):
es_field = self.query.frum.schema.leaves(value.var)[0].es_column # ALREADY CHECKED THERE IS ONLY ONE
terms = set_default({"terms": {
"field": es_field,
"size": limit,
"order": {"_term": self.sorted} if self.sorted else None
}}, es_query)
else:
terms = set_default({"terms": {
"script": value.to_es_script(self.schema).script(self.schema),
"size": limit
}}, es_query)
if self.edge.allowNulls:
missing = set_default(
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
es_query
)
else:
missing = None
return wrap({"aggs": {
"_match": {
"filter": exists.to_esfilter(self.schema),
"aggs": {
"_filter": terms
}
},
"_missing": missing
}})
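# A hedged sketch (hypothetical field "status", limit 10) of the agg tree
# emitted above: matched values land under _match/_filter, absent values
# under _missing, so aggs_iterator can line the two up:
#
#     {"aggs": {
#         "_match": {"filter": <exists-filter>,
#                    "aggs": {"_filter": {"terms": {"field": "status", "size": 10}}}},
#         "_missing": {"filter": <not-exists-filter>}
#     }}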
def get_value(self, index):
return self.domain.getKeyByIndex(index)
def get_value_from_row(self, row):
return self.pull(row[self.start].get('key'))
def get_index(self, row):
try:
part = row[self.start]
return self.domain.getIndexByKey(part.get('key'))
except Exception as e:
Log.error("problem", cause=e)
@property
def num_columns(self):
return 1
def _range_composer(edge, domain, es_query, to_float, schema):
# USE RANGES
_min = coalesce(domain.min, MIN(domain.partitions.min))
_max = coalesce(domain.max, MAX(domain.partitions.max))
if edge.allowNulls:
missing_filter = set_default(
{
"filter": NotOp("not", AndOp("and", [
edge.value.exists(),
InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]),
InequalityOp("lt", [edge.value, Literal(None, to_float(_max))])
]).partial_eval()).to_esfilter(schema)
},
es_query
)
else:
missing_filter = None
if isinstance(edge.value, Variable):
calc = {"field": schema.leaves(edge.value.var)[0].es_column}
else:
calc = {"script": edge.value.to_es_script(schema).script(schema)}
return wrap({"aggs": {
"_match": set_default(
{"range": calc},
{"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
es_query
),
"_missing": missing_filter
}})
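# A hedged sketch (hypothetical column and two partitions) of the composed
# aggregation: one "range" agg covering every partition, plus a "_missing"
# filter for rows outside [_min, _max) when nulls are allowed:
#
#     {"aggs": {
#         "_match": {"range": {
#             "field": "timestamp",
#             "ranges": [{"from": 0.0, "to": 3600.0},
#                        {"from": 3600.0, "to": 7200.0}]}},
#         "_missing": {"filter": <not-in-range filter>}
#     }}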
class TimeDecoder(AggsDecoder):
def append_query(self, es_query, start):
self.start = start
schema = self.query.frum.schema
return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.unix, schema)
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
domain = self.edge.domain
part = row[self.start]
if part == None:
return len(domain.partitions)
f = coalesce(part.get('from'), part.get('key'))
t = coalesce(part.get('to'), part.get('key'))
if f == None or t == None:
return len(domain.partitions)
else:
for p in domain.partitions:
if p.min.unix <= f < p.max.unix:
return p.dataIndex
sample = part.copy()
sample.buckets = None
Log.error("Expecting to find {{part}}", part=sample)
@property
def num_columns(self):
return 1
class GeneralRangeDecoder(AggsDecoder):
"""
Accept an algebraic domain, and an edge with a `range` attribute
This class assumes the `snapshot` version - where we only include
partitions that have their `min` value in the range.
"""
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
if edge.domain.type == "time":
self.to_float = lambda x: x.unix
elif edge.domain.type == "range":
self.to_float = lambda x: x
else:
Log.error("Unknown domain of type {{type}} for range edge", type=edge.domain.type)
def append_query(self, es_query, start):
self.start = start
edge = self.edge
range = edge.range
domain = edge.domain
aggs = {}
for i, p in enumerate(domain.partitions):
filter_ = AndOp("and", [
InequalityOp("lte", [range.min, Literal("literal", self.to_float(p.min))]),
InequalityOp("gt", [range.max, Literal("literal", self.to_float(p.min))])
])
aggs["_join_" + text_type(i)] = set_default(
{"filter": filter_.to_esfilter(self.schema)},
es_query
)
return wrap({"aggs": aggs})
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
domain = self.edge.domain
part = row[self.start]
if part == None:
return len(domain.partitions)
return part["_index"]
@property
def num_columns(self):
return 1
class GeneralSetDecoder(AggsDecoder):
"""
EXPECTING ALL PARTS IN partitions TO HAVE A where CLAUSE
"""
def append_query(self, es_query, start):
self.start = start
parts = self.edge.domain.partitions
filters = []
notty = []
for p in parts:
w = p.where
filters.append(AndOp("and", [w] + notty).to_esfilter(self.schema))
notty.append(NotOp("not", w))
missing_filter = None
if self.edge.allowNulls: # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
missing_filter = set_default(
{"filter": AndOp("and", notty).to_esfilter(self.schema)},
es_query
)
return wrap({"aggs": {
"_match": set_default(
{"filters": {"filters": filters}},
es_query
),
"_missing": missing_filter
}})
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
domain = self.edge.domain
part = row[self.start]
# if part == None:
# return len(domain.partitions)
return part.get("_index", len(domain.partitions))
@property
def num_columns(self):
return 1
class DurationDecoder(AggsDecoder):
def append_query(self, es_query, start):
self.start = start
return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema)
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
domain = self.edge.domain
part = row[self.start]
if part == None:
return len(domain.partitions)
f = coalesce(part.get('from'), part.get('key'))
t = coalesce(part.get('to'), part.get('key'))
if f == None or t == None:
return len(domain.partitions)
else:
for p in domain.partitions:
if p.min.seconds <= f < p.max.seconds:
return p.dataIndex
sample = part.copy()
sample.buckets = None
Log.error("Expecting to find {{part}}", part=sample)
@property
def num_columns(self):
return 1
class RangeDecoder(AggsDecoder):
def append_query(self, es_query, start):
self.start = start
return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x, self.schema)
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
domain = self.edge.domain
part = row[self.start]
if part == None:
return len(domain.partitions)
f = coalesce(part.get('from'), part.get('key'))
t = coalesce(part.get('to'), part.get('key'))
if f == None or t == None:
return len(domain.partitions)
else:
for p in domain.partitions:
if p.min <= f < p.max:
return p.dataIndex
sample = part.copy()
sample.buckets = None
Log.error("Expecting to find {{part}}", part=sample)
@property
def num_columns(self):
return 1
class MultivalueDecoder(SetDecoder):
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
self.var = edge.value.var
self.values = query.frum.schema[edge.value.var][0].partitions
self.parts = []
def append_query(self, es_query, start):
self.start = start
es_field = self.query.frum.schema.leaves(self.var)[0].es_column
es_query = wrap({"aggs": {
"_match": set_default({"terms": {
"script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
}}, es_query)
}})
return es_query
def get_value_from_row(self, row):
values = row[self.start]['key'].replace("||", "\b").split("|")
if len(values) == 2:
return None
return unwraplist([v.replace("\b", "|") for v in values[1:-1]])
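# A hedged sketch of the pipe encoding reversed above (hypothetical keys):
# LIST_TO_PIPE emits "|" + "|".join(values) + "|", with literal "|"
# escaped as "||", so:
#
#     "|a|b|"  ->  ["a", "b"]
#     "|a|"    ->  "a"     (unwraplist collapses a single value)
#     "|"      ->  None    (two empty split parts mean an empty list)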
def get_index(self, row):
find = self.get_value_from_row(row)
try:
return self.parts.index(find)
except Exception:
self.parts.append(find)
return len(self.parts) - 1
@property
def num_columns(self):
return 1
class ObjectDecoder(AggsDecoder):
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
if isinstance(edge.value, LeavesOp):
prefix = edge.value.term.var
flatter = lambda k: literal_field(relative_field(k, prefix))
else:
prefix = edge.value.var
flatter = lambda k: relative_field(k, prefix)
self.put, self.fields = transpose(*[
(flatter(untype_path(c.names["."])), c.es_column)
for c in query.frum.schema.leaves(prefix)
])
self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}})
self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.parts = list()
self.key2index = {}
self.computed_domain = False
def append_query(self, es_query, start):
self.start = start
for i, v in enumerate(self.fields):
nest = wrap({"aggs": {
"_match": set_default({"terms": {
"field": v,
"size": self.domain.limit
}}, es_query),
"_missing": set_default(
{"filter": {"missing": {"field": v}}},
es_query
)
}})
es_query = nest
return es_query
def count(self, row):
value = self.get_value_from_row(row)
i = self.key2index.get(value)
if i is None:
i = self.key2index[value] = len(self.parts)
self.parts.append(value)
def done_count(self):
self.computed_domain = True
self.edge.domain = self.domain = SimpleSetDomain(
key="value",
partitions=[{"value": p, "dataIndex": i} for i, p in enumerate(self.parts)]
)
def get_index(self, row):
value = self.get_value_from_row(row)
if self.computed_domain:
return self.domain.getIndexByKey(value)
if value is None:
return -1
i = self.key2index.get(value)
if i is None:
i = self.key2index[value] = len(self.parts)
self.parts.append(value)
return i
def get_value_from_row(self, row):
part = row[self.start:self.start + self.num_columns:]
if not part[0]['doc_count']:
return None
output = Data()
for k, v in zip(self.put, part):
output[k] = v.get('key')
return output
@property
def num_columns(self):
return len(self.fields)
class DefaultDecoder(SetDecoder):
# FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
self.domain = edge.domain
self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.parts = list()
self.key2index = {}
self.computed_domain = False
self.script = self.edge.value.partial_eval().to_es_script(self.schema)
self.pull = pull_functions[self.script.data_type]
self.missing = self.script.miss.partial_eval()
self.exists = NotOp("not", self.missing).partial_eval()
# WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
if sort_candidates:
self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]}
else:
self.es_order = None
def append_query(self, es_query, start):
self.start = start
if not isinstance(self.edge.value, Variable):
if self.exists is TRUE:
# IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
output = wrap({"aggs": {
"_match": set_default(
{"terms": {
"script": self.script.expr,
"size": self.domain.limit,
"order": self.es_order
}},
es_query
)
}})
else:
output = wrap({"aggs": {
"_match": { # _match AND _filter REVERSED SO _match LINES UP WITH _missing
"filter": self.exists.to_esfilter(self.schema),
"aggs": {
"_filter": set_default(
{"terms": {
"script": self.script.expr,
"size": self.domain.limit,
"order": self.es_order
}},
es_query
)
}
},
"_missing": set_default(
{"filter": self.missing.to_esfilter(self.schema)},
es_query
)
}})
return output
else:
output = wrap({"aggs": {
"_match": set_default(
{"terms": {
"field": self.schema.leaves(self.edge.value.var)[0].es_column,
"size": self.domain.limit,
"order": self.es_order
}},
es_query
),
"_missing": set_default(
{"filter": self.missing.to_esfilter(self.schema)},
es_query
)
}})
return output
def count(self, row):
part = row[self.start]
if part['doc_count']:
if part.get('key') != None:
self.parts.append(self.pull(part.get('key')))
else:
self.edge.allowNulls = True # OK! WE WILL ALLOW NULLS
def done_count(self):
self.edge.domain = self.domain = SimpleSetDomain(
partitions=jx.sort(set(self.parts))
)
self.parts = None
self.computed_domain = True
def get_index(self, row):
if self.computed_domain:
try:
part = row[self.start]
return self.domain.getIndexByKey(self.pull(part.get('key')))
except Exception as e:
Log.error("problem", cause=e)
else:
try:
part = row[self.start]
key = self.pull(part.get('key'))
i = self.key2index.get(key)
if i is None:
i = len(self.parts)
part = {"key": key, "dataIndex": i}
self.parts.append(part)
self.key2index[key] = i
return i
except Exception as e:
Log.error("problem", cause=e)
@property
def num_columns(self):
return 1
class DimFieldListDecoder(SetDecoder):
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
edge.allowNulls = False
self.fields = edge.domain.dimension.fields
self.domain = self.edge.domain
self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.parts = list()
def append_query(self, es_query, start):
# TODO: USE "reverse_nested" QUERY TO PULL THESE
self.start = start
for i, v in enumerate(self.fields):
exists = v.exists().partial_eval()
nest = wrap({"aggs": {"_match": {
"filter": exists.to_esfilter(self.schema),
"aggs": {"_filter": set_default({"terms": {
"field": self.schema.leaves(v.var)[0].es_column,
"size": self.domain.limit
}}, es_query)}
}}})
nest.aggs._missing = set_default(
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
es_query
)
es_query = nest
if self.domain.where:
filter_ = self.domain.where.partial_eval().to_esfilter(self.schema)
es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}}
return es_query
def count(self, row):
part = row[self.start:self.start + len(self.fields):]
if part[0]['doc_count']:
value = tuple(p.get("key") for p in part)
self.parts.append(value)
def done_count(self):
columns = map(text_type, range(len(self.fields)))
parts = wrap([{text_type(i): p for i, p in enumerate(part)} for part in set(self.parts)])
self.parts = None
sorted_parts = jx.sort(parts, columns)
self.edge.domain = self.domain = SimpleSetDomain(
key="value",
partitions=[{"value": tuple(v[k] for k in columns), "dataIndex": i} for i, v in enumerate(sorted_parts)]
)
def get_index(self, row):
part = row[self.start:self.start + len(self.fields):]
if part[0]['doc_count'] == 0:
return None
find = tuple(p.get("key") for p in part)
output = self.domain.getIndexByKey(find)
return output
@property
def num_columns(self):
return len(self.fields)
pull_functions = {
STRING: lambda x: x,
NUMBER: lambda x: float(x) if x != None else None,
BOOLEAN: string2boolean
}
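# Hedged usage sketch: these casters convert raw ES bucket keys (strings
# or numbers over the wire) into jx-typed values, e.g.:
#
#     >>> pull_functions[NUMBER]("3.5")
#     3.5
#     >>> pull_functions[STRING]("3.5")
#     '3.5'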

238
vendor/jx_elasticsearch/es14/deep.py vendored
View File

@ -1,238 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base.expressions import NULL
from jx_base.query import DEFAULT_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es14.expressions import split_expression_by_depth, AndOp, Variable, LeavesOp
from jx_elasticsearch.es14.setop import format_dispatch, get_pull_function, get_pull
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template
from jx_python.expressions import compile_expression, jx_expression_to_function
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, EXISTS_TYPE
from mo_logs import Log
from mo_threads import Thread
from mo_times.timer import Timer
from pyLibrary import convert
EXPRESSION_PREFIX = "_expr."
_ = convert
def is_deepop(es, query):
if query.edges or query.groupby:
return False
if all(s.aggregate not in (None, "none") for s in listwrap(query.select)):
return False
if len(split_field(query.frum.name)) > 1:
return True
# ASSUME IT IS NESTED IF WE ARE ASKING FOR NESTED COLUMNS
# vars_ = query_get_all_vars(query)
# columns = query.frum.get_columns()
# if any(c for c in columns if len(c.nested_path) != 1 and c.name in vars_):
# return True
return False
def es_deepop(es, query):
schema = query.frum.schema
query_path = schema.query_path[0]
# TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
# THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS
# {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
# LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
post_expressions = {}
es_query, es_filters = es_query_template(query_path)
# SPLIT WHERE CLAUSE BY DEPTH
wheres = split_expression_by_depth(query.where, schema)
for i, f in enumerate(es_filters):
script = AndOp("and", wheres[i]).partial_eval().to_esfilter(schema)
set_default(f, script)
if not wheres[1]:
# WITHOUT NESTED CONDITIONS, WE MUST ALSO RETURN DOCS WITH NO NESTED RECORDS
more_filter = {
"and": [
es_filters[0],
{"missing": {"field": untype_path(query_path) + "." + EXISTS_TYPE}}
]
}
else:
more_filter = None
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
# es_query.sort = jx_sort_to_es_sort(query.sort)
map_to_es_columns = schema.map_to_es()
# {c.names["."]: c.es_column for c in schema.leaves(".")}
query_for_es = query.map(map_to_es_columns)
es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)
es_query.fields = []
is_list = isinstance(query.select, list)
new_select = FlatList()
i = 0
for s in listwrap(query.select):
if isinstance(s.value, LeavesOp) and isinstance(s.value.term, Variable):
# IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
leaves = schema.leaves(s.value.term.var)
col_names = set()
for c in leaves:
if c.nested_path[0] == ".":
if c.jx_type == NESTED:
continue
es_query.fields += [c.es_column]
c_name = untype_path(c.names[query_path])
col_names.add(c_name)
new_select.append({
"name": concat_field(s.name, c_name),
"nested_path": c.nested_path[0],
"put": {"name": concat_field(s.name, literal_field(c_name)), "index": i, "child": "."},
"pull": get_pull_function(c)
})
i += 1
# REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
for n in new_select:
if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
n.put.name = n.name = n.name.lstrip(".")
col_names.add(n.name)
elif isinstance(s.value, Variable):
net_columns = schema.leaves(s.value.var)
if not net_columns:
new_select.append({
"name": s.name,
"nested_path": ".",
"put": {"name": s.name, "index": i, "child": "."},
"pull": NULL
})
else:
for n in net_columns:
pull = get_pull_function(n)
if n.nested_path[0] == ".":
if n.jx_type == NESTED:
continue
es_query.fields += [n.es_column]
# WE MUST FIGURE OUT WHICH NAMESPACE s.value.var IS USING SO WE CAN EXTRACT THE child
for np in n.nested_path:
c_name = untype_path(n.names[np])
if startswith_field(c_name, s.value.var):
child = relative_field(c_name, s.value.var)
break
else:
child = relative_field(untype_path(n.names[n.nested_path[0]]), s.value.var)
new_select.append({
"name": s.name,
"pull": pull,
"nested_path": n.nested_path[0],
"put": {
"name": s.name,
"index": i,
"child": child
}
})
i += 1
else:
expr = s.value
for v in expr.vars():
for c in schema[v.var]:
if c.nested_path[0] == ".":
es_query.fields += [c.es_column]
# else:
# Log.error("deep field not expected")
pull_name = EXPRESSION_PREFIX + s.name
map_to_local = MapToLocal(schema)
pull = jx_expression_to_function(pull_name)
post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())
new_select.append({
"name": s.name if is_list else ".",
"pull": pull,
"value": expr.__data__(),
"put": {"name": s.name, "index": i, "child": "."}
})
i += 1
# <COMPLICATED> ES needs two calls to get all documents
more = []
def get_more(please_stop):
more.append(es_post(
es,
Data(
query={"filtered": {"filter": more_filter}},
fields=es_query.fields
),
query.limit
))
if more_filter:
need_more = Thread.run("get more", target=get_more)
with Timer("call to ES") as call_timer:
data = es_post(es, es_query, query.limit)
# EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
def inners():
for t in data.hits.hits:
for i in t.inner_hits[literal_field(query_path)].hits.hits:
t._inner = i._source
for k, e in post_expressions.items():
t[k] = e(t)
yield t
if more_filter:
Thread.join(need_more)
for t in more[0].hits.hits:
yield t
#</COMPLICATED>
try:
formatter, groupby_formatter, mime_type = format_dispatch[query.format]
output = formatter(inners(), new_select, query)
output.meta.timing.es = call_timer.duration
output.meta.content_type = mime_type
output.meta.es_query = es_query
return output
except Exception as e:
Log.error("problem formatting", e)
class MapToLocal(object):
"""
MAP FROM RELATIVE/ABSOLUTE NAMESPACE TO PYTHON THAT WILL EXTRACT RESULT
"""
def __init__(self, map_to_columns):
self.map_to_columns = map_to_columns
def __getitem__(self, item):
return self.get(item)
def get(self, item):
cs = self.map_to_columns[item]
if len(cs) == 0:
return "Null"
elif len(cs) == 1:
return get_pull(cs[0])
else:
return "coalesce(" + (",".join(get_pull(c) for c in cs)) + ")"

1429
vendor/jx_elasticsearch/es14/expressions.py vendored

File diff suppressed because it is too large. Load Diff

316
vendor/jx_elasticsearch/es14/format.py vendored
View File

@ -1,316 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base.expressions import TupleOp
from jx_elasticsearch.es14.aggs import count_dim, aggs_iterator, format_dispatch, drill
from jx_python.containers.cube import Cube
from mo_collections.matrix import Matrix
from mo_dots import Data, set_default, wrap, split_field, coalesce
from mo_future import sort_using_key
from mo_logs import Log
from mo_logs.strings import quote
from pyLibrary import convert
FunctionType = type(lambda: 1)
def format_cube(decoders, aggs, start, query, select):
# decoders = sorted(decoders, key=lambda d: -d.edge.dim) # REVERSE DECODER ORDER, BECAUSE ES QUERY WAS BUILT IN REVERSE ORDER
new_edges = count_dim(aggs, decoders)
dims = []
for e in new_edges:
if isinstance(e.value, TupleOp):
e.allowNulls = False
extra = 0 if e.allowNulls is False else 1
dims.append(len(e.domain.partitions) + extra)
dims = tuple(dims)
matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select]
for row, coord, agg in aggs_iterator(aggs, decoders):
for s, m in matricies:
try:
v = s.pull(agg)
m[coord] = v
except Exception as e:
# THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
if agg.get('doc_count') != 0:
Log.error("Programmer error", cause=e)
cube = Cube(
query.select,
sort_using_key(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY
{s.name: m for s, m in matricies}
)
cube.frum = query
return cube
def format_cube_from_aggop(decoders, aggs, start, query, select):
agg = drill(aggs)
matricies = [(s, Matrix(dims=[], zeros=s.default)) for s in select]
for s, m in matricies:
m[tuple()] = s.pull(agg)
cube = Cube(query.select, [], {s.name: m for s, m in matricies})
cube.frum = query
return cube
def format_table(decoders, aggs, start, query, select):
new_edges = count_dim(aggs, decoders)
header = new_edges.name + select.name
def data():
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
is_sent = Matrix(dims=dims, zeros=0)
if query.sort and not query.groupby:
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
for row, coord, agg in aggs_iterator(aggs, decoders):
missing_coord = next(all_coord)
while coord != missing_coord:
record = [d.get_value(missing_coord[i]) for i, d in enumerate(decoders)]
for s in select:
if s.aggregate == "count":
record.append(0)
else:
record.append(None)
yield record
missing_coord = next(all_coord)
output = [d.get_value(c) for c, d in zip(coord, decoders)]
for s in select:
output.append(s.pull(agg))
yield output
else:
for row, coord, agg in aggs_iterator(aggs, decoders):
is_sent[coord] = 1
output = [d.get_value(c) for c, d in zip(coord, decoders)]
for s in select:
output.append(s.pull(agg))
yield output
# EMIT THE MISSING CELLS IN THE CUBE
if not query.groupby:
for c, v in is_sent:
if not v:
record = [d.get_value(c[i]) for i, d in enumerate(decoders)]
for s in select:
if s.aggregate == "count":
record.append(0)
else:
record.append(None)
yield record
return Data(
meta={"format": "table"},
header=header,
data=list(data())
)
def format_table_from_groupby(decoders, aggs, start, query, select):
header = [d.edge.name.replace("\\.", ".") for d in decoders] + select.name
def data():
for row, coord, agg in aggs_iterator(aggs, decoders):
if agg.get('doc_count', 0) == 0:
continue
output = [d.get_value_from_row(row) for d in decoders]
for s in select:
output.append(s.pull(agg))
yield output
return Data(
meta={"format": "table"},
header=header,
data=list(data())
)
def format_table_from_aggop(decoders, aggs, start, query, select):
header = select.name
agg = drill(aggs)
row = []
for s in select:
row.append(s.pull(agg))
return Data(
meta={"format": "table"},
header=header,
data=[row]
)
def format_tab(decoders, aggs, start, query, select):
table = format_table(decoders, aggs, start, query, select)
def data():
yield "\t".join(map(quote, table.header))
for d in table.data:
yield "\t".join(map(quote, d))
return data()
def format_csv(decoders, aggs, start, query, select):
table = format_table(decoders, aggs, start, query, select)
def data():
yield ", ".join(map(quote, table.header))
for d in table.data:
yield ", ".join(map(quote, d))
return data()
def format_list_from_groupby(decoders, aggs, start, query, select):
def data():
for row, coord, agg in aggs_iterator(aggs, decoders):
if agg.get('doc_count', 0) == 0:
continue
output = Data()
for g, d in zip(query.groupby, decoders):
output[coalesce(g.put.name, g.name)] = d.get_value_from_row(row)
for s in select:
output[s.name] = s.pull(agg)
yield output
for g in query.groupby:
g.put.name = coalesce(g.put.name, g.name)
output = Data(
meta={"format": "list"},
data=list(data())
)
return output
def format_list(decoders, aggs, start, query, select):
new_edges = count_dim(aggs, decoders)
def data():
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
is_sent = Matrix(dims=dims, zeros=0)
if query.sort and not query.groupby:
# TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
for _, coord, agg in aggs_iterator(aggs, decoders):
missing_coord = next(all_coord)
while coord != missing_coord:
# INSERT THE MISSING COORDINATE INTO THE GENERATION
output = Data()
for i, d in enumerate(decoders):
output[query.edges[i].name] = d.get_value(missing_coord[i])
for s in select:
if s.aggregate == "count":
output[s.name] = 0
yield output
missing_coord = next(all_coord)
output = Data()
for e, c, d in zip(query.edges, coord, decoders):
output[e.name] = d.get_value(c)
for s in select:
output[s.name] = s.pull(agg)
yield output
else:
for row, coord, agg in aggs_iterator(aggs, decoders):
is_sent[coord] = 1
output = Data()
for e, c, d in zip(query.edges, coord, decoders):
output[e.name] = d.get_value(c)
for s in select:
output[s.name] = s.pull(agg)
yield output
# EMIT THE MISSING CELLS IN THE CUBE
if not query.groupby:
for c, v in is_sent:
if not v:
output = Data()
for i, d in enumerate(decoders):
output[query.edges[i].name] = d.get_value(c[i])
for s in select:
if s.aggregate == "count":
output[s.name] = 0
yield output
output = Data(
meta={"format": "list"},
data=list(data())
)
return output
def format_list_from_aggop(decoders, aggs, start, query, select):
agg = drill(aggs)
if isinstance(query.select, list):
item = Data()
for s in select:
item[s.name] = s.pull(agg)
else:
item = select[0].pull(agg)
if query.edges or query.groupby:
return wrap({
"meta": {"format": "list"},
"data": [item]
})
else:
return wrap({
"meta": {"format": "value"},
"data": item
})
def format_line(decoders, aggs, start, query, select):
list = format_list(decoders, aggs, start, query, select)
def data():
for d in list.data:
yield convert.value2json(d)
return data()
set_default(format_dispatch, {
None: (format_cube, format_table_from_groupby, format_cube_from_aggop, "application/json"),
"cube": (format_cube, format_cube, format_cube_from_aggop, "application/json"),
"table": (format_table, format_table_from_groupby, format_table_from_aggop, "application/json"),
"list": (format_list, format_list_from_groupby, format_list_from_aggop, "application/json"),
# "csv": (format_csv, format_csv_from_groupby, "text/csv"),
# "tab": (format_tab, format_tab_from_groupby, "text/tab-separated-values"),
# "line": (format_line, format_line_from_groupby, "application/json")
})
def _get(v, k, d):
for p in split_field(k):
try:
v = v.get(p)
if v is None:
return d
except Exception:
v = [vv.get(p) for vv in v]
return v
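# A hedged doctest-style sketch (hypothetical data): _get walks a dotted
# path, fanning out over lists when an intermediate value is one:
#
#     >>> _get({"a": {"b": 1}}, "a.b", None)
#     1
#     >>> _get({"a": [{"b": 1}, {"b": 2}]}, "a.b", None)
#     [1, 2]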

378
vendor/jx_elasticsearch/es14/setop.py vendored
View File

@ -1,378 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from jx_base.domains import ALGEBRAIC
from jx_base.expressions import IDENTITY
from jx_base.query import DEFAULT_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es14.expressions import Variable, LeavesOp
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
from mo_dots.lists import FlatList
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, unnest_path, untyped
from mo_logs import Log
from mo_math import AND
from mo_math import MAX
from mo_times.timer import Timer
format_dispatch = {}
def is_setop(es, query):
select = listwrap(query.select)
if not query.edges:
isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
simpleAgg = AND([s.aggregate in ("count", "none") for s in select]) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
# NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
if simpleAgg or isDeep:
return True
else:
isSmooth = AND((e.domain.type in ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
if isSmooth:
return True
return False
def es_setop(es, query):
schema = query.frum.schema
es_query, filters = es_query_template(schema.query_path[0])
nested_filter = None
set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
es_query.fields = FlatList()
selects = wrap([s.copy() for s in listwrap(query.select)])
new_select = FlatList()
schema = query.frum.schema
# columns = schema.columns
# nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")
es_query.sort = jx_sort_to_es_sort(query.sort, schema)
put_index = 0
for select in selects:
# IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
term = select.value.term
leaves = schema.leaves(term.var)
for c in leaves:
full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
if c.jx_type == NESTED:
es_query.fields = ["_source"]
new_select.append({
"name": full_name,
"value": Variable(c.es_column),
"put": {"name": literal_field(full_name), "index": put_index, "child": "."},
"pull": get_pull_source(c.es_column)
})
put_index += 1
elif c.nested_path[0] != ".":
pass # THE NESTED PARENT WILL CAPTURE THIS
else:
es_query.fields += [c.es_column]
new_select.append({
"name": full_name,
"value": Variable(c.es_column),
"put": {"name": literal_field(full_name), "index": put_index, "child": "."}
})
put_index += 1
elif isinstance(select.value, Variable):
s_column = select.value.var
# LEAVES OF OBJECT
leaves = schema.leaves(s_column)
nested_selects = {}
if leaves:
if s_column == '.' or any(c.jx_type == NESTED for c in leaves):
# PULL WHOLE NESTED ARRAYS
es_query.fields = ["_source"]
for c in leaves:
if len(c.nested_path) == 1:
jx_name = untype_path(c.names["."])
new_select.append({
"name": select.name,
"value": Variable(c.es_column),
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
"pull": get_pull_source(c.es_column)
})
else:
# PULL ONLY WHAT'S NEEDED
for c in leaves:
if len(c.nested_path) == 1:
jx_name = untype_path(c.names["."])
if c.jx_type == NESTED:
es_query.fields = ["_source"]
new_select.append({
"name": select.name,
"value": Variable(c.es_column),
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
"pull": get_pull_source(c.es_column)
})
else:
es_query.fields += [c.es_column]
new_select.append({
"name": select.name,
"value": Variable(c.es_column),
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}
})
else:
if not nested_filter:
where = filters[0].copy()
nested_filter = [where]
for k in filters[0].keys():
filters[0][k] = None
set_default(
filters[0],
es_and([where, es_or(nested_filter)])
)
nested_path = c.nested_path[0]
if nested_path not in nested_selects:
where = nested_selects[nested_path] = Data()
nested_filter += [where]
where.nested.path = nested_path
where.nested.query.match_all = {}
where.nested.inner_hits._source = False
where.nested.inner_hits.fields += [c.es_column]
child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
new_select.append({
"name": select.name,
"value": select.value,
"put": {
"name": select.name,
"index": put_index,
"child": child
},
"pull": pull
})
else:
nested_selects[nested_path].nested.inner_hits.fields += [c.es_column]
else:
new_select.append({
"name": select.name,
"value": Variable("$dummy"),
"put": {"name": select.name, "index": put_index, "child": "."}
})
put_index += 1
else:
painless = select.value.partial_eval().to_es_script(schema)
es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
new_select.append({
"name": select.name,
"pull": jx_expression_to_function("fields." + literal_field(select.name)),
"put": {"name": select.name, "index": put_index, "child": "."}
})
put_index += 1
for n in new_select:
if n.pull:
continue
elif isinstance(n.value, Variable):
if es_query.fields[0] == "_source":
es_query.fields = ["_source"]
n.pull = get_pull_source(n.value.var)
else:
n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
else:
Log.error("Do not know what to do")
with Timer("call to ES") as call_timer:
Log.note("{{data}}", data=es_query)
data = es_post(es, es_query, query.limit)
T = data.hits.hits
try:
formatter, groupby_formatter, mime_type = format_dispatch[query.format]
output = formatter(T, new_select, query)
output.meta.timing.es = call_timer.duration
output.meta.content_type = mime_type
output.meta.es_query = es_query
return output
except Exception as e:
Log.error("problem formatting", e)
def accumulate_nested_doc(nested_path, expr=IDENTITY):
"""
:param nested_path: THE PATH USED TO EXTRACT THE NESTED RECORDS
:param expr: FUNCTION USED ON THE NESTED OBJECT TO GET SPECIFIC VALUE
:return: THE DE_TYPED NESTED OBJECT ARRAY
"""
name = literal_field(nested_path)
def output(doc):
acc = []
for h in doc.inner_hits[name].hits.hits:
i = h._nested.offset
obj = Data()
for f, v in h.fields.items():
local_path = untype_path(relative_field(f, nested_path))
obj[local_path] = unwraplist(v)
# EXTEND THE LIST TO THE LENGTH WE REQUIRE
for _ in range(len(acc), i+1):
acc.append(None)
acc[i] = expr(obj)
return acc
return output
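# A hedged sketch of the output shape (hypothetical offsets): inner hits
# at _nested.offset 0 and 2 produce an array padded with None at the gap:
#
#     [ {...fields of hit 0...}, None, {...fields of hit 2...} ]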
def format_list(T, select, query=None):
data = []
if isinstance(query.select, list):
for row in T:
r = Data()
for s in select:
v = s.pull(row)
r[s.put.name][s.put.child] = unwraplist(v)
data.append(r if r else None)
elif isinstance(query.select.value, LeavesOp):
for row in T:
r = Data()
for s in select:
r[s.put.name][s.put.child] = unwraplist(s.pull(row))
data.append(r if r else None)
else:
for row in T:
r = None
for s in select:
v = unwraplist(s.pull(row))
if v is None:
continue
if s.put.child == ".":
r = v
else:
if r is None:
r = Data()
r[s.put.child] = v
data.append(r)
return Data(
meta={"format": "list"},
data=data
)
def format_table(T, select, query=None):
data = []
num_columns = (MAX(select.put.index) + 1)
for row in T:
r = [None] * num_columns
for s in select:
value = unwraplist(s.pull(row))
if value == None:
continue
index, child = s.put.index, s.put.child
if child == ".":
r[index] = value
else:
if r[index] is None:
r[index] = Data()
r[index][child] = value
data.append(r)
header = [None] * num_columns
if isinstance(query.select, Mapping) and not isinstance(query.select.value, LeavesOp):
for s in select:
header[s.put.index] = s.name
else:
for s in select:
if header[s.put.index]:
continue
if s.name == ".":
header[s.put.index] = "."
else:
header[s.put.index] = s.name
return Data(
meta={"format": "table"},
header=header,
data=data
)
def format_cube(T, select, query=None):
table = format_table(T, select, query)
if len(table.data) == 0:
return Cube(
select,
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": 0, "interval": 1}}],
data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
)
cols = transpose(*unwrap(table.data))
return Cube(
select,
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}],
data={h: Matrix(list=cols[i]) for i, h in enumerate(table.header)}
)
set_default(format_dispatch, {
None: (format_cube, None, "application/json"),
"cube": (format_cube, None, "application/json"),
"table": (format_table, None, "application/json"),
"list": (format_list, None, "application/json")
})
def get_pull(column):
if column.nested_path[0] == ".":
return concat_field("fields", literal_field(column.es_column))
else:
depth = len(split_field(column.nested_path[0]))
rel_name = split_field(column.es_column)[depth:]
return join_field(["_inner"] + rel_name)
def get_pull_function(column):
return jx_expression_to_function(get_pull(column))
def get_pull_source(es_column):
def output(row):
return untyped(row._source[es_column])
return output
def get_pull_stats(stats_name, median_name):
return jx_expression_to_function({"select": [
{"name": "count", "value": stats_name + ".count"},
{"name": "sum", "value": stats_name + ".sum"},
{"name": "min", "value": stats_name + ".min"},
{"name": "max", "value": stats_name + ".max"},
{"name": "avg", "value": stats_name + ".avg"},
{"name": "sos", "value": stats_name + ".sum_of_squares"},
{"name": "std", "value": stats_name + ".std_deviation"},
{"name": "var", "value": stats_name + ".variance"},
{"name": "median", "value": median_name + ".values.50\\.0"}
]})
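# A hedged sketch (hypothetical numbers) of the record the returned
# function assembles from one extended_stats + percentiles pair:
#
#     {"count": 10, "sum": 55.0, "min": 1.0, "max": 10.0, "avg": 5.5,
#      "sos": 385.0, "std": 2.87, "var": 8.25, "median": 5.5}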

135
vendor/jx_elasticsearch/es14/util.py vendored
View File

@ -1,135 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_elasticsearch.es14.expressions import Variable
from mo_dots import wrap
from mo_future import text_type
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
from mo_logs import Log
def es_query_template(path):
"""
RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
:param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
:return:
"""
if not isinstance(path, text_type):
Log.error("expecting path to be a string")
if path != ".":
f0 = {}
f1 = {}
output = wrap({
"query": {"filtered": {"filter": es_and([
f0,
{"nested": {
"path": path,
"filter": f1,
"inner_hits": {"size": 100000}
}}
])}},
"from": 0,
"size": 0,
"sort": []
})
return output, wrap([f0, f1])
else:
f0 = {}
output = wrap({
"query": {"filtered": {"filter": es_and([f0])}},
"from": 0,
"size": 0,
"sort": []
})
return output, wrap([f0])
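# A hedged sketch (hypothetical path "a.b") of the nested-path template:
# f0 and f1 are returned as the path-to-filter list so callers can fill
# the outer and nested filter slots later:
#
#     {"query": {"filtered": {"filter": {"and": [
#         f0,
#         {"nested": {"path": "a.b", "filter": f1,
#                     "inner_hits": {"size": 100000}}}
#     ]}}},
#      "from": 0, "size": 0, "sort": []}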
def jx_sort_to_es_sort(sort, schema):
if not sort:
return []
output = []
for s in sort:
if isinstance(s.value, Variable):
cols = schema.leaves(s.value.var)
if s.sort == -1:
types = OBJECT, STRING, NUMBER, BOOLEAN
else:
types = BOOLEAN, NUMBER, STRING, OBJECT
for type in types:
for c in cols:
if c.jx_type == type:
if s.sort == -1:
output.append({c.es_column: "desc"})
else:
output.append(c.es_column)
else:
from mo_logs import Log
Log.error("do not know how to handle")
return output
# FOR ELASTICSEARCH aggs
aggregates = {
"none": "none",
"one": "count",
"cardinality": "cardinality",
"sum": "sum",
"add": "sum",
"count": "value_count",
"maximum": "max",
"minimum": "min",
"max": "max",
"min": "min",
"mean": "avg",
"average": "avg",
"avg": "avg",
"median": "median",
"percentile": "percentile",
"N": "count",
"s0": "count",
"s1": "sum",
"s2": "sum_of_squares",
"std": "std_deviation",
"stddev": "std_deviation",
"union": "union",
"var": "variance",
"variance": "variance",
"stats": "stats"
}
NON_STATISTICAL_AGGS = {"none", "one"}
def es_and(terms):
return wrap({"and": terms})
def es_or(terms):
return wrap({"or": terms})
def es_not(term):
return wrap({"not": term})
def es_script(term):
return wrap({"script": term})
def es_missing(term):
return {"missing": {"field": term}}

65
vendor/jx_elasticsearch/es52/__init__.py vendored
View File

@ -7,26 +7,28 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from jx_base import container
from mo_future import is_text, is_binary
from jx_base import Column, container
from jx_base.container import Container
from jx_base.dimensions import Dimension
from jx_base.expressions import jx_expression
from jx_base.query import QueryOp
from jx_base.language import is_op
from jx_elasticsearch.es52.aggs import es_aggsop, is_aggsop
from jx_elasticsearch.es52.deep import is_deepop, es_deepop
from jx_elasticsearch.es52.setop import is_setop, es_setop
from jx_elasticsearch.es52.deep import es_deepop, is_deepop
from jx_elasticsearch.es52.setop import es_setop, is_setop
from jx_elasticsearch.es52.util import aggregates
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
from jx_python import jx
from mo_dots import Data, unwrap, coalesce, split_field, join_field, wrap, listwrap
from mo_json import value2json
from mo_dots import Data, coalesce, is_list, join_field, listwrap, split_field, startswith_field, unwrap, wrap
from mo_future import sort_using_key
from mo_json import EXISTS, OBJECT, value2json
from mo_json.typed_encoder import EXISTS_TYPE
from mo_kwargs import override
from mo_logs import Log, Except
from mo_logs import Except, Log
from mo_times import Date
from pyLibrary.env import elasticsearch, http
@ -86,6 +88,41 @@ class ES52(Container):
Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed)
self.typed = typed
if not typed:
# ADD EXISTENCE COLUMNS
all_paths = {".": None} # MAP FROM path TO parent TO MAKE A TREE
def nested_path_of(v):
if not v:
return []
else:
return [v] + nested_path_of(all_paths[v])
all = sort_using_key(set(step for path in self.snowflake.query_paths for step in path), key=lambda p: len(split_field(p)))
for step in sorted(all):
if step in all_paths:
continue
else:
best = '.'
for candidate in all_paths.keys():
if startswith_field(step, candidate):
if startswith_field(candidate, best):
best = candidate
all_paths[step] = best
for p in all_paths.keys():
nested_path = nested_path_of(all_paths[p])
if not nested_path:
nested_path = ['.']
self.namespace.meta.columns.add(Column(
name=p,
es_column=p,
es_index=self.name,
es_type=OBJECT,
jx_type=EXISTS,
nested_path=nested_path,
last_updated=Date.now()
))
@property
def snowflake(self):
return self._namespace.get_snowflake(self.es.settings.alias)
@ -140,7 +177,7 @@ class ES52(Container):
)
frum = query["from"]
if isinstance(frum, QueryOp):
if is_op(frum, QueryOp):
result = self.query(frum)
q2 = query.copy()
q2.frum = result
@ -161,7 +198,7 @@ class ES52(Container):
Log.error("problem", e)
def addDimension(self, dim):
if isinstance(dim, list):
if is_list(dim):
Log.error("Expecting dimension to be a object, not a list:\n{{dim}}", dim= dim)
self._addDimension(dim, [])
@ -198,12 +235,11 @@ class ES52(Container):
es_index = self.es.cluster.get_index(read_only=False, alias=None, kwargs=self.es.settings)
schema = table.schema
es_filter = jx_expression(command.where).to_esfilter(schema)
# GET IDS OF DOCUMENTS
query = {
"from": command['update'],
"select": ["_id"] + [
"select": [{"value": "_id"}] + [
{"name": k, "value": v}
for k, v in command.set.items()
],
@ -234,7 +270,8 @@ class ES52(Container):
Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])
# DELETE BY QUERY, IF NEEDED
if '.' in listwrap(command.clear):
if "." in listwrap(command.clear):
es_filter = self.es.cluster.lang[jx_expression(command.where)].to_esfilter(schema)
self.es.delete_record(es_filter)
return

560
vendor/jx_elasticsearch/es52/aggs.py vendored
View File

@ -7,34 +7,45 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import deque
from jx_base.domains import SetDomain
from jx_base.expressions import TupleOp, NULL
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
from jx_base.expressions import NULL, TupleOp, Variable as Variable_
from jx_base.query import DEFAULT_LIMIT
from jx_base.language import is_op
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es52.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder
from jx_elasticsearch.es52.expressions import split_expression_by_depth, AndOp, Variable, NullOp
from jx_elasticsearch.es52.decoders import AggsDecoder
from jx_elasticsearch.es52.es_query import Aggs, ComplexAggs, ExprAggs, FilterAggs, NestedAggs, TermsAggs, simplify
from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_path
from jx_elasticsearch.es52.painless import Painless
from jx_elasticsearch.es52.setop import get_pull_stats
from jx_elasticsearch.es52.util import aggregates
from jx_python import jx
from jx_python.expressions import jx_expression_to_function
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import encode_property, EXISTS
from mo_dots import Data, Null, coalesce, join_field, listwrap, literal_field, unwrap, unwraplist, wrap
from mo_future import first, is_text, text_type
from mo_json import EXISTS, NESTED, OBJECT
from mo_json.typed_encoder import encode_property
from mo_logs import Log
from mo_logs.strings import quote, expand_template
from mo_math import Math, MAX, UNION
from mo_logs.strings import expand_template, quote
import mo_math
from mo_times.timer import Timer
DEBUG = False
COMPARE_TUPLE = """
(a, b)->{
int i=0;
for (dummy in a){ //ONLY THIS FOR LOOP IS ACCEPTED (ALL OTHER FORMS THROW NullPointerException)
if (a[i]==null) return -1*({{dir}});
if (b[i]==null) return 1*({{dir}});
if (a[i]==null){
if (b[i]==null){
return 0;
}else{
return -1*({{dir}});
}//endif
}else if (b[i]==null) return {{dir}};
if (a[i]!=b[i]) {
if (a[i] instanceof Boolean){
@ -82,12 +93,15 @@ def is_aggsop(es, query):
return False
def get_decoders_by_depth(query):
def get_decoders_by_path(query):
"""
RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
RETURN MAP FROM QUERY PATH TO LIST OF DECODER ARRAYS
:param query:
:return:
"""
schema = query.frum.schema
output = FlatList()
output = Data()
if query.edges:
if query.sort and query.format != "cube":
@ -99,7 +113,7 @@ def get_decoders_by_depth(query):
for edge in wrap(coalesce(query.edges, query.groupby, [])):
limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
if edge.value != None and not isinstance(edge.value, NullOp):
if edge.value != None and not edge.value is NULL:
edge = edge.copy()
vars_ = edge.value.vars()
for v in vars_:
@ -119,32 +133,25 @@ def get_decoders_by_depth(query):
for p in edge.domain.partitions:
vars_ |= p.where.vars()
try:
vars_ |= edge.value.vars()
depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var))
if -1 in depths:
Log.error(
"Do not know of column {{column}}",
column=unwraplist([v for v in vars_ if schema[v] == None])
)
if len(depths) > 1:
Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
max_depth = MAX(depths)
while len(output) <= max_depth:
output.append([])
except Exception as e:
# USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
max_depth = 0
output.append([])
vars_ |= edge.value.vars()
depths = set(c.nested_path[0] for v in vars_ for c in schema.leaves(v.var))
if not depths:
Log.error(
"Do not know of column {{column}}",
column=unwraplist([v for v in vars_ if schema[v] == None])
)
if len(depths) > 1:
Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
output[max_depth].append(AggsDecoder(edge, query, limit))
decoder = AggsDecoder(edge, query, limit)
output[literal_field(first(depths))] += [decoder]
return output
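As a self-contained illustration of the accumulation above (plain dicts and strings standing in for Data, the schema lookup, and AggsDecoder; all names hypothetical):
# Toy sketch: each edge is filed under the one nested path its
# variables resolve to; spanning more than one path is an error.
from collections import defaultdict
edges = [
    {"name": "os", "path": "."},           # top-level column
    {"name": "result", "path": ".~N~"},    # column inside a nested array
]
output = defaultdict(list)
for edge in edges:
    depths = {edge["path"]}                # stands in for schema.leaves()
    if len(depths) > 1:
        raise ValueError("expression spans tables, can not handle")
    output[next(iter(depths))].append(edge["name"])
# output == {".": ["os"], ".~N~": ["result"]}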
def sort_edges(query, prop):
ordered_edges = []
remaining_edges = getattr(query, prop)
for s in query.sort:
for s in jx.reverse(query.sort):
for e in remaining_edges:
if e.value == s.value:
if isinstance(e.domain, SetDomain):
@ -158,45 +165,59 @@ def sort_edges(query, prop):
Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value)
ordered_edges.extend(remaining_edges)
for i, o in enumerate(ordered_edges):
o.dim = i # REORDER THE EDGES
return ordered_edges
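A minimal plain-Python sketch of what sort_edges does (hypothetical edge and sort records): edges named in the sort clause move to the front in reverse sort order, the rest keep their relative order, and dim is rewritten to the new positions.
# Toy illustration of the reordering:
edges = [{"value": "a"}, {"value": "b"}, {"value": "c"}]
sort = [{"value": "b"}, {"value": "c"}]
ordered = [e for s in reversed(sort) for e in edges if e["value"] == s["value"]]
ordered += [e for e in edges if e not in ordered]
for i, e in enumerate(ordered):
    e["dim"] = i                           # REORDER THE EDGES
# ordered values: ["c", "b", "a"]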
def es_aggsop(es, frum, query):
query = query.copy() # WE WILL MARK UP THIS QUERY
schema = frum.schema
query_path = schema.query_path[0]
select = listwrap(query.select)
es_query = Data()
new_select = Data() # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
formula = []
for s in select:
if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
if schema.query_path == ".":
s.pull = jx_expression_to_function("doc_count")
else:
s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
elif isinstance(s.value, Variable):
if is_op(s.value, Variable_):
s.query_path = query_path
if s.aggregate == "count":
new_select["count_"+literal_field(s.value.var)] += [s]
else:
new_select[literal_field(s.value.var)] += [s]
elif s.aggregate:
split_select = split_expression_by_path(s.value, schema, lang=Painless)
for si_key, si_value in split_select.items():
if si_value:
if s.query_path:
Log.error("can not handle more than one depth per select")
s.query_path = si_key
formula.append(s)
for canonical_name, many in new_select.items():
acc = Aggs()
for _, many in new_select.items():
for s in many:
columns = frum.schema.values(s.value.var)
canonical_name = s.name
if s.aggregate in ("value_count", "count"):
columns = frum.schema.values(s.value.var, exclude_type=(OBJECT, NESTED))
else:
columns = frum.schema.values(s.value.var)
if s.aggregate == "count":
canonical_names = []
for column in columns:
cn = literal_field(column.es_column + "_count")
es_name = column.es_column + "_count"
if column.jx_type == EXISTS:
canonical_names.append(cn + ".doc_count")
es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
if column.nested_path[0] == query_path:
canonical_names.append("doc_count")
acc.add(NestedAggs(column.nested_path[0]).add(
ComplexAggs(s)
))
else:
canonical_names.append(cn+ ".value")
es_query.aggs[cn].value_count.field = column.es_column
canonical_names.append("value")
acc.add(NestedAggs(column.nested_path[0]).add(
ExprAggs(es_name, {"value_count": {"field": column.es_column}}, s)
))
if len(canonical_names) == 1:
s.pull = jx_expression_to_function(canonical_names[0])
else:
@ -205,49 +226,48 @@ def es_aggsop(es, frum, query):
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
# ES USES DIFFERENT METHOD FOR PERCENTILES
key = literal_field(canonical_name + " percentile")
es_query.aggs[key].percentiles.field = columns[0].es_column
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\\.0")
key = canonical_name + " percentile"
acc.add(ExprAggs(key, {"percentiles": {
"field": first(columns).es_column,
"percents": [50]
}}, s))
s.pull = jx_expression_to_function("values.50\\.0")
elif s.aggregate == "percentile":
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
# ES USES DIFFERENT METHOD FOR PERCENTILES
key = literal_field(canonical_name + " percentile")
if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile:
key = canonical_name + " percentile"
if is_text(s.percentile) or s.percentile < 0 or 1 < s.percentile:
Log.error("Expecting percentile to be a float from 0.0 to 1.0")
percent = Math.round(s.percentile * 100, decimal=6)
percent = mo_math.round(s.percentile * 100, decimal=6)
es_query.aggs[key].percentiles.field = columns[0].es_column
es_query.aggs[key].percentiles.percents += [percent]
es_query.aggs[key].percentiles.tdigest.compression = 2
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
acc.add(ExprAggs(key, {"percentiles": {
"field": first(columns).es_column,
"percents": [percent],
"tdigest": {"compression": 2}
}}, s))
s.pull = jx_expression_to_function(join_field(["values", text_type(percent)]))
elif s.aggregate == "cardinality":
canonical_names = []
for column in columns:
cn = literal_field(column.es_column + "_cardinality")
canonical_names.append(cn)
es_query.aggs[cn].cardinality.field = column.es_column
if len(columns) == 1:
s.pull = jx_expression_to_function(canonical_names[0] + ".value")
else:
s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
path = column.es_column + "_cardinality"
acc.add(ExprAggs(path, {"cardinality": {"field": column.es_column}}, s))
s.pull = jx_expression_to_function("value")
elif s.aggregate == "stats":
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
# REGULAR STATS
stats_name = literal_field(canonical_name)
es_query.aggs[stats_name].extended_stats.field = columns[0].es_column
complex = ComplexAggs(s).add(ExprAggs(canonical_name, {"extended_stats": {"field": first(columns).es_column}}, None))
# GET MEDIAN TOO!
median_name = literal_field(canonical_name + "_percentile")
es_query.aggs[median_name].percentiles.field = columns[0].es_column
es_query.aggs[median_name].percentiles.percents += [50]
complex.add(ExprAggs(canonical_name + "_percentile", {"percentiles": {
"field": first(columns).es_column,
"percents": [50]
}}, None))
acc.add(complex)
s.pull = get_pull_stats(stats_name, median_name)
elif s.aggregate == "union":
pulls = []
for column in columns:
script = {"scripted_metric": {
'init_script': 'params._agg.terms = new HashSet()',
@ -255,41 +275,61 @@ def es_aggsop(es, frum, query):
'combine_script': 'return params._agg.terms.toArray()',
'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
}}
stats_name = column.es_column
acc.add(NestedAggs(column.nested_path[0]).add(ExprAggs(stats_name, script, s)))
s.pull = jx_expression_to_function("value")
elif s.aggregate == "count_values":
# RETURN MAP FROM VALUE TO THE NUMBER OF TIMES FOUND IN THE DOCUMENTS
# NOT A NESTED DOC, RATHER A MULTIVALUE FIELD
for column in columns:
script = {"scripted_metric": {
'params': {"_agg": {}},
'init_script': 'params._agg.terms = new HashMap()',
'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.put(v, Optional.ofNullable(params._agg.terms.get(v)).orElse(0)+1);',
'combine_script': 'return params._agg.terms',
'reduce_script': '''
HashMap output = new HashMap();
for (agg in params._aggs) {
if (agg!=null){
for (e in agg.entrySet()) {
String key = String.valueOf(e.getKey());
output.put(key, e.getValue() + Optional.ofNullable(output.get(key)).orElse(0));
}
}
}
return output;
'''
}}
stats_name = encode_property(column.es_column)
if column.nested_path[0] == ".":
es_query.aggs[stats_name] = script
pulls.append(jx_expression_to_function(stats_name + ".value"))
else:
es_query.aggs[stats_name] = {
"nested": {"path": column.nested_path[0]},
"aggs": {"_nested": script}
}
pulls.append(jx_expression_to_function(stats_name + "._nested.value"))
if len(pulls) == 0:
s.pull = NULL
elif len(pulls) == 1:
s.pull = pulls[0]
else:
s.pull = lambda row: UNION(p(row) for p in pulls)
acc.add(NestedAggs(column.nested_path[0]).add(ExprAggs(stats_name, script, s)))
s.pull = jx_expression_to_function("value")
else:
if len(columns) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
elif len(columns) < 1:
# PULL VALUE OUT OF THE stats AGGREGATE
s.pull = jx_expression_to_function({"null":{}})
if not columns:
s.pull = jx_expression_to_function(NULL)
else:
# PULL VALUE OUT OF THE stats AGGREGATE
es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})
for c in columns:
acc.add(NestedAggs(c.nested_path[0]).add(
ExprAggs(canonical_name, {"extended_stats": {"field": c.es_column}}, s)
))
s.pull = jx_expression_to_function(aggregates[s.aggregate])
for i, s in enumerate(formula):
canonical_name = literal_field(s.name)
s_path = [k for k, v in split_expression_by_path(s.value, schema=schema, lang=Painless).items() if v]
if len(s_path) == 0:
# FOR CONSTANTS
nest = NestedAggs(query_path)
acc.add(nest)
elif len(s_path) == 1:
nest = NestedAggs(first(s_path))
acc.add(nest)
else:
Log.error("do not know how to handle")
if isinstance(s.value, TupleOp):
canonical_name = s.name
if is_op(s.value, TupleOp):
if s.aggregate == "count":
# TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
s.pull = "doc_count"
s.pull = jx_expression_to_function("doc_count")
elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
if s.aggregate in ('max', 'maximum'):
dir = 1
@ -298,136 +338,110 @@ def es_aggsop(es, frum, query):
dir = -1
op = 'min'
nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr
selfy = s.value.partial_eval().to_es_script(schema).expr
nully = text_type(Painless[TupleOp([NULL]*len(s.value.terms))].partial_eval().to_es_script(schema))
selfy = text_type(Painless[s.value].partial_eval().to_es_script(schema))
script = {"scripted_metric": {
'init_script': 'params._agg.best = ' + nully + ';',
'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": dir, "op": op}) + ";",
'combine_script': 'return params._agg.best',
'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()',
'reduce_script': 'return params._aggs.stream().'+op+'(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()',
}}
if schema.query_path[0] == ".":
es_query.aggs[canonical_name] = script
s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
else:
es_query.aggs[canonical_name] = {
"nested": {"path": schema.query_path[0]},
"aggs": {"_nested": script}
}
s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value")
nest.add(NestedAggs(query_path).add(
ExprAggs(canonical_name, script, s)
))
s.pull = jx_expression_to_function("value")
else:
Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
elif s.aggregate == "count":
es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
nest.add(ExprAggs(canonical_name, {"value_count": {"script": text_type(Painless[s.value].partial_eval().to_es_script(schema))}}, s))
s.pull = jx_expression_to_function("value")
elif s.aggregate == "median":
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
key = literal_field(canonical_name + " percentile")
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\\.0")
nest.add(ExprAggs(key, {"percentiles": {
"script": text_type(Painless[s.value].to_es_script(schema)),
"percents": [50]
}}, s))
s.pull = jx_expression_to_function(join_field(["50.0"]))
elif s.aggregate == "percentile":
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
key = literal_field(canonical_name + " percentile")
percent = Math.round(s.percentile * 100, decimal=6)
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
es_query.aggs[key].percentiles.percents += [percent]
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
percent = mo_math.round(s.percentile * 100, decimal=6)
nest.add(ExprAggs(key, {"percentiles": {
"script": text_type(Painless[s.value].to_es_script(schema)),
"percents": [percent]
}}, s))
s.pull = jx_expression_to_function(join_field(["values", text_type(percent)]))
elif s.aggregate == "cardinality":
# ES USES DIFFERENT METHOD FOR CARDINALITY
key = canonical_name + " cardinality"
es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
s.pull = jx_expression_to_function(key + ".value")
nest.add(ExprAggs(key, {"cardinality": {"script": text_type(Painless[s.value].to_es_script(schema))}}, s))
s.pull = jx_expression_to_function("value")
elif s.aggregate == "stats":
# REGULAR STATS
stats_name = literal_field(canonical_name)
es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
stats_name = canonical_name
nest.add(ComplexAggs(s).add(ExprAggs(stats_name, {"extended_stats": {"script": text_type(Painless[s.value].to_es_script(schema))}}, None)))
# GET MEDIAN TOO!
median_name = literal_field(canonical_name + " percentile")
es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
es_query.aggs[median_name].percentiles.percents += [50]
s.pull = get_pull_stats(stats_name, median_name)
median_name = canonical_name + " percentile"
nest.add(ExprAggs(median_name, {"percentiles": {
"script": text_type(Painless[s.value].to_es_script(schema)),
"percents": [50]
}}, s))
s.pull = get_pull_stats(None, stats_name, median_name)
elif s.aggregate == "union":
# USE TERMS AGGREGATE TO SIMULATE union
stats_name = literal_field(canonical_name)
es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
s.pull = jx_expression_to_function(stats_name + ".buckets.key")
nest.add(TermsAggs(canonical_name, {"script_field": text_type(Painless[s.value].to_es_script(schema))}, s))
s.pull = jx_expression_to_function("key")
else:
# PULL VALUE OUT OF THE stats AGGREGATE
s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
s.pull = jx_expression_to_function(aggregates[s.aggregate])
nest.add(ExprAggs(canonical_name, {"extended_stats": {"script": text_type(Painless[s.value].to_es_script(schema))}}, s))
acc = NestedAggs(query_path).add(acc)
split_decoders = get_decoders_by_path(query)
split_wheres = split_expression_by_path(query.where, schema=frum.schema, lang=ES52)
decoders = get_decoders_by_depth(query)
start = 0
decoders = [None] * (len(query.edges) + len(query.groupby))
paths = list(reversed(sorted(split_wheres.keys() | split_decoders.keys())))
for path in paths:
literal_path = literal_field(path)
decoder = split_decoders[literal_path]
where = split_wheres[literal_path]
# <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
split_where = split_expression_by_depth(query.where, schema=frum.schema)
if len(split_field(frum.name)) > 1:
if any(split_where[2::]):
Log.error("Where clause is too deep")
for d in decoders[1]:
es_query = d.append_query(es_query, start)
for d in decoder:
decoders[d.edge.dim] = d
acc = d.append_query(path, acc)
start += d.num_columns
if split_where[1]:
#TODO: INCLUDE FILTERS ON EDGES
filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
es_query = Data(
aggs={"_filter": set_default({"filter": filter_}, es_query)}
)
if where:
acc = FilterAggs("_filter", AndOp(where), None).add(acc)
acc = NestedAggs(path).add(acc)
es_query = wrap({
"aggs": {"_nested": set_default(
{"nested": {"path": schema.query_path[0]}},
es_query
)}
})
else:
if any(split_where[1::]):
Log.error("Where clause is too deep")
if decoders:
for d in jx.reverse(decoders[0]):
es_query = d.append_query(es_query, start)
start += d.num_columns
if split_where[0]:
#TODO: INCLUDE FILTERS ON EDGES
filter = AndOp("and", split_where[0]).to_esfilter(schema)
es_query = Data(
aggs={"_filter": set_default({"filter": filter}, es_query)}
)
# </TERRIBLE SECTION>
if not es_query:
es_query = wrap({"query": {"match_all": {}}})
acc = NestedAggs('.').add(acc)
acc = simplify(acc)
es_query = wrap(acc.to_es(schema))
es_query.size = 0
with Timer("ES query time") as es_duration:
with Timer("ES query time", silent=not DEBUG) as es_duration:
result = es_post(es, es_query, query.limit)
try:
format_time = Timer("formatting")
format_time = Timer("formatting", silent=not DEBUG)
with format_time:
decoders = [d for ds in decoders for d in ds]
result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) # IT APPEARS THE OLD doc_count IS GONE
# result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) # IT APPEARS THE OLD doc_count IS GONE
aggs = unwrap(result.aggregations)
formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
if query.edges:
output = formatter(decoders, result.aggregations, start, query, select)
output = formatter(aggs, acc, query, decoders, select)
elif query.groupby:
output = groupby_formatter(decoders, result.aggregations, start, query, select)
output = groupby_formatter(aggs, acc, query, decoders, select)
else:
output = aggop_formatter(decoders, result.aggregations, start, query, select)
output = aggop_formatter(aggs, acc, query, decoders, select)
output.meta.timing.formatting = format_time.duration
output.meta.timing.es_search = es_duration.duration
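For reference, the request body the value-based percentile branch above produces looks roughly like this (index and field names hypothetical; note ES percents run 0..100 while jx percentiles are 0.0..1.0):
# Approximate ES body for {"aggregate": "percentile", "percentile": 0.75}:
es_body = {
    "aggs": {
        "score percentile": {
            "percentiles": {
                "field": "score",
                "percents": [75.0],
                "tdigest": {"compression": 2},
            }
        }
    },
    "size": 0,
}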
@ -445,89 +459,121 @@ EMPTY_LIST = []
def drill(agg):
deeper = agg.get("_filter") or agg.get("_nested")
while deeper:
agg = deeper
deeper = agg.get("_filter") or agg.get("_nested")
return agg
while True:
deeper = agg.get("_filter")
if deeper:
agg = deeper
continue
return agg
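A toy call of the rewritten drill() above, which now peels only _filter wrappers off a bucket (the nested cases are tracked by the abstract es_query tree instead; bucket contents hypothetical):
# drill() unwraps any number of "_filter" layers and returns the payload:
agg = {"_filter": {"_filter": {"doc_count": 3, "key": "x"}}}
assert drill(agg) == {"doc_count": 3, "key": "x"}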
def aggs_iterator(aggs, decoders, coord=True):
def _children(agg, children):
for child in children:
name = child.name
v = agg[name]
if name == "_match":
for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
yield i, b, child, b
elif name.startswith("_match"):
i = int(name[6:])
yield i, v, child, v
elif name.startswith("_missing"):
if len(name) == 8:
i = None
else:
i = int(name[8:])
yield None, v, child, v
else:
yield None, v, child, None
def aggs_iterator(aggs, es_query, decoders, give_me_zeros=False):
"""
DIG INTO ES'S RECURSIVE aggs DATA-STRUCTURE:
RETURN AN ITERATOR OVER THE EFFECTIVE ROWS OF THE RESULTS
:param aggs: ES AGGREGATE OBJECT
:param decoders:
:param coord: TURN ON LOCAL COORDINATE LOOKUP
:param es_query: THE ABSTRACT ES QUERY WE WILL TRACK ALONGSIDE aggs
:param decoders: TO CONVERT PARTS INTO COORDINATES
"""
depth = max(d.start + d.num_columns for d in decoders)
coord = [0] * len(decoders)
parts = deque()
stack = []
def _aggs_iterator(agg, d):
agg = drill(agg)
gen = _children(aggs, es_query.children)
while True:
try:
index, c_agg, c_query, part = next(gen)
except StopIteration:
try:
gen = stack.pop()
except IndexError:
return
parts.popleft()
continue
if d > 0:
for k, v in agg.items():
if k == "_match":
v = drill(v)
for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
b["_index"] = i
for a, parts in _aggs_iterator(b, d - 1):
yield a, parts + (b,)
elif k == "_other":
for b in v.get("buckets", EMPTY_LIST):
for a, parts in _aggs_iterator(b, d - 1):
yield a, parts + (Null,)
elif k == "_missing":
b = drill(v)
for a, parts in _aggs_iterator(b, d - 1):
yield a, parts + (b,)
elif k.startswith("_join_"):
v["key"] = int(k[6:])
for a, parts in _aggs_iterator(v, d - 1):
yield a, parts + (v,)
else:
for k, v in agg.items():
if k == "_match":
v = drill(v)
for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
b["_index"] = i
yield b, (b,)
elif k == "_other":
for b in v.get("buckets", EMPTY_LIST):
yield b, (Null,)
elif k == "_missing":
b = drill(v,)
yield b, (v,)
elif k.startswith("_join_"):
v["_index"] = int(k[6:])
yield v, (v,)
if c_agg.get('doc_count') == 0 and not give_me_zeros:
continue
parts.appendleft(part)
for d in c_query.decoders:
coord[d.edge.dim] = d.get_index(tuple(p for p in parts if p is not None), c_query, index)
if coord:
for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
coord = tuple(d.get_index(parts) for d in decoders)
if any(c is None for c in coord):
children = c_query.children
selects = c_query.selects
if selects or not children:
parts.popleft() # c_agg WAS ON TOP
yield (
tuple(p for p in parts if p is not None),
tuple(coord),
c_agg,
selects
)
continue
stack.append(gen)
gen = _children(c_agg, children)
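Each yield from aggs_iterator is a 4-tuple (parts, coord, agg, selects): the bucket chain, the row's coordinate in the result cube, the leaf aggregate, and the select clauses whose pull functions read it. A runnable stand-in for a consumer (all names hypothetical):
# Sketch of consuming the iterator's (parts, coord, agg, selects) rows:
def fake_rows():
    yield ((), (0,), {"value": 42.0},
           [{"name": "avg_score", "pull": lambda a: a["value"]}])
cube = {}
for parts, coord, agg, selects in fake_rows():
    for s in selects:
        cube[(coord, s["name"])] = s["pull"](agg)
# cube == {((0,), "avg_score"): 42.0}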
def count_dim(aggs, es_query, decoders):
if not any(hasattr(d, "done_count") for d in decoders):
return [d.edge for d in decoders]
def _count_dim(parts, aggs, es_query):
children = es_query.children
if not children:
return
for child in children:
name = child.name
agg = aggs[name]
if agg.get('doc_count') == 0:
continue
yield parts, coord, a
else:
for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
yield parts, None, a
elif name == "_match":
for i, b in enumerate(agg.get("buckets", EMPTY_LIST)):
if not b.get('doc_count'):
continue
b["_index"] = i
new_parts = (b,) + parts
for d in child.decoders:
d.count(new_parts)
_count_dim(new_parts, b, child)
elif name.startswith("_missing"):
new_parts = (agg,) + parts
for d in child.decoders:
d.count(new_parts)
_count_dim(new_parts, agg, child)
else:
_count_dim(parts, agg, child)
def count_dim(aggs, decoders):
if any(isinstance(d, (DefaultDecoder, DimFieldListDecoder, ObjectDecoder)) for d in decoders):
# ENUMERATE THE DOMAINS, IF UNKNOWN AT QUERY TIME
for row, coord, agg in aggs_iterator(aggs, decoders, coord=False):
for d in decoders:
d.count(row)
for d in decoders:
d.done_count()
new_edges = wrap([d.edge for d in decoders])
return new_edges
_count_dim(tuple(), aggs, es_query)
for d in decoders:
done_count = getattr(d, "done_count", Null)
done_count()
return [d.edge for d in decoders]
format_dispatch = {}
from jx_elasticsearch.es52.format import format_cube
_ = format_cube
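The dispatch table itself is filled in by es52/format.py; each entry is a 4-tuple matching the unpacking in es_aggsop above. A sketch with stand-in handlers (the real registrations differ):
# Hypothetical entry; handler bodies are placeholders:
format_dispatch["cube"] = (
    lambda aggs, acc, query, decoders, select: {"type": "cube"},   # formatter
    lambda aggs, acc, query, decoders, select: {"type": "table"},  # groupby_formatter
    lambda aggs, acc, query, decoders, select: {"type": "value"},  # aggop_formatter
    "application/json",                                            # mime_type
)
formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch["cube"]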

vendor/jx_elasticsearch/es52/decoders.py
@ -7,26 +7,29 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from jx_base.dimensions import Dimension
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
from jx_base.expressions import TupleOp, TRUE
from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT
from jx_elasticsearch.es52.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE
from jx_elasticsearch.es52.util import es_missing
from jx_base.domains import DefaultDomain, PARTITION, SimpleSetDomain
from jx_base.expressions import ExistsOp, FirstOp, GtOp, GteOp, LeavesOp, LtOp, LteOp, MissingOp, TupleOp, Variable
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
from jx_base.language import is_op
from jx_elasticsearch.es52.es_query import Aggs, FilterAggs, FiltersAggs, NestedAggs, RangeAggs, TermsAggs
from jx_elasticsearch.es52.expressions import AndOp, InOp, Literal, NotOp
from jx_elasticsearch.es52.painless import LIST_TO_PIPE, Painless
from jx_elasticsearch.es52.util import pull_functions
from jx_python import jx
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
from mo_future import text_type, transpose
from mo_json.typed_encoder import untype_path, STRING, NUMBER, BOOLEAN
from mo_dots import Data, coalesce, concat_field, is_data, literal_field, relative_field, set_default, wrap
from mo_future import first, text_type, transpose
from mo_json import EXISTS, OBJECT, STRING
from mo_json.typed_encoder import EXISTS_TYPE, NESTED_TYPE, untype_path
from mo_logs import Log
from mo_logs.strings import quote, expand_template
from mo_math import MAX, MIN, Math
from pyLibrary.convert import string2boolean
from mo_logs.strings import expand_template, quote
import mo_math
from mo_math import MAX, MIN
DEBUG = False
class AggsDecoder(object):
@ -37,91 +40,100 @@ class AggsDecoder(object):
# if query.groupby:
# return object.__new__(DefaultDecoder, e)
if isinstance(e.value, text_type):
if is_text(e.value):
Log.error("Expecting Variable or Expression, not plain string")
if isinstance(e.value, LeavesOp):
return object.__new__(ObjectDecoder, e)
elif isinstance(e.value, TupleOp):
if is_op(e.value, LeavesOp):
return object.__new__(ObjectDecoder)
elif is_op(e.value, TupleOp):
# THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
# JUST PULL THE FIELDS
if not all(isinstance(t, Variable) for t in e.value.terms):
if not all(is_op(t, Variable) for t in e.value.terms):
Log.error("Can only handle variables in tuples")
e.domain = Data(
dimension={"fields": e.value.terms}
)
return object.__new__(DimFieldListDecoder, e)
return object.__new__(DimFieldListDecoder)
elif isinstance(e.value, Variable):
elif is_op(e.value, Variable):
schema = query.frum.schema
cols = schema.leaves(e.value.var)
if not cols:
return object.__new__(DefaultDecoder, e)
return object.__new__(DefaultDecoder)
if len(cols) != 1:
return object.__new__(ObjectDecoder, e)
col = cols[0]
return object.__new__(ObjectDecoder)
col = first(cols)
limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)
if col.partitions != None:
if col.multi > 1 and len(col.partitions) < 6:
if col.cardinality == None:
DEBUG and Log.warning(
"metadata for column {{name|quote}} (id={{id}}) is not ready",
name=concat_field(col.es_index, col.es_column),
id=id(col)
)
e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
return object.__new__(DefaultDecoder)
elif col.partitions == None:
e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
return object.__new__(DefaultDecoder)
else:
DEBUG and Log.note("id={{id}} has parts!!!", id=id(col))
if col.multi > 1 and len(col.partitions) < 10:
return object.__new__(MultivalueDecoder)
partitions = col.partitions[:limit:]
if e.domain.sort==-1:
if e.domain.sort == -1:
partitions = list(reversed(sorted(partitions)))
else:
partitions = sorted(partitions)
e.domain = SimpleSetDomain(partitions=partitions, limit=limit)
else:
e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
return object.__new__(DefaultDecoder, e)
else:
return object.__new__(DefaultDecoder, e)
return object.__new__(DefaultDecoder)
if e.value and e.domain.type in PARTITION:
return object.__new__(SetDecoder, e)
return object.__new__(SetDecoder)
if isinstance(e.domain.dimension, Dimension):
e.domain = e.domain.dimension.getDomain()
return object.__new__(SetDecoder, e)
return object.__new__(SetDecoder)
if e.value and e.domain.type == "time":
return object.__new__(TimeDecoder, e)
return object.__new__(TimeDecoder)
if e.range:
return object.__new__(GeneralRangeDecoder, e)
return object.__new__(GeneralRangeDecoder)
if e.value and e.domain.type == "duration":
return object.__new__(DurationDecoder, e)
return object.__new__(DurationDecoder)
elif e.value and e.domain.type == "range":
return object.__new__(RangeDecoder, e)
return object.__new__(RangeDecoder)
elif not e.value and e.domain.dimension.fields:
# THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
# JUST PULL THE FIELDS
fields = e.domain.dimension.fields
if isinstance(fields, Mapping):
if is_data(fields):
Log.error("No longer allowed: All objects are expressions")
else:
return object.__new__(DimFieldListDecoder, e)
return object.__new__(DimFieldListDecoder)
elif not e.value and all(e.domain.partitions.where):
return object.__new__(GeneralSetDecoder, e)
return object.__new__(GeneralSetDecoder)
else:
Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)
def __init__(self, edge, query, limit):
self.start = None
self.edge = edge
self.name = literal_field(self.edge.name)
self.query = query
self.limit = limit
self.schema = self.query.frum.schema
def append_query(self, es_query, start):
def append_query(self, query_path, es_query):
Log.error("Not supported")
def count(self, row):
pass
def done_count(self):
pass
# DO NOT IMPLEMENT IF domain HAS KNOWN PARTITIONS
# def done_count(self):
# pass
def get_value_from_row(self, row):
raise NotImplementedError()
@ -129,7 +141,7 @@ class AggsDecoder(object):
def get_value(self, index):
raise NotImplementedError()
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
raise NotImplementedError()
@property
@ -155,70 +167,74 @@ class SetDecoder(AggsDecoder):
parts = jx.sort(domain.partitions, {"value": domain.key, "sort": s.sort})
edge.domain = self.domain = SimpleSetDomain(key=domain.key, label=domain.label, partitions=parts)
def append_query(self, es_query, start):
self.start = start
def append_query(self, query_path, es_query):
domain = self.domain
domain_key = domain.key
include, text_include = transpose(*(
(
float(v) if isinstance(v, (int, float)) else v,
text_type(float(v)) if isinstance(v, (int, float)) else v
)
for v in (p[domain_key] for p in domain.partitions)
))
value = self.edge.value
exists = AndOp("and", [
value.exists(),
InOp("in", [value, Literal("literal", include)])
]).partial_eval()
value = Painless[self.edge.value]
cnv = pull_functions[value.type]
include = tuple(cnv(p[domain_key]) for p in domain.partitions)
exists = Painless[AndOp([
InOp([value, Literal(include)])
])].partial_eval()
limit = coalesce(self.limit, len(domain.partitions))
if isinstance(value, Variable):
es_field = self.query.frum.schema.leaves(value.var)[0].es_column # ALREADY CHECKED THERE IS ONLY ONE
terms = set_default({"terms": {
"field": es_field,
"size": limit,
"order": {"_term": self.sorted} if self.sorted else None
}}, es_query)
else:
terms = set_default({"terms": {
"script": {
"lang": "painless",
"inline": value.to_es_script(self.schema).script(self.schema)
if is_op(value, Variable):
es_field = first(self.query.frum.schema.leaves(value.var)).es_column # ALREADY CHECKED THERE IS ONLY ONE
match = TermsAggs(
"_match",
{
"field": es_field,
"size": limit,
"order": {"_term": self.sorted} if self.sorted else None
},
"size": limit
}}, es_query)
if self.edge.allowNulls:
missing = set_default(
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
es_query
self
)
else:
missing = None
match = TermsAggs(
"_match",
{
"script": text_type(value.to_es_script(self.schema)),
"size": limit
},
self
)
output = Aggs().add(FilterAggs("_filter", exists, None).add(match.add(es_query)))
return wrap({"aggs": {
"_match": {
"filter": exists.to_esfilter(self.schema),
"aggs": {
"_filter": terms
}
},
"_missing": missing
}})
if self.edge.allowNulls:
# FIND NULLS AT EACH NESTED LEVEL
for p in self.schema.query_path:
if p == query_path:
# MISSING AT THE QUERY DEPTH
output.add(
NestedAggs(p).add(FilterAggs("_missing0", NotOp(exists), self).add(es_query))
)
else:
# PARENT HAS NO CHILDREN, SO MISSING
column = first(self.schema.values(query_path, (OBJECT, EXISTS)))
output.add(
NestedAggs(column.nested_path[0]).add(
FilterAggs(
"_missing1",
NotOp(ExistsOp(Variable(column.es_column.replace(NESTED_TYPE, EXISTS_TYPE)))),
self
).add(es_query)
)
)
return output
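Rendered to ES JSON, the tree assembled above comes out roughly as below (field, partitions, and filters hypothetical and simplified):
# Approximate output of SetDecoder.append_query for a variable edge:
es_aggs = {
    "aggs": {
        "_filter": {  # value exists AND is in the known partitions
            "filter": {"terms": {"user": ["alice", "bob"]}},
            "aggs": {"_match": {"terms": {"field": "user", "size": 2}}},
        },
        "_missing0": {  # only when edge.allowNulls
            "filter": {"bool": {"must_not": {"exists": {"field": "user"}}}},
        },
    }
}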
def get_value(self, index):
return self.domain.getKeyByIndex(index)
def get_value_from_row(self, row):
return self.pull(row[self.start].get('key'))
def get_value_from_row(self, parts):
key = parts[0].get('key')
return self.pull(key)
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
try:
part = row[self.start]
return self.domain.getIndexByKey(part.get('key'))
key = row[0].get('key')
return self.domain.getIndexByKey(key)
except Exception as e:
Log.error("problem", cause=e)
@ -227,52 +243,43 @@ class SetDecoder(AggsDecoder):
return 1
def _range_composer(edge, domain, es_query, to_float, schema):
def _range_composer(self, edge, domain, es_query, to_float, schema):
# USE RANGES
_min = coalesce(domain.min, MIN(domain.partitions.min))
_max = coalesce(domain.max, MAX(domain.partitions.max))
output = Aggs()
if edge.allowNulls:
missing_filter = set_default(
{
"filter": NotOp("not", AndOp("and", [
edge.value.exists(),
InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]),
InequalityOp("lt", [edge.value, Literal(None, to_float(_max))])
]).partial_eval()).to_esfilter(schema)
},
es_query
)
else:
missing_filter = None
output.add(FilterAggs(
"_missing",
NotOp(AndOp([
edge.value.exists(),
GteOp([edge.value, Literal(to_float(_min))]),
LtOp([edge.value, Literal(to_float(_max))])
]).partial_eval()),
self
).add(es_query))
if isinstance(edge.value, Variable):
calc = {"field": schema.leaves(edge.value.var)[0].es_column}
if is_op(edge.value, Variable):
calc = {"field": first(schema.leaves(edge.value.var)).es_column}
else:
calc = {"script": edge.value.to_es_script(schema).script(schema)}
calc = {"script": text_type(Painless[edge.value].to_es_script(schema))}
calc['ranges'] = [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]
return wrap({"aggs": {
"_match": set_default(
{"range": calc},
{"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
es_query
),
"_missing": missing_filter
}})
return output.add(RangeAggs("_match", calc, self).add(es_query))
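The rendered range aggregation, for a hypothetical numeric edge with two partitions (the _missing filter is simplified here; the real one is NOT(exists AND min <= value < max)):
# Approximate output of _range_composer:
es_aggs = {
    "aggs": {
        "_match": {
            "range": {
                "field": "duration",
                "ranges": [
                    {"from": 0.0, "to": 60.0},
                    {"from": 60.0, "to": 3600.0},
                ],
            }
        },
        "_missing": {  # only when edge.allowNulls
            "filter": {"bool": {"must_not": {"exists": {"field": "duration"}}}},
        },
    }
}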
class TimeDecoder(AggsDecoder):
def append_query(self, es_query, start):
self.start = start
def append_query(self, query_path, es_query):
schema = self.query.frum.schema
return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.unix, schema)
return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x.unix, schema)
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
domain = self.edge.domain
part = row[self.start]
part = row[0]
if part == None:
return len(domain.partitions)
@ -309,35 +316,31 @@ class GeneralRangeDecoder(AggsDecoder):
else:
Log.error("Unknown domain of type {{type}} for range edge", type=edge.domain.type)
def append_query(self, es_query, start):
self.start = start
def append_query(self, query_path, es_query):
edge = self.edge
range = edge.range
domain = edge.domain
aggs = {}
aggs = Aggs()
for i, p in enumerate(domain.partitions):
filter_ = AndOp("and", [
InequalityOp("lte", [range.min, Literal("literal", self.to_float(p.min))]),
InequalityOp("gt", [range.max, Literal("literal", self.to_float(p.min))])
filter_ = AndOp([
LteOp([range.min, Literal(self.to_float(p.min))]),
GtOp([range.max, Literal(self.to_float(p.min))])
])
aggs["_join_" + text_type(i)] = set_default(
{"filter": filter_.to_esfilter(self.schema)},
es_query
)
aggs.add(FilterAggs("_match" + text_type(i), filter_, self).add(es_query))
return wrap({"aggs": aggs})
return aggs
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
domain = self.edge.domain
part = row[self.start]
part = row[0]
if part == None:
return len(domain.partitions)
return part["_index"]
index = int(es_query.name[6:])
return index
@property
def num_columns(self):
@ -349,42 +352,30 @@ class GeneralSetDecoder(AggsDecoder):
EXPECTING ALL PARTS IN partitions TO HAVE A where CLAUSE
"""
def append_query(self, es_query, start):
self.start = start
def append_query(self, query_path, es_query):
parts = self.edge.domain.partitions
filters = []
notty = []
for p in parts:
w = p.where
filters.append(AndOp("and", [w] + notty).to_esfilter(self.schema))
notty.append(NotOp("not", w))
filters.append(AndOp([w] + notty))
notty.append(NotOp(w))
missing_filter = None
output = Aggs().add(FiltersAggs("_match", filters, self).add(es_query))
if self.edge.allowNulls: # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
missing_filter = set_default(
{"filter": AndOp("and", notty).to_esfilter(self.schema)},
es_query
)
output.add(FilterAggs("_missing", AndOp(notty), self).add(es_query))
return wrap({"aggs": {
"_match": set_default(
{"filters": {"filters": filters}},
es_query
),
"_missing": missing_filter
}})
return output
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
domain = self.edge.domain
part = row[self.start]
# if part == None:
# return len(domain.partitions)
return part.get("_index", len(domain.partitions))
if index == None:
return len(domain.partitions)
else:
return index
@property
def num_columns(self):
@ -392,16 +383,15 @@ class GeneralSetDecoder(AggsDecoder):
class DurationDecoder(AggsDecoder):
def append_query(self, es_query, start):
self.start = start
return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema)
def append_query(self, query_path, es_query):
return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema)
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
domain = self.edge.domain
part = row[self.start]
part = row[0]
if part == None:
return len(domain.partitions)
@ -423,16 +413,15 @@ class DurationDecoder(AggsDecoder):
class RangeDecoder(AggsDecoder):
def append_query(self, es_query, start):
self.start = start
return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x, self.schema)
def append_query(self, query_path, es_query):
return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x, self.schema)
def get_value(self, index):
return self.edge.domain.getKeyByIndex(index)
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
domain = self.edge.domain
part = row[self.start]
part = row[0]
if part == None:
return len(domain.partitions)
@ -460,31 +449,40 @@ class MultivalueDecoder(SetDecoder):
self.values = query.frum.schema[edge.value.var][0].partitions
self.parts = []
def append_query(self, es_query, start):
self.start = start
def append_query(self, query_path, es_query):
es_field = first(self.query.frum.schema.leaves(self.var)).es_column
es_field = self.query.frum.schema.leaves(self.var)[0].es_column
es_query = wrap({"aggs": {
"_match": set_default({"terms": {
"script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
}}, es_query)
}})
return es_query
return Aggs().add(TermsAggs("_match", {
"script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
}, self).add(es_query))
def get_value_from_row(self, row):
values = row[self.start]['key'].replace("||", "\b").split("|")
values = row[0]['key'].replace("||", "\b").split("|")
if len(values) == 2:
return None
return unwraplist([v.replace("\b", "|") for v in values[1:-1]])
t = tuple(v.replace("\b", "|") for v in sorted(values[1:-1]))
def get_index(self, row):
if len(t) == 0:
return None
elif len(t) == 1:
return t[0]
else:
return t
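A toy decode of the pipe encoding this decoder parses (key hypothetical): values are wrapped in "|" delimiters, and a literal "|" inside a value is escaped as "||".
# "|a|b||c|" holds two values: "a" and "b|c"
key = "|a|b||c|"
values = key.replace("||", "\b").split("|")     # ['', 'a', 'b\bc', '']
parts = tuple(v.replace("\b", "|") for v in sorted(values[1:-1]))
# parts == ('a', 'b|c'); len(values) == 2 would mean no values at all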
def get_index(self, row, es_query=None, index=None):
find = self.get_value_from_row(row)
try:
return self.parts.index(find)
except Exception:
self.parts.append(find)
return len(self.parts)-1
return self.domain.getIndexByKey(find)
def count(self, row):
value = self.get_value_from_row(row)
self.parts.append(value)
def done_count(self):
self.edge.allowNulls = False
self.edge.domain = self.domain = SimpleSetDomain(
partitions=jx.sort(set(self.parts))
)
self.parts = None
@property
def num_columns(self):
@ -494,7 +492,7 @@ class MultivalueDecoder(SetDecoder):
class ObjectDecoder(AggsDecoder):
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
if isinstance(edge.value, LeavesOp):
if is_op(edge.value, LeavesOp):
prefix = edge.value.term.var
flatter = lambda k: literal_field(relative_field(k, prefix))
else:
@ -502,30 +500,29 @@ class ObjectDecoder(AggsDecoder):
flatter = lambda k: relative_field(k, prefix)
self.put, self.fields = transpose(*[
(flatter(untype_path(c.names["."])), c.es_column)
(flatter(untype_path(c.name)), c.es_column)
for c in query.frum.schema.leaves(prefix)
])
self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}})
self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.parts = list()
self.key2index = {}
self.computed_domain = False
def append_query(self, es_query, start):
self.start = start
def append_query(self, query_path, es_query):
decoder = self
for i, v in enumerate(self.fields):
nest = wrap({"aggs": {
"_match": set_default({"terms": {
nest = Aggs().add(
TermsAggs("_match", {
"field": v,
"size": self.domain.limit
}}, es_query),
"_missing": set_default(
{"filter": es_missing(v)},
es_query
)
}})
}, decoder).add(es_query)
).add(
FilterAggs("_missing", MissingOp(Variable(v)), decoder).add(es_query)
)
es_query = nest
decoder = None
return es_query
def count(self, row):
@ -542,7 +539,7 @@ class ObjectDecoder(AggsDecoder):
partitions=[{"value": p, "dataIndex": i} for i, p in enumerate(self.parts)]
)
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
value = self.get_value_from_row(row)
if self.computed_domain:
return self.domain.getIndexByKey(value)
@ -555,16 +552,18 @@ class ObjectDecoder(AggsDecoder):
self.parts.append(value)
return i
def get_value_from_row(self, row):
part = row[self.start:self.start + self.num_columns:]
if not part[0]['doc_count']:
def get_value_from_row(self, parts):
if not parts[0]['doc_count']:
return None
output = Data()
for k, v in transpose(self.put, part):
for k, v in transpose(self.put, parts):
output[k] = v.get('key')
return output
def get_value(self, index):
return self.parts[index]
@property
def num_columns(self):
return len(self.fields)
@ -576,81 +575,55 @@ class DefaultDecoder(SetDecoder):
def __init__(self, edge, query, limit):
AggsDecoder.__init__(self, edge, query, limit)
self.domain = edge.domain
self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.parts = list()
self.key2index = {}
self.computed_domain = False
self.script = self.edge.value.partial_eval().to_es_script(self.schema)
self.script = Painless[self.edge.value].partial_eval().to_es_script(self.schema)
self.pull = pull_functions[self.script.data_type]
self.missing = self.script.miss.partial_eval()
self.exists = NotOp("not", self.missing).partial_eval()
self.exists = NotOp(self.missing).partial_eval()
# WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
sort_candidates = [s for s in query.sort if s.value == edge.value]
if sort_candidates:
self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]}
else:
self.es_order = None
def append_query(self, es_query, start):
self.start = start
if not isinstance(self.edge.value, Variable):
if self.exists is TRUE:
# IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
output = wrap({"aggs": {
"_match": set_default(
{"terms": {
"script": {"lang": "painless", "inline": self.script.expr},
"size": self.domain.limit,
"order": self.es_order
}},
es_query
)
}})
else:
output = wrap({"aggs": {
"_match": { # _match AND _filter REVERSED SO _match LINES UP WITH _missing
"filter": self.exists.to_esfilter(self.schema),
"aggs": {
"_filter": set_default(
{"terms": {
"script": {"lang": "painless", "inline": self.script.expr},
"size": self.domain.limit,
"order": self.es_order
}},
es_query
)
}
},
"_missing": set_default(
{"filter": self.missing.to_esfilter(self.schema)},
es_query
)
}})
return output
def append_query(self, query_path, es_query):
if is_op(self.edge.value, FirstOp) and is_op(self.edge.value.term, Variable):
self.edge.value = self.edge.value.term # ES USES THE FIRST TERM FOR {"terms": } AGGREGATION
if not is_op(self.edge.value, Variable):
terms = TermsAggs(
"_match",
{
"script": {"lang": "painless", "inline": self.script.expr},
"size": self.domain.limit,
"order": self.es_order
},
self
)
else:
output = wrap({"aggs": {
"_match": set_default(
{"terms": {
"field": self.schema.leaves(self.edge.value.var)[0].es_column,
"size": self.domain.limit,
"order": self.es_order
}},
es_query
),
"_missing": set_default(
{"filter": self.missing.to_esfilter(self.schema)},
es_query
)
}})
return output
terms = TermsAggs(
"_match", {
"field": first(self.schema.leaves(self.edge.value.var)).es_column,
"size": self.domain.limit,
"order": self.es_order
},
self
)
output = Aggs()
output.add(FilterAggs("_filter", self.exists, None).add(terms.add(es_query)))
output.add(FilterAggs("_missing", self.missing, self).add(es_query))
return output
def count(self, row):
part = row[self.start]
part = row[0]
if part['doc_count']:
if part.get('key') != None:
self.parts.append(self.pull(part.get('key')))
key = part.get('key')
if key != None:
self.parts.append(self.pull(key))
else:
self.edge.allowNulls = True # OK! WE WILL ALLOW NULLS
@ -661,16 +634,16 @@ class DefaultDecoder(SetDecoder):
self.parts = None
self.computed_domain = True
def get_index(self, row):
def get_index(self, row, es_query=None, index=None):
if self.computed_domain:
try:
part = row[self.start]
part = row[0]
return self.domain.getIndexByKey(self.pull(part.get('key')))
except Exception as e:
Log.error("problem", cause=e)
else:
try:
part = row[self.start]
part = row[0]
key = self.pull(part.get('key'))
i = self.key2index.get(key)
if i is None:
@ -693,37 +666,30 @@ class DimFieldListDecoder(SetDecoder):
edge.allowNulls = False
self.fields = edge.domain.dimension.fields
self.domain = self.edge.domain
self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
self.parts = list()
def append_query(self, es_query, start):
# TODO: USE "reverse_nested" QUERY TO PULL THESE
self.start = start
def append_query(self, query_path, es_query):
decoder = self
for i, v in enumerate(self.fields):
exists = v.exists().partial_eval()
nest = wrap({"aggs": {"_match": {
"filter": exists.to_esfilter(self.schema),
"aggs": {"_filter": set_default({"terms": {
"field": self.schema.leaves(v.var)[0].es_column,
"size": self.domain.limit
}}, es_query)}
}}})
nest.aggs._missing = set_default(
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
es_query
)
nest = Aggs()
nest.add(TermsAggs("_match", {
"field": first(self.schema.leaves(v.var)).es_column,
"size": self.domain.limit
}, decoder).add(es_query))
nest.add(FilterAggs("_missing", NotOp(exists), decoder).add(es_query))
es_query = nest
decoder = None
if self.domain.where:
filter_ = self.domain.where.partial_eval().to_esfilter(self.schema)
es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}}
es_query = FilterAggs("_filter", self.domain.where, None).add(es_query)
return es_query
def count(self, row):
part = row[self.start:self.start + len(self.fields):]
if part[0]['doc_count']:
value = tuple(p.get("key") for p in part)
def count(self, parts):
if parts[0]['doc_count']:
value = tuple(p.get("key") for p, f in zip(parts, self.fields))
self.parts.append(value)
def done_count(self):
@ -737,20 +703,14 @@ class DimFieldListDecoder(SetDecoder):
partitions=[{"value": tuple(v[k] for k in columns), "dataIndex": i} for i, v in enumerate(sorted_parts)]
)
def get_index(self, row):
part = row[self.start:self.start + len(self.fields):]
if part[0]['doc_count']==0:
return None
find = tuple(p.get("key") for p in part)
output = self.domain.getIndexByKey(find)
return output
def get_index(self, row, es_query=None, index=None):
if row[0]['doc_count']:
find = tuple(p.get("key") for p, f in zip(row, self.fields))
output = self.domain.getIndexByKey(find)
return output
@property
def num_columns(self):
return len(self.fields)
pull_functions = {
STRING: lambda x: x,
NUMBER: lambda x: float(x) if x != None else None,
BOOLEAN: string2boolean
}
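These converters normalize ES bucket keys, which arrive as strings or primitives, back into jx-typed values; the same idea with stand-in lambdas:
# Stand-ins for the STRING and NUMBER converters:
pull_string = lambda x: x
pull_number = lambda x: float(x) if x != None else None
assert pull_number("3.5") == 3.5
assert pull_number(None) is None
assert pull_string("ok") == "ok"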

vendor/jx_elasticsearch/es52/deep.py

@ -7,19 +7,20 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from jx_base.expressions import NULL
from mo_future import is_text, is_binary
from jx_base.expressions import LeavesOp, NULL, Variable
from jx_base.query import DEFAULT_LIMIT
from jx_base.language import is_op
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es52.expressions import split_expression_by_depth, AndOp, Variable, LeavesOp
from jx_elasticsearch.es52.setop import format_dispatch, get_pull_function, get_pull
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template
from jx_python.expressions import compile_expression, jx_expression_to_function
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
from mo_json.typed_encoder import NESTED
from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_depth
from jx_elasticsearch.es52.setop import format_dispatch, get_pull, get_pull_function
from jx_elasticsearch.es52.util import es_query_template, jx_sort_to_es_sort
from jx_python.expressions import jx_expression_to_function
from mo_dots import Data, FlatList, coalesce, concat_field, is_list as is_list_, listwrap, literal_field, relative_field, set_default, split_field, startswith_field, unwrap, wrap
from mo_future import zip_longest
from mo_json import NESTED
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_threads import Thread
@ -60,14 +61,15 @@ def es_deepop(es, query):
# SPLIT WHERE CLAUSE BY DEPTH
wheres = split_expression_by_depth(query.where, schema)
for i, f in enumerate(es_filters):
script = AndOp("and", wheres[i]).partial_eval().to_esfilter(schema)
for f, w in zip_longest(es_filters, wheres):
script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
set_default(f, script)
if not wheres[1]:
# INCLUDE DOCS WITH NO NESTED DOCS
more_filter = {
"bool": {
"filter": [AndOp("and", wheres[0]).partial_eval().to_esfilter(schema)],
"filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
"must_not": {
"nested": {
"path": query_path,
@ -85,48 +87,49 @@ def es_deepop(es, query):
# es_query.sort = jx_sort_to_es_sort(query.sort)
map_to_es_columns = schema.map_to_es()
# {c.names["."]: c.es_column for c in schema.leaves(".")}
# {c.name: c.es_column for c in schema.leaves(".")}
query_for_es = query.map(map_to_es_columns)
es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)
es_query.stored_fields = []
is_list = isinstance(query.select, list)
is_list = is_list_(query.select)
selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
new_select = FlatList()
i = 0
for s in listwrap(query.select):
if isinstance(s.value, LeavesOp) and isinstance(s.value.term, Variable):
put_index = 0
for select in selects:
if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
# IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
leaves = schema.leaves(s.value.term.var)
leaves = schema.leaves(select.value.term.var)
col_names = set()
for c in leaves:
if c.nested_path[0] == ".":
if c.jx_type == NESTED:
continue
es_query.stored_fields += [c.es_column]
c_name = untype_path(c.names[query_path])
c_name = untype_path(relative_field(c.name, query_path))
col_names.add(c_name)
new_select.append({
"name": concat_field(s.name, c_name),
"name": concat_field(select.name, c_name),
"nested_path": c.nested_path[0],
"put": {"name": concat_field(s.name, literal_field(c_name)), "index": i, "child": "."},
"put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
"pull": get_pull_function(c)
})
i += 1
put_index += 1
# REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
for n in new_select:
if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
n.put.name = n.name = n.name.lstrip(".")
col_names.add(n.name)
elif isinstance(s.value, Variable):
net_columns = schema.leaves(s.value.var)
elif is_op(select.value, Variable):
net_columns = schema.leaves(select.value.var)
if not net_columns:
new_select.append({
"name": s.name,
"name": select.name,
"nested_path": ".",
"put": {"name": s.name, "index": i, "child": "."},
"put": {"name": select.name, "index": put_index, "child": "."},
"pull": NULL
})
else:
@ -139,26 +142,28 @@ def es_deepop(es, query):
# WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child
for np in n.nested_path:
c_name = untype_path(n.names[np])
if startswith_field(c_name, s.value.var):
child = relative_field(c_name, s.value.var)
c_name = untype_path(relative_field(n.name, np))
if startswith_field(c_name, select.value.var):
child = relative_field(c_name, select.value.var)
break
else:
child = relative_field(untype_path(n.names[n.nested_path[0]]), s.value.var)
continue
# REMOVED BECAUSE SELECTING INNER PROPERTIES IS NOT ALLOWED
# child = relative_field(untype_path(relative_field(n.name, n.nested_path[0])), s.value.var)
new_select.append({
"name": s.name,
"name": select.name,
"pull": pull,
"nested_path": n.nested_path[0],
"put": {
"name": s.name,
"index": i,
"name": select.name,
"index": put_index,
"child": child
}
})
i += 1
put_index += 1
else:
expr = s.value
expr = select.value
for v in expr.vars():
for c in schema[v.var]:
if c.nested_path[0] == ".":
@ -166,18 +171,18 @@ def es_deepop(es, query):
# else:
# Log.error("deep field not expected")
pull_name = EXPRESSION_PREFIX + s.name
pull_name = EXPRESSION_PREFIX + select.name
map_to_local = MapToLocal(schema)
pull = jx_expression_to_function(pull_name)
post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())
post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))
new_select.append({
"name": s.name if is_list else ".",
"name": select.name if is_list else ".",
"pull": pull,
"value": expr.__data__(),
"put": {"name": s.name, "index": i, "child": "."}
"put": {"name": select.name, "index": put_index, "child": "."}
})
i += 1
put_index += 1
# <COMPLICATED> ES needs two calls to get all documents
more = []
@ -208,7 +213,7 @@ def es_deepop(es, query):
Thread.join(need_more)
for t in more[0].hits.hits:
yield t
#</COMPLICATED>
# </COMPLICATED>
try:
formatter, groupby_formatter, mime_type = format_dispatch[query.format]

vendor/jx_elasticsearch/es52/es_query.py (new file)

@ -0,0 +1,339 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
from __future__ import absolute_import, division, unicode_literals
from jx_elasticsearch.es52.expressions import ES52
from mo_dots import is_data, is_list, startswith_field
from mo_future import text_type
from mo_json import value2json
from mo_logs import Log
_new = object.__new__
class Aggs(object):
def __init__(self, name=None):
self.name = name
self.children = []
self.decoders = []
self.selects = []
def to_es(self, schema, query_path="."):
if self.children:
return {"aggs": {
name: t.to_es(schema, query_path)
for i, t in enumerate(self.children)
for name in [t.name if t.name else "_" + text_type(i)]
}}
else:
return {}
def add(self, child):
self.children.append(child)
return self
def __eq__(self, other):
if self is other:
return True
return isinstance(other, Aggs) and self.name == other.name
def merge(self, other):
if self != other:
return False
self.children.extend(other.children)
self.decoders.extend(other.decoders)
return True
def __str__(self):
return value2json(self.to_es)
def copy(self):
output = _new(self.__class__)
output.name = self.name
output.children = self.children[:]
output.decoders = self.decoders[:]
output.selects = self.selects[:]
return output
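A minimal sketch of composing the abstract tree and rendering it (field names hypothetical; these node types do not consult the schema, so None suffices here):
# Build: terms bucket with a sum metric inside, then render to ES JSON:
acc = Aggs().add(
    TermsAggs("by_user", {"field": "user", "size": 10}, None).add(
        ExprAggs("sum_score", {"sum": {"field": "score"}}, None)
    )
)
body = acc.to_es(None)
# roughly: {"aggs": {"by_user": {"terms": {"field": "user", "size": 10},
#                                "aggs": {"sum_score": {"sum": {"field": "score"}}}}}}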
class ExprAggs(Aggs):
def __init__(self, name, expr, select):
Aggs.__init__(self, name)
self.expr = expr
self.selects = [select]
def __eq__(self, other):
if self is other:
return True
return isinstance(other, ExprAggs) and self.name == other.name and self.expr == other.expr
def merge(self, other):
if self != other:
return False
self.expr += other.expr
self.children.extend(other.children)
self.decoders.extend(other.decoders)
self.selects.extend(other.selects)
return True
def to_es(self, schema, query_path="."):
self.expr['aggs'] = Aggs.to_es(self, schema, query_path).get('aggs')
return self.expr
def copy(self):
output = Aggs.copy(self)
output.expr = self.expr
return output
class FilterAggs(Aggs):
def __init__(self, name, filter, decoder):
Aggs.__init__(self, name)
self.filter = filter
if is_data(filter):
Log.error("programming error")
self.decoders = [decoder] if decoder else []
def __eq__(self, other):
if self is other:
return True
return isinstance(other, FilterAggs) and self.name == other.name and self.filter == other.filter
def merge(self, other):
if self != other:
return False
self.children.extend(other.children)
self.decoders.extend(other.decoders)
return True
def to_es(self, schema, query_path="."):
output = Aggs.to_es(self, schema, query_path)
output['filter'] = ES52[self.filter].partial_eval().to_esfilter(schema)
return output
def copy(self):
output = Aggs.copy(self)
output.filter = self.filter
return output
class ComplexAggs(FilterAggs):
"""
FOR COMPLICATED AGGREGATIONS
"""
def __init__(self, select):
Aggs.__init__(self, "_filter")
self.expr = {"filter": {"match_all": {}}}
self.selects = [select]
def to_es(self, schema, query_path="."):
self.expr['aggs'] = Aggs.to_es(self, schema, query_path).get('aggs')
return self.expr
def copy(self):
output = Aggs.copy(self)
output.expr = self.expr
return output
class FiltersAggs(Aggs):
def __init__(self, name, filters, decoder):
Aggs.__init__(self, name)
self.filters = filters
self.decoders = [decoder] if decoder else []
if not is_list(filters):
Log.error("expecting a list")
def __eq__(self, other):
if self is other:
return True
return isinstance(other, FiltersAggs) and self.name == other.name and self.filters == other.filters
def merge(self, other):
if self != other:
return False
self.children.extend(other.children)
self.decoders.extend(other.decoders)
return True
def to_es(self, schema, query_path="."):
output = Aggs.to_es(self, schema, query_path)
output['filters'] = {"filters": [f.partial_eval().to_esfilter(schema) for f in self.filters]}
return output
def copy(self):
output = Aggs.copy(self)
output.filters = self.filters
return output
class NestedAggs(Aggs):
def __init__(self, path):
Aggs.__init__(self, "_nested")
self.path = path
def __eq__(self, other):
if self is other:
return True
return isinstance(other, NestedAggs) and self.path == other.path
def to_es(self, schema, query_path="."):
output = Aggs.to_es(self, schema, self.path)
if query_path == self.path:
Log.error("this should have been cancelled out")
elif startswith_field(self.path, query_path):
output['nested'] = {"path": self.path}
else:
output["reverse_nested"] = {"path": None if self.path == "." else self.path}
return output
def copy(self):
output = Aggs.copy(self)
output.path = self.path
return output
class TermsAggs(Aggs):
def __init__(self, name, terms, decoder):
Aggs.__init__(self, name)
self.terms = terms
self.decoders = [decoder] if decoder else []
def __eq__(self, other):
if self is other:
return True
return isinstance(other, TermsAggs) and self.name == other.name and self.terms == other.terms
def to_es(self, schema, query_path="."):
output = Aggs.to_es(self, schema, query_path)
output['terms'] = self.terms
return output
def copy(self):
output = Aggs.copy(self)
output.terms = self.terms
return output
class RangeAggs(Aggs):
def __init__(self, name, expr, decoder):
Aggs.__init__(self, name)
self.expr = expr
self.decoders = [decoder] if decoder else []
def __eq__(self, other):
if self is other:
return True
return isinstance(other, RangeAggs) and self.name == other.name and self.expr == other.expr
def to_es(self, schema, query_path="."):
output = Aggs.to_es(self, schema, query_path)
output['range'] = self.expr
return output
def copy(self):
output = Aggs.copy(self)
output.expr = self.expr
return output
def simplify(aggs):
# CONVERT FROM TREE TO UNION OF SEQUENCES
def depth_first(aggr):
if aggr.__class__ == Aggs:
# BASE CLASS Aggs IS ONLY A PLACEHOLDER
if not aggr.children:
yield tuple()
return
for c in aggr.children:
for path in depth_first(c):
yield path
elif not aggr.children:
yield (aggr,)
else:
for c in aggr.children:
for path in depth_first(c):
yield (aggr,) + path
# CANCEL OUT REDUNDANT NESTED AGGS
combined = []
for path in depth_first(aggs):
current_nested = NestedAggs(".")
prev = None
remove = []
for step in path:
if isinstance(step, NestedAggs):
if prev is not None:
remove.append(prev)
prev = None
if current_nested is not None:
if current_nested.path == step.path:
remove.append(step)
continue
else:
pass
prev = step
else:
current_nested = prev if prev else current_nested
prev = None
combined.append(tuple(p for p in path if not any(p is r for r in remove)))
# COMMON FACTOR, CONVERT BACK TO TREE
def merge(aggregations):
output = []
while True:
common_children = []
first_found = None
common = None
for i, terms in enumerate(aggregations):
if not terms:
continue
term, rest = terms[0], terms[1:]
if first_found is None:
first_found = term
common_children.append(rest)
common = first_found.copy()
aggregations[i] = None
elif term == first_found:
common_children.append(rest)
common.selects.extend([t for t in term.selects if not any(t is s for s in common.selects)])
common.decoders.extend([t for t in term.decoders if not any(t is d for d in common.decoders)])
aggregations[i] = None
if first_found is None:
return output
else:
common.children = merge(common_children)
output.append(common)
merged = [trim_root(o) for o in merge(combined)]
output = Aggs()
output.children = merged
return output
def trim_root(agg):
if isinstance(agg, NestedAggs) and agg.path == '.':
if len(agg.children) == 1:
return agg.children[0]
else:
output = Aggs()
output.children = agg.children
return output
else:
return agg
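A standalone sketch of the simplify() idea above: expand the aggregation tree into root-to-leaf paths (as depth_first does), then re-merge paths that share a common prefix. The names Node, paths_of, and merge_paths are illustrative, not from this commit.

class Node(object):
    def __init__(self, name, children=()):
        self.name = name
        self.children = list(children)

def paths_of(node):
    # DEPTH-FIRST: yield every root-to-leaf path as a tuple of names
    if not node.children:
        yield (node.name,)
        return
    for c in node.children:
        for rest in paths_of(c):
            yield (node.name,) + rest

def merge_paths(paths):
    # COMMON FACTOR: group paths by their first step, recurse on the remainders
    groups = {}
    for p in paths:
        if p:
            groups.setdefault(p[0], []).append(p[1:])
    return {k: merge_paths(v) for k, v in groups.items()}

tree = Node("root", [Node("a", [Node("x")]), Node("a", [Node("y")])])
print(merge_paths(list(paths_of(tree))))  # {'root': {'a': {'x': {}, 'y': {}}}}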

1630
vendor/jx_elasticsearch/es52/expressions.py vendored
Diff not shown because of its large size.

394
vendor/jx_elasticsearch/es52/format.py vendored

@@ -7,110 +7,154 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from jx_base.expressions import TupleOp
from jx_elasticsearch.es52.aggs import count_dim, aggs_iterator, format_dispatch, drill
from jx_base.query import canonical_aggregates
from jx_base.language import is_op
from jx_elasticsearch.es52.aggs import aggs_iterator, count_dim, format_dispatch
from jx_python.containers.cube import Cube
from mo_collections.matrix import Matrix
from mo_dots import Data, set_default, wrap, split_field, coalesce
from mo_dots import Data, coalesce, is_list, set_default, split_field, wrap
from mo_future import sort_using_key
from mo_logs import Log
from mo_logs.strings import quote
from pyLibrary import convert
FunctionType = type(lambda: 1)
def format_cube(decoders, aggs, start, query, select):
# decoders = sorted(decoders, key=lambda d: -d.edge.dim) # REVERSE DECODER ORDER, BECAUSE ES QUERY WAS BUILT IN REVERSE ORDER
new_edges = count_dim(aggs, decoders)
def format_cube(aggs, es_query, query, decoders, all_selects):
new_edges = count_dim(aggs, es_query, decoders)
dims = []
for e in new_edges:
if isinstance(e.value, TupleOp):
if is_op(e.value, TupleOp):
e.allowNulls = False
extra = 0 if e.allowNulls is False else 1
dims.append(len(e.domain.partitions) + extra)
dims = tuple(dims)
matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select]
for row, coord, agg in aggs_iterator(aggs, decoders):
for s, m in matricies:
try:
v = s.pull(agg)
m[coord] = v
except Exception as e:
# THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
if agg.get('doc_count') != 0:
Log.error("Programmer error", cause=e)
if any(s.default != canonical_aggregates[s.aggregate].default for s in all_selects):
# UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION
is_default = Matrix(dims=dims, zeros=True)
matricies = {s.name: Matrix(dims=dims) for s in all_selects}
for row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
for select in selects:
m = matricies[select.name]
v = select.pull(agg)
if v == None:
continue
is_default[coord] = False
union(m, coord, v, select.aggregate)
# FILL THE DEFAULT VALUES
for c, v in is_default:
if v:
for s in all_selects:
matricies[s.name][c] = s.default
else:
matricies = {s.name: Matrix(dims=dims, zeros=s.default) for s in all_selects}
for row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
for select in selects:
m = matricies[select.name]
v = select.pull(agg)
union(m, coord, v, select.aggregate)
cube = Cube(
query.select,
sort_using_key(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY
{s.name: m for s, m in matricies}
matricies
)
cube.frum = query
return cube
def format_cube_from_aggop(decoders, aggs, start, query, select):
agg = drill(aggs)
matricies = [(s, Matrix(dims=[], zeros=s.default)) for s in select]
for s, m in matricies:
m[tuple()] = s.pull(agg)
cube = Cube(query.select, [], {s.name: m for s, m in matricies})
cube.frum = query
return cube
def _value_drill(agg):
while True:
deeper = agg.get("_nested")
if deeper:
agg = deeper
continue
deeper = agg.get("_filter")
if deeper:
agg = deeper
continue
return agg
def format_table(decoders, aggs, start, query, select):
new_edges = count_dim(aggs, decoders)
header = new_edges.name + select.name
def format_table(aggs, es_query, query, decoders, all_selects):
new_edges = wrap(count_dim(aggs, es_query, decoders))
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
rank = len(dims)
header = tuple(new_edges.name + all_selects.name)
name2index = {s.name: i + rank for i, s in enumerate(all_selects)}
def data():
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
is_sent = Matrix(dims=dims, zeros=0)
if query.sort and not query.groupby:
is_sent = Matrix(dims=dims)
give_me_zeros = query.sort and not query.groupby
if give_me_zeros:
# WE REQUIRE THE ZEROS FOR SORTING
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
for row, coord, agg in aggs_iterator(aggs, decoders):
missing_coord = all_coord.next()
while coord != missing_coord:
record = [d.get_value(missing_coord[i]) for i, d in enumerate(decoders)]
for s in select:
if s.aggregate == "count":
record.append(0)
else:
record.append(None)
yield record
missing_coord = all_coord.next()
ordered_coord = all_coord.next()[::-1]
output = None
for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
if coord != ordered_coord:
# output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
if output is not None:
for s in all_selects:
i = name2index[s.name]
if output[i] is None:
output[i] = s.default
# WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
ordered_coord = all_coord.next()[::-1]
output = [d.get_value(c) for c, d in zip(coord, decoders)]
for s in select:
output.append(s.pull(agg))
while coord != ordered_coord:
# HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
record = [d.get_value(ordered_coord[i]) for i, d in enumerate(decoders)] + [s.default for s in all_selects]
yield record
ordered_coord = all_coord.next()[::-1]
# coord == ordered_coord
output = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for s in all_selects]
for select in ss:
v = select.pull(agg)
if v != None:
union(output, name2index[select.name], v, select.aggregate)
yield output
else:
for row, coord, agg in aggs_iterator(aggs, decoders):
is_sent[coord] = 1
last_coord = None # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
output = None
for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
if coord != last_coord:
if output:
# SET DEFAULTS
for i, s in enumerate(all_selects):
v = output[rank+i]
if v == None:
output[rank+i] = s.default
yield output
output = is_sent[coord]
if output == None:
output = is_sent[coord] = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for _ in all_selects]
last_coord = coord
# THIS IS A TRICK! WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
for select in ss:
v = select.pull(agg)
if v != None:
union(output, name2index[select.name], v, select.aggregate)
output = [d.get_value(c) for c, d in zip(coord, decoders)]
for s in select:
output.append(s.pull(agg))
if output:
# SET DEFAULTS ON LAST ROW
for i, s in enumerate(all_selects):
v = output[rank+i]
if v == None:
output[rank+i] = s.default
yield output
# EMIT THE MISSING CELLS IN THE CUBE
if not query.groupby:
for c, v in is_sent:
if not v:
record = [d.get_value(c[i]) for i, d in enumerate(decoders)]
for s in select:
if s.aggregate == "count":
record.append(0)
else:
record.append(None)
for coord, output in is_sent:
if output == None:
record = [d.get_value(c) for c, d in zip(coord, decoders)] + [s.default for s in all_selects]
yield record
return Data(
@@ -119,42 +163,8 @@ def format_table(decoders, aggs, start, query, select):
data=list(data())
)
def format_table_from_groupby(decoders, aggs, start, query, select):
header = [d.edge.name.replace("\\.", ".") for d in decoders] + select.name
def data():
for row, coord, agg in aggs_iterator(aggs, decoders):
if agg.get('doc_count', 0) == 0:
continue
output = [d.get_value_from_row(row) for d in decoders]
for s in select:
output.append(s.pull(agg))
yield output
return Data(
meta={"format": "table"},
header=header,
data=list(data())
)
def format_table_from_aggop(decoders, aggs, start, query, select):
header = select.name
agg = drill(aggs)
row = []
for s in select:
row.append(s.pull(agg))
return Data(
meta={"format": "table"},
header=header,
data=[row]
)
def format_tab(decoders, aggs, start, query, select):
table = format_table(decoders, aggs, start, query, select)
def format_tab(aggs, es_query, query, decoders, select):
table = format_table(aggs, es_query, query, decoders, select)
def data():
yield "\t".join(map(quote, table.header))
@@ -164,8 +174,8 @@ def format_tab(decoders, aggs, start, query, select):
return data()
def format_csv(decoders, aggs, start, query, select):
table = format_table(decoders, aggs, start, query, select)
def format_csv(aggs, es_query, query, decoders, select):
table = format_table(aggs, es_query, query, decoders, select)
def data():
yield ", ".join(map(quote, table.header))
@@ -175,18 +185,42 @@ def format_csv(decoders, aggs, start, query, select):
return data()
def format_list_from_groupby(decoders, aggs, start, query, select):
def data():
for row, coord, agg in aggs_iterator(aggs, decoders):
if agg.get('doc_count', 0) == 0:
continue
output = Data()
for g, d in zip(query.groupby, decoders):
output[coalesce(g.put.name, g.name)] = d.get_value_from_row(row)
def format_list_from_groupby(aggs, es_query, query, decoders, all_selects):
new_edges = wrap(count_dim(aggs, es_query, decoders))
for s in select:
output[s.name] = s.pull(agg)
yield output
def data():
groupby = query.groupby
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
is_sent = Matrix(dims=dims)
give_me_zeros = query.sort and not query.groupby
finishes = []
# IRREGULAR DEFAULTS MESS WITH union(), SET THEM AT END, IF ANY
for s in all_selects:
if s.default != canonical_aggregates[s.aggregate].default:
s.finish = s.default
s.default = None
finishes.append(s)
for row, coord, agg, _selects in aggs_iterator(aggs, es_query, decoders, give_me_zeros=give_me_zeros):
output = is_sent[coord]
if output == None:
output = is_sent[coord] = Data()
for g, d, c in zip(groupby, decoders, coord):
output[g.put.name] = d.get_value(c)
for s in all_selects:
output[s.name] = s.default
yield output
# THIS IS A TRICK! WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
for s in _selects:
union(output, s.name, s.pull(agg), s.aggregate)
if finishes:
# SET ANY DEFAULTS
for c, o in is_sent:
for s in finishes:
if o[s.name] == None:
o[s.name] = s.finish
for g in query.groupby:
g.put.name = coalesce(g.put.name, g.name)
@@ -198,94 +232,36 @@ def format_list_from_groupby(decoders, aggs, start, query, select):
return output
def format_list(decoders, aggs, start, query, select):
new_edges = count_dim(aggs, decoders)
def format_list(aggs, es_query, query, decoders, select):
table = format_table(aggs, es_query, query, decoders, select)
header = table.header
def data():
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
is_sent = Matrix(dims=dims, zeros=0)
if query.sort and not query.groupby:
# TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
for _, coord, agg in aggs_iterator(aggs, decoders):
missing_coord = all_coord.next()
while coord != missing_coord:
# INSERT THE MISSING COORDINATE INTO THE GENERATION
output = Data()
for i, d in enumerate(decoders):
output[query.edges[i].name] = d.get_value(missing_coord[i])
for s in select:
if s.aggregate == "count":
output[s.name] = 0
yield output
missing_coord = all_coord.next()
output = Data()
for e, c, d in zip(query.edges, coord, decoders):
output[e.name] = d.get_value(c)
for s in select:
output[s.name] = s.pull(agg)
yield output
else:
for row, coord, agg in aggs_iterator(aggs, decoders):
is_sent[coord] = 1
output = Data()
for e, c, d in zip(query.edges, coord, decoders):
output[e.name] = d.get_value(c)
for s in select:
output[s.name] = s.pull(agg)
yield output
# EMIT THE MISSING CELLS IN THE CUBE
if not query.groupby:
for c, v in is_sent:
if not v:
output = Data()
for i, d in enumerate(decoders):
output[query.edges[i].name] = d.get_value(c[i])
for s in select:
if s.aggregate == "count":
output[s.name] = 0
yield output
if query.edges or query.groupby:
data = []
for row in table.data:
d = Data()
for h, r in zip(header, row):
d[h] = r
data.append(d)
format = "list"
elif is_list(query.select):
data = Data()
for h, r in zip(header, table.data[0]):
data[h] = r
format = "value"
else:
data = table.data[0][0]
format = "value"
output = Data(
meta={"format": "list"},
data=list(data())
meta={"format": format},
data=data
)
return output
def format_list_from_aggop(decoders, aggs, start, query, select):
agg = drill(aggs)
if isinstance(query.select, list):
item = Data()
for s in select:
item[s.name] = s.pull(agg)
else:
item = select[0].pull(agg)
if query.edges or query.groupby:
return wrap({
"meta": {"format": "list"},
"data": [item]
})
else:
return wrap({
"meta": {"format": "value"},
"data": item
})
def format_line(decoders, aggs, start, query, select):
list = format_list(decoders, aggs, start, query, select)
def format_line(aggs, es_query, query, decoders, select):
list = format_list(aggs, es_query, query, decoders, select)
def data():
for d in list.data:
@@ -295,10 +271,10 @@ def format_line(decoders, aggs, start, query, select):
set_default(format_dispatch, {
None: (format_cube, format_table_from_groupby, format_cube_from_aggop, "application/json"),
"cube": (format_cube, format_cube, format_cube_from_aggop, "application/json"),
"table": (format_table, format_table_from_groupby, format_table_from_aggop, "application/json"),
"list": (format_list, format_list_from_groupby, format_list_from_aggop, "application/json"),
None: (format_cube, format_table, format_cube, "application/json"),
"cube": (format_cube, format_cube, format_cube, "application/json"),
"table": (format_table, format_table, format_table, "application/json"),
"list": (format_list, format_list_from_groupby, format_list, "application/json"),
# "csv": (format_csv, format_csv_from_groupby, "text/csv"),
# "tab": (format_tab, format_tab_from_groupby, "text/tab-separated-values"),
# "line": (format_line, format_line_from_groupby, "application/json")
@@ -314,3 +290,27 @@ def _get(v, k, d):
except Exception:
v = [vv.get(p) for vv in v]
return v
def union(matrix, coord, value, agg):
# matrix[coord] = existing + value WITH ADDITIONAL CHECKS
existing = matrix[coord]
if existing == None:
matrix[coord] = value
elif value == None:
pass
elif agg not in ['sum', 'count']:
if agg == "cardinality" and (existing == 0 or value == 0):
matrix[coord] = existing + value
return
elif agg == "stats" and (not existing or not value):
matrix[coord] = existing + value
return
elif agg == "union":
matrix[coord] = list(set(existing) | set(value))
return
Log.warning("{{agg}} not ready", agg=agg)
else:
matrix[coord] = existing + value
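A standalone sketch of the union() accumulation rules above, using a plain dict in place of a Matrix (assumption: a missing cell reads as None). Only the sum and union branches are shown.

def union(matrix, coord, value, agg):
    existing = matrix.get(coord)
    if existing is None:
        matrix[coord] = value
    elif value is None:
        pass
    elif agg in ("sum", "count"):
        matrix[coord] = existing + value
    elif agg == "union":
        matrix[coord] = sorted(set(existing) | set(value))

m = {}
union(m, (0,), 3, "sum")
union(m, (0,), 4, "sum")        # accumulates: m[(0,)] == 7
union(m, (1,), ["a"], "union")
union(m, (1,), ["b"], "union")  # merges: m[(1,)] == ["a", "b"]
print(m)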

1305
vendor/jx_elasticsearch/es52/painless.py vendored Normal file
Diff not shown because of its large size.

297
vendor/jx_elasticsearch/es52/setop.py vendored

@@ -7,26 +7,23 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from __future__ import absolute_import, division, unicode_literals
from jx_base.domains import ALGEBRAIC
from jx_base.expressions import IDENTITY
from jx_base.expressions import IDENTITY, LeavesOp, Variable
from jx_base.query import DEFAULT_LIMIT
from jx_base.language import is_op
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es52.expressions import Variable, LeavesOp
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_path
from jx_elasticsearch.es52.painless import Painless
from jx_elasticsearch.es52.util import MATCH_ALL, es_and, es_or, jx_sort_to_es_sort
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
from mo_dots.lists import FlatList
from mo_future import transpose
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, unnest_path, untyped
from mo_dots import Data, FlatList, coalesce, concat_field, is_data, is_list, join_field, listwrap, literal_field, relative_field, set_default, split_field, unwrap, unwraplist, wrap
from mo_future import first, text_type, transpose
from mo_json import NESTED
from mo_json.typed_encoder import decode_property, unnest_path, untype_path, untyped
from mo_logs import Log
from mo_math import AND, MAX
from mo_times.timer import Timer
@@ -54,31 +51,30 @@ def is_setop(es, query):
def es_setop(es, query):
schema = query.frum.schema
query_path = schema.query_path[0]
es_query, filters = es_query_template(schema.query_path[0])
nested_filter = None
set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
es_query.stored_fields = FlatList()
split_select = {".": ESSelect('.')}
selects = wrap([s.copy() for s in listwrap(query.select)])
def get_select(path):
es_select = split_select.get(path)
if not es_select:
es_select = split_select[path] = ESSelect(path)
return es_select
selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
new_select = FlatList()
schema = query.frum.schema
# columns = schema.columns
# nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")
es_query.sort = jx_sort_to_es_sort(query.sort, schema)
put_index = 0
for select in selects:
# IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
term = select.value.term
leaves = schema.leaves(term.var)
for c in leaves:
full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
full_name = concat_field(select.name, relative_field(untype_path(c.name), term.var))
if c.jx_type == NESTED:
es_query.stored_fields = ["_source"]
get_select('.').use_source = True
new_select.append({
"name": full_name,
"value": Variable(c.es_column),
@@ -86,98 +82,88 @@ def es_setop(es, query):
"pull": get_pull_source(c.es_column)
})
put_index += 1
elif c.nested_path[0] != ".":
pass # THE NESTED PARENT WILL CAPTURE THIS
else:
es_query.stored_fields += [c.es_column]
get_select(c.nested_path[0]).fields.append(c.es_column)
new_select.append({
"name": full_name,
"value": Variable(c.es_column),
"put": {"name": literal_field(full_name), "index": put_index, "child": "."}
})
put_index += 1
elif isinstance(select.value, Variable):
elif is_op(select.value, Variable):
s_column = select.value.var
# LEAVES OF OBJECT
leaves = schema.leaves(s_column)
nested_selects = {}
if s_column == ".":
# PULL ALL SOURCE
get_select('.').use_source = True
new_select.append({
"name": select.name,
"value": select.value,
"put": {"name": select.name, "index": put_index, "child": "."},
"pull": get_pull_source(".")
})
continue
leaves = schema.leaves(s_column) # LEAVES OF OBJECT
# nested_selects = {}
if leaves:
if s_column == '.':
# PULL ALL SOURCE
es_query.stored_fields = ["_source"]
new_select.append({
"name": select.name,
"value": select.value,
"put": {"name": select.name, "index": put_index, "child": "."},
"pull": get_pull_source(".")
})
elif any(c.jx_type == NESTED for c in leaves):
if any(c.jx_type == NESTED for c in leaves):
# PULL WHOLE NESTED ARRAYS
es_query.stored_fields = ["_source"]
get_select('.').use_source = True
for c in leaves:
if len(c.nested_path) == 1: # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRT LEVEL PROPERTIES
jx_name = untype_path(c.names["."])
if len(c.nested_path) == 1: # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
pre_child = join_field(decode_property(n) for n in split_field(c.name))
new_select.append({
"name": select.name,
"value": Variable(c.es_column),
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
"put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
"pull": get_pull_source(c.es_column)
})
else:
# PULL ONLY WHAT'S NEEDED
for c in leaves:
if len(c.nested_path) == 1:
jx_name = untype_path(c.names["."])
if c.jx_type == NESTED:
es_query.stored_fields = ["_source"]
c_nested_path = c.nested_path[0]
if c_nested_path == ".":
if c.es_column == "_id":
new_select.append({
"name": select.name,
"value": Variable(c.es_column),
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
"put": {"name": select.name, "index": put_index, "child": "."},
"pull": lambda row: row._id
})
elif c.jx_type == NESTED:
get_select('.').use_source = True
pre_child = join_field(decode_property(n) for n in split_field(c.name))
new_select.append({
"name": select.name,
"value": Variable(c.es_column),
"put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
"pull": get_pull_source(c.es_column)
})
else:
es_query.stored_fields += [c.es_column]
get_select(c_nested_path).fields.append(c.es_column)
pre_child = join_field(decode_property(n) for n in split_field(c.name))
new_select.append({
"name": select.name,
"value": Variable(c.es_column),
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}
"put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))}
})
else:
if not nested_filter:
where = filters[0].copy()
nested_filter = [where]
for k in filters[0].keys():
filters[0][k] = None
set_default(
filters[0],
es_and([where, es_or(nested_filter)])
)
es_select = get_select(c_nested_path)
es_select.fields.append(c.es_column)
nested_path = c.nested_path[0]
if nested_path not in nested_selects:
where = nested_selects[nested_path] = Data()
nested_filter += [where]
where.nested.path = nested_path
where.nested.query.match_all = {}
where.nested.inner_hits._source = False
where.nested.inner_hits.stored_fields += [c.es_column]
child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
new_select.append({
child = relative_field(untype_path(relative_field(c.name, schema.query_path[0])), s_column)
pull = accumulate_nested_doc(c_nested_path, Variable(relative_field(s_column, unnest_path(c_nested_path))))
new_select.append({
"name": select.name,
"value": select.value,
"put": {
"name": select.name,
"value": select.value,
"put": {
"name": select.name,
"index": put_index,
"child": child
},
"pull": pull
})
else:
nested_selects[nested_path].nested.inner_hits.stored_fields += [c.es_column]
"index": put_index,
"child": child
},
"pull": pull
})
else:
new_select.append({
"name": select.name,
@@ -186,21 +172,22 @@ def es_setop(es, query):
})
put_index += 1
else:
painless = select.value.partial_eval().to_es_script(schema)
es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
new_select.append({
"name": select.name,
"pull": jx_expression_to_function("fields." + literal_field(select.name)),
"put": {"name": select.name, "index": put_index, "child": "."}
})
put_index += 1
split_scripts = split_expression_by_path(select.value, schema, lang=Painless)
for p, script in split_scripts.items():
es_select = get_select(p)
es_select.scripts[select.name] = {"script": text_type(Painless[first(script)].partial_eval().to_es_script(schema))}
new_select.append({
"name": select.name,
"pull": jx_expression_to_function("fields." + literal_field(select.name)),
"put": {"name": select.name, "index": put_index, "child": "."}
})
put_index += 1
for n in new_select:
if n.pull:
continue
elif isinstance(n.value, Variable):
if es_query.stored_fields[0] == "_source":
es_query.stored_fields = ["_source"]
elif is_op(n.value, Variable):
if get_select('.').use_source:
n.pull = get_pull_source(n.value.var)
elif n.value == "_id":
n.pull = jx_expression_to_function("_id")
@@ -209,15 +196,22 @@ def es_setop(es, query):
else:
Log.error("Do not know what to do")
with Timer("call to ES") as call_timer:
split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
es_query = es_query_proto(query_path, split_select, split_wheres, schema)
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
es_query.sort = jx_sort_to_es_sort(query.sort, schema)
with Timer("call to ES", silent=True) as call_timer:
data = es_post(es, es_query, query.limit)
T = data.hits.hits
# Log.note("{{output}}", output=T)
try:
formatter, groupby_formatter, mime_type = format_dispatch[query.format]
with Timer("formatter"):
with Timer("formatter", silent=True):
output = formatter(T, new_select, query)
output.meta.timing.es = call_timer.duration
output.meta.content_type = mime_type
@@ -252,14 +246,18 @@ def accumulate_nested_doc(nested_path, expr=IDENTITY):
def format_list(T, select, query=None):
data = []
if isinstance(query.select, list):
if is_list(query.select):
for row in T:
r = Data()
for s in select:
v = s.pull(row)
r[s.put.name][s.put.child] = unwraplist(v)
v = unwraplist(s.pull(row))
if v is not None:
try:
r[s.put.name][s.put.child] = v
except Exception as e:
Log.error("what's happening here?")
data.append(r if r else None)
elif isinstance(query.select.value, LeavesOp):
elif is_op(query.select.value, LeavesOp):
for row in T:
r = Data()
for s in select:
@@ -310,7 +308,7 @@ def format_table(T, select, query=None):
header = [None] * num_columns
if isinstance(query.select, Mapping) and not isinstance(query.select.value, LeavesOp):
if is_data(query.select) and not is_op(query.select.value, LeavesOp):
for s in select:
header[s.put.index] = s.name
else:
@@ -360,9 +358,8 @@ def get_pull(column):
if column.nested_path[0] == ".":
return concat_field("fields", literal_field(column.es_column))
else:
depth = len(split_field(column.nested_path[0]))
rel_name = split_field(column.es_column)[depth:]
return join_field(["_inner"] + rel_name)
rel_name = relative_field(column.es_column, column.nested_path[0])
return concat_field("_inner", rel_name)
def get_pull_function(column):
@@ -377,14 +374,80 @@ def get_pull_source(es_column):
def get_pull_stats(stats_name, median_name):
return jx_expression_to_function({"select": [
{"name": "count", "value": stats_name + ".count"},
{"name": "sum", "value": stats_name + ".sum"},
{"name": "min", "value": stats_name + ".min"},
{"name": "max", "value": stats_name + ".max"},
{"name": "avg", "value": stats_name + ".avg"},
{"name": "sos", "value": stats_name + ".sum_of_squares"},
{"name": "std", "value": stats_name + ".std_deviation"},
{"name": "var", "value": stats_name + ".variance"},
{"name": "median", "value": median_name + ".values.50\\.0"}
{"name": "count", "value": join_field([stats_name, "count"])},
{"name": "sum", "value": join_field([stats_name, "sum"])},
{"name": "min", "value": join_field([stats_name, "min"])},
{"name": "max", "value": join_field([stats_name, "max"])},
{"name": "avg", "value": join_field([stats_name, "avg"])},
{"name": "sos", "value": join_field([stats_name, "sum_of_squares"])},
{"name": "std", "value": join_field([stats_name, "std_deviation"])},
{"name": "var", "value": join_field([stats_name, "variance"])},
{"name": "median", "value": join_field([median_name, "values", "50.0"])}
]})
class ESSelect(object):
"""
ACCUMULATE THE FIELDS WE ARE INTERESTED IN
"""
def __init__(self, path):
self.path = path
self.use_source = False
self.fields = []
self.scripts = {}
def to_es(self):
return {
"_source": self.use_source,
"stored_fields": self.fields if not self.use_source else None,
"script_fields": self.scripts if self.scripts else None
}
def es_query_proto(path, selects, wheres, schema):
"""
RETURN A FULL ES QUERY, COMBINING THE PER-PATH SELECTS AND WHERES
:param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
:param selects: MAP FROM path TO ESSelect
:param wheres: MAP FROM path TO LIST OF WHERE CONDITIONS
:return: es_query READY TO SEND TO ES
"""
output = None
last_where = MATCH_ALL
for p in reversed(sorted(wheres.keys() | set(selects.keys()))):
where = wheres.get(p)
select = selects.get(p)
if where:
where = AndOp(where).partial_eval().to_esfilter(schema)
if output:
where = es_or([es_and([output, where]), where])
else:
if output:
if last_where is MATCH_ALL:
where = es_or([output, MATCH_ALL])
else:
where = output
else:
where = MATCH_ALL
if p == ".":
output = set_default(
{
"from": 0,
"size": 0,
"sort": [],
"query": where
},
select.to_es()
)
else:
output = {"nested": {
"path": p,
"inner_hits": set_default({"size": 100000}, select.to_es()) if select else None,
"query": where
}}
last_where = where
return output
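A rough sketch of the request shape es_query_proto() assembles when there is one nested path: the per-path ESSelect lands in inner_hits (with the hard-coded size above) and the per-path where clause becomes the nested query. The path and field names here are invented for illustration.

import json

es_query = {
    "from": 0,
    "size": 10,
    "sort": [],
    "query": {"nested": {
        "path": "children",
        "inner_hits": {
            "size": 100000,             # from es_query_proto() above
            "_source": False,           # ESSelect.to_es() with stored fields
            "stored_fields": ["children.name"],
        },
        "query": {"match_all": {}},     # where clause for this path
    }},
}
print(json.dumps(es_query, indent=2))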

40
vendor/jx_elasticsearch/es52/util.py vendored

@@ -7,25 +7,25 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from jx_elasticsearch.es52.expressions import Variable
from jx_base.expressions import Variable
from jx_base.language import is_op
from mo_dots import wrap
from mo_future import text_type
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
from mo_future import is_text
from mo_json import BOOLEAN, IS_NULL, NUMBER, OBJECT, STRING
from mo_logs import Log
from pyLibrary.convert import value2boolean
def es_query_template(path):
"""
RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
:param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
:return:
:return: (es_query, es_filters) TUPLE
"""
if not isinstance(path, text_type):
if not is_text(path):
Log.error("expecting path to be a string")
if path != ".":
@@ -62,7 +62,7 @@ def jx_sort_to_es_sort(sort, schema):
output = []
for s in sort:
if isinstance(s.value, Variable):
if is_op(s.value, Variable):
cols = schema.leaves(s.value.var)
if s.sort == -1:
types = OBJECT, STRING, NUMBER, BOOLEAN
@@ -71,7 +71,7 @@ def jx_sort_to_es_sort(sort, schema):
for type in types:
for c in cols:
if c.jx_type == type:
if c.jx_type is type:
if s.sort == -1:
output.append({c.es_column: "desc"})
else:
@@ -91,6 +91,7 @@ aggregates = {
"sum": "sum",
"add": "sum",
"count": "value_count",
"count_values": "count_values",
"maximum": "max",
"minimum": "min",
"max": "max",
@@ -114,7 +115,6 @@ aggregates = {
NON_STATISTICAL_AGGS = {"none", "one"}
def es_and(terms):
return wrap({"bool": {"filter": terms}})
@@ -128,8 +128,24 @@ def es_not(term):
def es_script(term):
return wrap({"script": {"lang": "painless", "inline": term}})
return wrap({"script": {"lang": "painless", "source": term}})
def es_missing(term):
return {"bool": {"must_not": {"exists": {"field": term}}}}
def es_exists(term):
return {"exists": {"field": term}}
MATCH_ALL = wrap({"match_all": {}})
MATCH_NONE = es_not({"match_all": {}})
pull_functions = {
IS_NULL: lambda x: None,
STRING: lambda x: x,
NUMBER: lambda x: float(x) if x != None else None,
BOOLEAN: value2boolean
}
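A standalone sketch of the es_and/es_not combinators defined above, plus an es_or assumed to mirror them with a bool/should clause (its body is not shown in this diff); plain dicts stand in for wrap().

def es_and(terms):
    return {"bool": {"filter": terms}}

def es_not(term):
    return {"bool": {"must_not": term}}

def es_or(terms):
    # assumption: same shape as es_and, using "should"
    return {"bool": {"should": terms}}

MATCH_ALL = {"match_all": {}}
MATCH_NONE = es_not({"match_all": {}})

# e.g. documents from the last week OR documents with no timestamp at all
print(es_or([
    {"range": {"etl.timestamp.~n~": {"gte": 1546300800}}},
    es_not({"exists": {"field": "etl.timestamp.~n~"}}),
]))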

584
vendor/jx_elasticsearch/meta.py vendored

@@ -7,12 +7,12 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from datetime import date, datetime
from decimal import Decimal
import itertools
from itertools import product
import jx_base
from jx_base import TableDesc
@@ -20,19 +20,20 @@ from jx_base.namespace import Namespace
from jx_base.query import QueryOp
from jx_python import jx
from jx_python.containers.list_usingPythonList import ListContainer
from jx_python.meta import ColumnList, Column
from mo_collections.relation import Relation_usingList
from mo_dots import Data, relative_field, SELF_PATH, ROOT_PATH, coalesce, set_default, Null, split_field, join_field, wrap, concat_field, startswith_field, literal_field
from mo_json.typed_encoder import EXISTS_TYPE, untype_path, unnest_path, OBJECT, EXISTS, STRUCT, BOOLEAN
from jx_python.meta import Column, ColumnList
from mo_dots import Data, FlatList, Null, NullType, ROOT_PATH, coalesce, concat_field, is_list, literal_field, relative_field, set_default, split_field, startswith_field, tail_field, wrap
from mo_files import URL
from mo_future import PY2, none_type, text_type
from mo_json import BOOLEAN, EXISTS, INTEGER, OBJECT, STRING, STRUCT
from mo_json.typed_encoder import BOOLEAN_TYPE, EXISTS_TYPE, NUMBER_TYPE, STRING_TYPE, unnest_path, untype_path
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import Except
from mo_logs.strings import quote
from mo_math import MAX
from mo_threads import Queue, THREAD_STOP, Thread, Till
from mo_times import HOUR, MINUTE, Timer, Date
from mo_times import Date, HOUR, MINUTE, Timer, WEEK
from pyLibrary.env import elasticsearch
from pyLibrary.env.elasticsearch import es_type_to_json_type, _get_best_type_from_mapping
from pyLibrary.env.elasticsearch import _get_best_type_from_mapping, es_type_to_json_type
MAX_COLUMN_METADATA_AGE = 12 * HOUR
ENABLE_META_SCAN = True
@@ -72,17 +73,17 @@ class ElasticsearchMetadata(Namespace):
self.index_does_not_exist = set()
self.todo = Queue("refresh metadata", max=100000, unique=True)
self.index_to_alias = Relation_usingList()
self.index_to_alias = {}
self.es_metadata = Null
self.metadata_last_updated = Date.now() - OLD_METADATA
self.meta = Data()
self.meta.columns = ColumnList()
self.meta.columns = ColumnList(URL(self.es_cluster.settings.host).host)
self.alias_to_query_paths = {
"meta.columns": [['.']],
"meta.tables": [['.']]
"meta.columns": [ROOT_PATH],
"meta.tables": [ROOT_PATH]
}
self.alias_last_updated = {
"meta.columns": Date.now(),
@@ -91,10 +92,7 @@ class ElasticsearchMetadata(Namespace):
table_columns = metadata_tables()
self.meta.tables = ListContainer(
"meta.tables",
[
# TableDesc("meta.columns", None, ".", Date.now()),
# TableDesc("meta.tables", None, ".", Date.now())
],
[],
jx_base.Schema(".", table_columns)
)
self.meta.columns.extend(table_columns)
@@ -102,9 +100,12 @@ class ElasticsearchMetadata(Namespace):
if ENABLE_META_SCAN:
self.worker = Thread.run("refresh metadata", self.monitor)
else:
self.worker = Thread.run("refresh metadata", self.not_monitor)
self.worker = Thread.run("not refresh metadata", self.not_monitor)
return
@property
def namespace(self):
return self.meta.columns.namespace
@@ -123,14 +124,13 @@ class ElasticsearchMetadata(Namespace):
alias = table_desc.name
canonical_index = self.es_cluster.get_best_matching_index(alias).index
update_required = not (table_desc.timestamp < es_last_updated)
metadata = self.es_cluster.get_metadata(force=update_required)
es_metadata_update_required = not (table_desc.timestamp < es_last_updated)
metadata = self.es_cluster.get_metadata(force=es_metadata_update_required)
indexes = self.index_to_alias.get_domain(alias)
props = [
(self.es_cluster.get_index(index=i, type=t, debug=DEBUG), t, m.properties)
for i, d in metadata.indices.items()
if i in indexes
if alias in d.aliases
for t, m in [_get_best_type_from_mapping(d.mappings)]
]
@@ -148,16 +148,19 @@ class ElasticsearchMetadata(Namespace):
data_type, mapping = _get_best_type_from_mapping(meta.mappings)
mapping.properties["_id"] = {"type": "string", "index": "not_analyzed"}
self._parse_properties(alias, mapping, meta)
columns = self._parse_properties(alias, mapping)
table_desc.timestamp = es_last_updated
return columns
def _parse_properties(self, alias, mapping, meta):
abs_columns = elasticsearch.parse_properties(alias, None, mapping.properties)
if any(c.cardinality == 0 and c.names['.'] != '_id' for c in abs_columns):
def _parse_properties(self, alias, mapping):
abs_columns = elasticsearch.parse_properties(alias, ".", ROOT_PATH, mapping.properties)
if DEBUG and any(c.cardinality == 0 and c.name != '_id' for c in abs_columns):
Log.warning(
"Some columns are not stored {{names}}",
"Some columns are not stored in {{url}} {{index|quote}} table:\n{{names}}",
url=self.es_cluster.url,
index=alias,
names=[
".".join((c.es_index, c.names['.']))
".".join((c.es_index, c.name))
for c in abs_columns
if c.cardinality == 0
]
@@ -178,20 +181,41 @@ class ElasticsearchMetadata(Namespace):
b.insert(i, aa)
break
for q in query_paths:
q.append(SELF_PATH)
q.append(".")
query_paths.append(ROOT_PATH)
self.alias_to_query_paths[alias] = query_paths
for i in self.index_to_alias.get_domain(alias):
self.alias_to_query_paths[i] = query_paths
# ADD RELATIVE NAMES
# ENSURE ALL TABLES HAVE THE QUERY PATHS SET
self.alias_to_query_paths[alias] = query_paths
for i, a in self.index_to_alias.items():
if a == alias:
self.alias_to_query_paths[i] = query_paths
# ENSURE COLUMN HAS CORRECT jx_type
# PICK DEEPEST NESTED PROPERTY AS REPRESENTATIVE
output = []
best = {}
for abs_column in abs_columns:
abs_column.last_updated = None
abs_column.jx_type = jx_type(abs_column)
for query_path in query_paths:
abs_column.names[query_path[0]] = relative_field(abs_column.names["."], query_path[0])
self.todo.add(self.meta.columns.add(abs_column))
pass
if abs_column.jx_type not in STRUCT:
clean_name = unnest_path(abs_column.name)
other = best.get(clean_name)
if other:
if len(other.nested_path) < len(abs_column.nested_path):
output.remove(other)
self.meta.columns.update({"clear": ".", "where": {"eq": {"es_column": other.es_column, "es_index": other.es_index}}})
else:
continue
best[clean_name] = abs_column
output.append(abs_column)
# REGISTER ALL COLUMNS
canonicals = []
for abs_column in output:
canonical = self.meta.columns.add(abs_column)
canonicals.append(canonical)
self.todo.extend(canonicals)
return canonicals
def query(self, _query):
return self.meta.columns.query(QueryOp(set_default(
@@ -210,12 +234,19 @@ class ElasticsearchMetadata(Namespace):
if name in self.alias_last_updated:
return name
else:
return self.index_to_alias[name]
return self.index_to_alias.get(name)
def get_columns(self, table_name, column_name=None, force=False):
def get_columns(self, table_name, column_name=None, after=None, timeout=None):
"""
RETURN METADATA COLUMNS
:param table_name: TABLE WE WANT COLUMNS FOR
:param column_name: OPTIONAL NAME, IF INTERESTED IN ONLY ONE COLUMN
:param after: FORCE LOAD, WAITING FOR last_updated TO BE AFTER THIS TIME
:param timeout: Signal; True when should give up
:return:
"""
DEBUG and after and Log.note("getting columns for after {{time}}", time=after)
table_path = split_field(table_name)
root_table_name = table_path[0]
@@ -227,39 +258,49 @@ class ElasticsearchMetadata(Namespace):
Log.error("{{table|quote}} does not exist", table=table_name)
try:
last_update = MAX([
self.es_cluster.index_last_updated[i]
for i in self.index_to_alias.get_domain(alias)
])
table = self.get_table(alias)[0]
# LAST TIME WE GOT INFO FOR THIS TABLE
if not table:
table = TableDesc(
name=alias,
url=None,
query_path=['.'],
query_path=["."],
timestamp=Date.MIN
)
with self.meta.tables.locker:
self.meta.tables.add(table)
self._reload_columns(table)
elif force or table.timestamp < last_update:
self._reload_columns(table)
columns = self._reload_columns(table)
DEBUG and Log.note("columns from reload")
elif after or table.timestamp < self.es_cluster.metatdata_last_updated:
columns = self._reload_columns(table)
DEBUG and Log.note("columns from reload")
else:
columns = self.meta.columns.find(alias, column_name)
DEBUG and Log.note("columns from find()")
columns = self.meta.columns.find(alias, column_name)
columns = jx.sort(columns, "names.\\.")
# AT LEAST WAIT FOR THE COLUMNS TO UPDATE
while len(self.todo) and not all(columns.get("last_updated")):
DEBUG and Log.note("columns are {{ids}}", ids=[id(c) for c in columns])
columns = jx.sort(columns, "name")
if after is None:
return columns # DO NOT WAIT FOR COMPLETE COLUMNS
# WAIT FOR THE COLUMNS TO UPDATE
while True:
pending = [c for c in columns if after >= c.last_updated or (c.cardinality == None and c.jx_type not in STRUCT)]
if not pending:
break
if timeout:
Log.error("trying to gets columns timed out")
if DEBUG:
if len(columns) > 10:
Log.note("waiting for {{num}} columns to update", num=len([c for c in columns if not c.last_updated]))
if len(pending) > 10:
Log.note("waiting for {{num}} columns to update by {{timestamp}}", num=len(pending), timestamp=after)
else:
Log.note("waiting for columns to update {{columns|json}}", columns=[c.es_index+"."+c.es_column for c in columns if not c.last_updated])
Log.note("waiting for columns to update by {{timestamp}}; {{columns|json}}", timestamp=after, columns=[c.es_index + "." + c.es_column + " id="+text_type(id(c)) for c in pending])
Till(seconds=1).wait()
return columns
except Exception as e:
Log.error("Not expected", cause=e)
Log.error("Failure to get columns for {{table}}", table=table_name, cause=e)
return []
@@ -267,6 +308,7 @@ class ElasticsearchMetadata(Namespace):
"""
QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
"""
now = Date.now()
if column.es_index in self.index_does_not_exist:
return
@@ -281,7 +323,7 @@ class ElasticsearchMetadata(Namespace):
"count": len(self.meta.columns),
"cardinality": len(partitions),
"multi": 1,
"last_updated": Date.now()
"last_updated": now
},
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
@ -294,7 +336,7 @@ class ElasticsearchMetadata(Namespace):
"count": len(self.meta.tables),
"cardinality": len(partitions),
"multi": 1,
"last_updated": Date.now()
"last_updated": now
},
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
@@ -330,21 +372,42 @@ class ElasticsearchMetadata(Namespace):
})
count = result.hits.total
cardinality = 2
multi = 1
DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
self.meta.columns.update({
"set": {
"count": count,
"cardinality": cardinality,
"partitions": [False, True],
"multi": 1,
"last_updated": now
},
"clear": ["partitions"],
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
return
else:
result = self.es_cluster.post("/" + es_index + "/_search", data={
es_query = {
"aggs": {
"count": _counting_query(column),
"multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}
"_filter": {
"aggs": {"multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}},
"filter": {"bool": {"should": [
{"range": {"etl.timestamp.~n~": {"gte": (Date.today() - WEEK)}}},
{"bool": {"must_not": {"exists": {"field": "etl.timestamp.~n~"}}}}
]}}
}
},
"size": 0
})
}
result = self.es_cluster.post("/" + es_index + "/_search", data=es_query)
agg_results = result.aggregations
count = result.hits.total
cardinality = coalesce(agg_results.count.value, agg_results.count._nested.value, agg_results.count.doc_count)
multi = int(coalesce(agg_results.multi.value, 1))
multi = int(coalesce(agg_results._filter.multi.value, 1))
if cardinality == None:
Log.error("logic error")
Log.error("logic error")
query = Data(size=0)
@@ -354,7 +417,7 @@ class ElasticsearchMetadata(Namespace):
"count": cardinality,
"cardinality": cardinality,
"multi": 1,
"last_updated": Date.now()
"last_updated": now
},
"clear": ["partitions"],
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
@@ -367,7 +430,7 @@ class ElasticsearchMetadata(Namespace):
"count": count,
"cardinality": cardinality,
"multi": multi,
"last_updated": Date.now()
"last_updated": now
},
"clear": ["partitions"],
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
@@ -380,7 +443,7 @@ class ElasticsearchMetadata(Namespace):
"count": count,
"cardinality": cardinality,
"multi": multi,
"last_updated": Date.now()
"last_updated": now
},
"clear": ["partitions"],
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
@@ -391,7 +454,7 @@ class ElasticsearchMetadata(Namespace):
"nested": {"path": column.nested_path[0]},
"aggs": {"_nested": {"terms": {"field": column.es_column}}}
}
elif cardinality == 0:
elif cardinality == 0: # WHEN DOES THIS HAPPEN?
query.aggs["_"] = {"terms": {"field": column.es_column}}
else:
query.aggs["_"] = {"terms": {"field": column.es_column, "size": cardinality}}
@@ -404,13 +467,14 @@ class ElasticsearchMetadata(Namespace):
else:
parts = jx.sort(aggs.buckets.key)
DEBUG and Log.note("update metadata for {{column.es_index}}.{{column.es_column}} (id={{id}}) at {{time}}", id=id(column), column=column, time=now)
self.meta.columns.update({
"set": {
"count": count,
"cardinality": cardinality,
"multi": multi,
"partitions": parts,
"last_updated": Date.now()
"last_updated": now
},
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
@@ -421,17 +485,24 @@ class ElasticsearchMetadata(Namespace):
TEST_TABLE = "testdata"
is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
if is_missing_index and is_test_table:
if is_missing_index:
# WE EXPECT TEST TABLES TO DISAPPEAR
Log.warning("Missing index {{col.es_index}}", col=column, cause=e)
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index}}
})
self.index_does_not_exist.add(column.es_index)
elif "No field found for" in e:
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
Log.warning("Could not get column {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)
else:
self.meta.columns.update({
"set": {
"last_updated": Date.now()
"last_updated": now
},
"clear": [
"count",
@@ -451,7 +522,7 @@ class ElasticsearchMetadata(Namespace):
old_columns = [
c
for c in self.meta.columns
if (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.jx_type not in STRUCT
if ((c.last_updated < Date.now() - MAX_COLUMN_METADATA_AGE) or c.cardinality == None) and c.jx_type not in STRUCT
]
if old_columns:
DEBUG and Log.note(
@@ -460,10 +531,6 @@ class ElasticsearchMetadata(Namespace):
dates=[Date(t).format() for t in wrap(old_columns).last_updated]
)
self.todo.extend(old_columns)
# TEST CONSISTENCY
for c, d in product(list(self.todo.queue), list(self.todo.queue)):
if c.es_column == d.es_column and c.es_index == d.es_index and c != d:
Log.error("")
else:
DEBUG and Log.note("no more metatdata to update")
@@ -474,15 +541,19 @@ class ElasticsearchMetadata(Namespace):
with Timer("update {{table}}.{{column}}", param={"table": column.es_index, "column": column.es_column}, silent=not DEBUG):
if column.es_index in self.index_does_not_exist:
DEBUG and Log.note("{{column.es_column}} does not exist", column=column)
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index}}
})
continue
if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
column.last_updated = Date.now()
continue
elif column.last_updated >= Date.now()-TOO_OLD:
elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
# DO NOT UPDATE FRESH COLUMN METADATA
DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
continue
try:
self._update_cardinality(column)
@@ -502,32 +573,33 @@ class ElasticsearchMetadata(Namespace):
Log.alert("metadata scan has been disabled")
please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
while not please_stop:
c = self.todo.pop()
if c == THREAD_STOP:
column = self.todo.pop()
if column == THREAD_STOP:
break
if c.last_updated >= Date.now()-TOO_OLD:
if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
column.last_updated = Date.now()
continue
elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
# DO NOT UPDATE FRESH COLUMN METADATA
DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
continue
with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": c}, silent=not DEBUG, too_long=0.05):
self.meta.columns.update({
"set": {
"last_updated": Date.now()
},
"clear": [
"count",
"cardinality",
"multi",
"partitions",
],
"where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
})
with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
if untype_path(column.name) in ["build.type", "run.type"]:
try:
self._update_cardinality(column)
except Exception as e:
Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
else:
column.last_updated = Date.now()
def get_table(self, name):
if name == "meta.columns":
return self.meta.columns
# return self.meta.columns
with self.meta.tables.locker:
return wrap([t for t in self.meta.tables.data if t.name == name])
@@ -537,8 +609,9 @@ class ElasticsearchMetadata(Namespace):
def get_schema(self, name):
if name == "meta.columns":
return self.meta.columns.schema
query_path = split_field(name)
root, rest = query_path[0], join_field(query_path[1:])
if name == "meta.tables":
return self.meta.tables
root, rest = tail_field(name)
return self.get_snowflake(root).get_schema(rest)
@@ -564,6 +637,13 @@ class Snowflake(object):
return output
Log.error("Can not find index {{index|quote}}", index=self.name)
@property
def sorted_query_paths(self):
"""
RETURN A LIST OF ALL SCHEMA'S IN DEPTH-FIRST TOPOLOGICAL ORDER
"""
return list(reversed(sorted(p[0] for p in self.namespace.alias_to_query_paths.get(self.name))))
@property
def columns(self):
"""
@@ -578,15 +658,38 @@ class Schema(jx_base.Schema):
"""
def __init__(self, query_path, snowflake):
if not isinstance(snowflake.query_paths[0], list):
if not is_list(snowflake.query_paths[0]):
Log.error("Snowflake query paths should be a list of string tuples (well, technically, a list of lists of strings)")
self.snowflake = snowflake
try:
self.query_path = [
path = [
p
for p in snowflake.query_paths
if untype_path(p[0]) == query_path
][0]
self.snowflake = snowflake
]
if path:
# WE DO NOT NEED TO LOOK INTO MULTI-VALUED FIELDS AS A TABLE
self.multi = None
self.query_path = path[0]
else:
# LOOK INTO A SPECIFIC MULTI VALUED COLUMN
try:
self.multi = [
c
for c in self.snowflake.columns
if untype_path(c.name) == query_path and c.multi > 1
][0]
self.query_path = [self.multi.name] + self.multi.nested_path
except Exception as e:
# PROBLEM WITH METADATA UPDATE
self.multi = None
self.query_path = [query_path] + ["."]
Log.warning("Problem getting query path {{path|quote}} in snowflake {{sf|quote}}", path=query_path, sf=snowflake.name, cause=e)
if not is_list(self.query_path) or self.query_path[len(self.query_path) - 1] != ".":
Log.error("error")
except Exception as e:
Log.error("logic error", cause=e)
@@ -595,43 +698,102 @@ class Schema(jx_base.Schema):
:param column_name:
:return: ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
"""
column_name = unnest_path(column_name)
clean_name = unnest_path(column_name)
if clean_name != column_name:
clean_name = column_name
cleaner = lambda x: x
else:
cleaner = unnest_path
columns = self.columns
deep_path = self.query_path[0]
for path in self.query_path:
# TODO: '.' IMPLIES ALL FIELDS FROM THE ABSOLUTE PERSPECTIVE, ALL OTHERS ARE FROM A RELATIVE PERSPECTIVE
# TODO: HOW TO REFER TO FIELDS THAT MAY BE SHADOWED BY A RELATIVE NAME?
for path in reversed(self.query_path) if clean_name == '.' else self.query_path:
output = [
c
for c in columns
if (
(c.names['.'] != "_id" or column_name == "_id") and
c.jx_type not in OBJECTS and
startswith_field(unnest_path(c.names[path]), column_name)
(c.name != "_id" or clean_name == "_id") and
(
(c.jx_type == EXISTS and column_name.endswith("." + EXISTS_TYPE)) or
c.jx_type not in OBJECTS or
(clean_name == '.' and c.cardinality == 0)
) and
startswith_field(cleaner(relative_field(c.name, path)), clean_name)
)
]
if output:
return set(output)
return set()
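# A minimal sketch of the prefix test used above, assuming mo_dots'
# startswith_field semantics (dotted-path prefix, "." matches everything):
from mo_dots import startswith_field
assert startswith_field("a.b.c", "a.b")     # "a.b.c" IS UNDER "a.b"
assert not startswith_field("a.bb", "a.b")  # "a.bb" IS A SIBLING, NOT A CHILD
assert startswith_field("a.b", ".")         # ROOT MATCHES ALL COLUMNS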
def new_leaves(self, column_name):
"""
:param column_name:
:return: ALL COLUMNS THAT START WITH column_name, INCLUDING DEEP COLUMNS
"""
column_name = unnest_path(column_name)
columns = self.columns
all_paths = self.snowflake.sorted_query_paths
output = {}
for c in columns:
if c.name == "_id" and column_name != "_id":
continue
if c.jx_type in OBJECTS:
continue
if c.cardinality == 0:
continue
for path in all_paths:
if not startswith_field(unnest_path(relative_field(c.name, path)), column_name):
continue
existing = output.get(path)
if not existing:
output[path] = [c]
continue
if len(path) > len(c.nested_path[0]):
continue
if any("." + t + "." in c.es_column for t in (STRING_TYPE, NUMBER_TYPE, BOOLEAN_TYPE)):
# ELASTICSEARCH field TYPES ARE NOT ALLOWED
continue
# ONLY THE DEEPEST COLUMN WILL BE CHOSEN
output[path].append(c)
return set(output.values())
def both_leaves(self, column_name):
old = self.old_leaves(column_name)
new = self.new_leaves(column_name)
if old != new:
Log.error(
"not the same: {{old}}, {{new}}",
old=[c.name for c in old],
new=[c.name for c in new]
)
return new
def values(self, column_name, exclude_type=STRUCT):
"""
RETURN ALL COLUMNS THAT column_name REFERS TO
"""
column_name = unnest_path(column_name)
columns = self.columns
output = []
for path in self.query_path:
full_path = untype_path(concat_field(path, column_name))
for c in columns:
if c.jx_type in exclude_type:
continue
# if c.cardinality == 0:
# continue
if untype_path(c.name) == full_path:
output.append(c)
if output:
return output
return []
def values(self, column_name):
"""
RETURN ALL COLUMNS THAT column_name REFERS TO
"""
column_name = unnest_path(column_name)
columns = self.columns
deep_path = self.query_path[0]
for path in self.query_path:
output = [
c
for c in columns
if (
c.jx_type not in STRUCT and
untype_path(c.names[path]) == column_name
)
]
if output:
return output
return output
def __getitem__(self, column_name):
return self.values(column_name)
@ -641,7 +803,7 @@ class Schema(jx_base.Schema):
@property
def columns(self):
return self.snowflake.namespace.get_columns(literal_field(self.snowflake.name))
return self.snowflake.columns
def map_to_es(self):
"""
@ -653,9 +815,9 @@ class Schema(jx_base.Schema):
output,
{
k: c.es_column
for c in self.snowflake.columns
for c in self.columns
if c.jx_type not in STRUCT
for rel_name in [c.names[path]]
for rel_name in [relative_field(c.name, path)]
for k in [rel_name, untype_path(rel_name), unnest_path(rel_name)]
}
)
@ -695,10 +857,12 @@ def metadata_tables():
return wrap(
[
Column(
names={".": c},
name=c,
es_index="meta.tables",
es_column=c,
es_type="string",
jx_type=STRING,
last_updated=Date.now(),
nested_path=ROOT_PATH
)
for c in [
@ -708,10 +872,12 @@ def metadata_tables():
]
]+[
Column(
names={".": c},
name=c,
es_index="meta.tables",
es_column=c,
es_type="integer",
jx_type=INTEGER,
last_updated=Date.now(),
nested_path=ROOT_PATH
)
for c in [
@ -730,4 +896,152 @@ def jx_type(column):
return es_type_to_json_type[column.es_type]
python_type_to_es_type = {
none_type: "undefined",
NullType: "undefined",
bool: "boolean",
str: "string",
text_type: "string",
int: "integer",
float: "double",
Data: "object",
dict: "object",
set: "nested",
list: "nested",
FlatList: "nested",
Date: "double",
Decimal: "double",
datetime: "double",
date: "double"
}
if PY2:
python_type_to_es_type[long] = "integer"
_merge_es_type = {
"undefined": {
"undefined": "undefined",
"boolean": "boolean",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": "object",
"nested": "nested"
},
"boolean": {
"undefined": "boolean",
"boolean": "boolean",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"integer": {
"undefined": "integer",
"boolean": "integer",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"long": {
"undefined": "long",
"boolean": "long",
"integer": "long",
"long": "long",
"float": "double",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"float": {
"undefined": "float",
"boolean": "float",
"integer": "float",
"long": "double",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"double": {
"undefined": "double",
"boolean": "double",
"integer": "double",
"long": "double",
"float": "double",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"number": {
"undefined": "number",
"boolean": "number",
"integer": "number",
"long": "number",
"float": "number",
"double": "number",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"string": {
"undefined": "string",
"boolean": "string",
"integer": "string",
"long": "string",
"float": "string",
"double": "string",
"number": "string",
"string": "string",
"object": None,
"nested": None
},
"object": {
"undefined": "object",
"boolean": None,
"integer": None,
"long": None,
"float": None,
"double": None,
"number": None,
"string": None,
"object": "object",
"nested": "nested"
},
"nested": {
"undefined": "nested",
"boolean": None,
"integer": None,
"long": None,
"float": None,
"double": None,
"number": None,
"string": None,
"object": "nested",
"nested": "nested"
}
}
OBJECTS = (OBJECT, EXISTS)
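# A hedged sketch of how the lattice above can be folded over a sequence of
# types; merge_es_types is an illustrative helper, not part of this module.
def merge_es_types(types):
    acc = "undefined"
    for t in types:
        acc = _merge_es_type[acc][t]
        if acc is None:  # None MARKS AN IMPOSSIBLE MERGE (e.g. object + integer)
            raise ValueError("incompatible es types")
    return acc

assert merge_es_types(["integer", "double"]) == "double"
assert merge_es_types(["boolean", "string"]) == "string"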

45
vendor/jx_python/containers/cube.py vendored
View File

@ -7,21 +7,18 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
import mo_dots as dot
from mo_dots import Null, Data, FlatList, wrap, wrap_leaves, listwrap
from mo_logs import Log
from mo_math import MAX, OR
from mo_collections.matrix import Matrix
from mo_future import is_text, is_binary
from jx_base.container import Container
from jx_base.query import _normalize_edge
from jx_python.cubes.aggs import cube_aggs
from jx_python.lists.aggs import is_aggs
from jx_base.query import _normalize_edge
from mo_collections.matrix import Matrix
from mo_dots import Data, FlatList, Null, is_data, is_list, listwrap, wrap, wrap_leaves
import mo_dots as dot
from mo_logs import Log
from mo_math import MAX, OR
class Cube(Container):
@ -36,7 +33,7 @@ class Cube(Container):
ALLOWED, USING THE select AND edges TO DESCRIBE THE data
"""
self.is_value = False if isinstance(select, list) else True
self.is_value = False if is_list(select) else True
self.select = select
self.meta = Data(format="cube") # PUT EXTRA MARKUP HERE
self.is_none = False
@ -45,37 +42,37 @@ class Cube(Container):
is_none = True
# ENSURE frum IS PROPER FORM
if isinstance(select, list):
if is_list(select):
if edges and OR(not isinstance(v, Matrix) for v in data.values()):
Log.error("Expecting data to be a dict with Matrix values")
if not edges:
if not data:
if isinstance(select, list):
if is_list(select):
Log.error("not expecting a list of records")
data = {select.name: Matrix.ZERO}
self.edges = FlatList.EMPTY
elif isinstance(data, Mapping):
elif is_data(data):
# EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
length = MAX([len(v) for v in data.values()])
if length >= 1:
self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
else:
self.edges = FlatList.EMPTY
elif isinstance(data, list):
if isinstance(select, list):
elif is_list(data):
if is_list(select):
Log.error("not expecting a list of records")
data = {select.name: Matrix.wrap(data)}
self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(data), "interval": 1}}])
elif isinstance(data, Matrix):
if isinstance(select, list):
if is_list(select):
Log.error("not expecting a list of records")
data = {select.name: data}
else:
if isinstance(select, list):
if is_list(select):
Log.error("not expecting a list of records")
data = {select.name: Matrix(value=data)}
@ -148,7 +145,7 @@ class Cube(Container):
return Null
if self.edges:
Log.error("can not get value of with dimension")
if isinstance(self.select, list):
if is_list(self.select):
Log.error("can not get value of multi-valued cubes")
return self.data[self.select.name].cube
@ -205,7 +202,7 @@ class Cube(Container):
# EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
# AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
# PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
if isinstance(item, Mapping):
if is_data(item):
coordinates = [None] * len(self.edges)
# MAP DICT TO NUMERIC INDICES
@ -232,7 +229,7 @@ class Cube(Container):
data={k: Matrix(values=c.__getitem__(coordinates)) for k, c in self.data.items()}
)
return output
elif isinstance(item, text_type):
elif is_text(item):
# RETURN A VALUE CUBE
if self.is_value:
if item != self.select.name:
@ -320,7 +317,7 @@ class Cube(Container):
getKey = [e.domain.getKey for e in self.edges]
lookup = [[getKey[i](p) for p in e.domain.partitions+([None] if e.allowNulls else [])] for i, e in enumerate(self.edges)]
if isinstance(self.select, list):
if is_list(self.select):
selects = listwrap(self.select)
index, v = transpose(*self.data[selects[0].name].groupby(selector))
@ -375,7 +372,7 @@ class Cube(Container):
output = wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})
return output
if isinstance(self.select, list):
if is_list(self.select):
selects = listwrap(self.select)
index, v = transpose(*self.data[selects[0].name].groupby(selector))

22
vendor/jx_python/containers/doc_store.py vendored
View File

@ -6,22 +6,20 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from copy import copy
from datetime import datetime
from mo_future import text_type
from mo_dots import wrap, Data, FlatList, literal_field
from mo_json.typed_encoder import TYPE_PREFIX
from mo_logs import Log
from pyLibrary import convert
from jx_base.query import QueryOp
from jx_python import jx
from jx_python.containers import Container
from jx_python.expressions import Variable, Literal
from jx_base.query import QueryOp
from jx_python.expressions import Literal, Variable
from mo_dots import Data, FlatList, is_list, literal_field, wrap
from mo_future import text_type
from mo_json.typed_encoder import TYPE_PREFIX
from mo_logs import Log
INDEX = "__index__"
PARENT = "__parent__"
@ -108,7 +106,7 @@ class DocStore(Container):
if query.sort:
short_list = self._sort(query.sort)
if isinstance(query.select, list):
if is_list(query.select):
accessors = map(jx.get, query.select.value)
if query.window:
@ -218,7 +216,7 @@ class DocStore(Container):
return filters[where.name](self, where)
def _eq(self, op):
if isinstance(op.lhs, Variable) and isinstance(op.rhs, Literal):
if is_op(op.lhs, Variable) and is_literal(op.rhs):
return copy(self._index[op.lhs][op.rhs])
def _and(self, op):

vendor/jx_python/containers/list_usingPythonList.py vendored
View File

@ -7,29 +7,25 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from copy import copy
import itertools
from collections import Mapping
import jx_base
from jx_base import Container
from jx_base.expressions import jx_expression, Expression, Variable, TRUE
from jx_python.expression_compiler import compile_expression
from jx_base.expressions import TRUE, Variable
from jx_base.language import is_expression, is_op
from jx_python.expressions import jx_expression_to_function
from jx_python.lists.aggs import is_aggs, list_aggs
from jx_python.meta import get_schema_from_list
from mo_collections import UniqueIndex
from mo_dots import Data, wrap, listwrap, unwraplist, unwrap, Null
from mo_future import sort_using_key
from mo_dots import Data, Null, is_data, is_list, listwrap, unwrap, unwraplist, wrap
from mo_future import first, sort_using_key
from mo_logs import Log
from mo_threads import Lock
from pyLibrary import convert
_get = object.__getattribute__
class ListContainer(Container, jx_base.Namespace, jx_base.Table):
"""
@ -96,14 +92,14 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
if q.format == "list":
return Data(data=output.data, meta={"format": "list"})
elif q.format == "table":
head = [c.names['.'] for c in output.schema.columns]
head = [c.name for c in output.schema.columns]
data = [
[r if h == '.' else r[h] for h in head]
[r if h == "." else r[h] for h in head]
for r in output.data
]
return Data(header=head, data=data, meta={"format": "table"})
elif q.format == "cube":
head = [c.names['.'] for c in output.schema.columns]
head = [c.name for c in output.schema.columns]
rows = [
[r[h] for h in head]
for r in output.data
@ -144,10 +140,10 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
return self.where(where)
def where(self, where):
if isinstance(where, Mapping):
temp = compile_expression(jx_expression(where).to_python())
elif isinstance(where, Expression):
temp = compile_expression(where.to_python())
if is_data(where):
temp = jx_expression_to_function(where)
elif is_expression(where):
temp = jx_expression_to_function(where)
else:
temp = where
@ -161,7 +157,7 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
:param select: the variable to extract from list
:return: a simple list of the extraction
"""
if isinstance(select, list):
if is_list(select):
return [(d[s] for s in select) for d in self.data]
else:
return [d[select] for d in self.data]
@ -169,20 +165,20 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
def select(self, select):
selects = listwrap(select)
if len(selects) == 1 and isinstance(selects[0].value, Variable) and selects[0].value.var == ".":
if len(selects) == 1 and is_op(selects[0].value, Variable) and selects[0].value.var == ".":
new_schema = self.schema
if selects[0].name == ".":
return self
else:
new_schema = None
if isinstance(select, list):
if is_list(select):
if all(
isinstance(s.value, Variable) and s.name == s.value.var
is_op(s.value, Variable) and s.name == s.value.var
for s in select
):
names = set(s.value.var for s in select)
new_schema = Schema(".", [c for c in self.schema.columns if c.names['.'] in names])
new_schema = Schema(".", [c for c in self.schema.columns if c.name in names])
push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]
def selector(d):
@ -195,6 +191,10 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
else:
select_value = jx_expression_to_function(select.value)
new_data = map(select_value, self.data)
if is_op(select.value, Variable):
column = copy(first(c for c in self.schema.columns if c.name == select.value.var))
column.name = '.'
new_schema = Schema("from " + self.name, [column])
return ListContainer("from "+self.name, data=new_data, schema=new_schema)
@ -242,10 +242,16 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
self.data.extend(documents)
def __data__(self):
return wrap({
"meta": {"format": "list"},
"data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
})
if first(self.schema.columns).name=='.':
return wrap({
"meta": {"format": "list"},
"data": self.data
})
else:
return wrap({
"meta": {"format": "list"},
"data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
})
def get_columns(self, table_name=None):
return self.schema.values()
@ -264,8 +270,6 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
def __len__(self):
return len(self.data)
# class Namespace(jx_base.Namespace):
def get_snowflake(self, name):
if self.name != name:
Log.error("This container only has table by name of {{name}}", name=name)
@ -291,8 +295,6 @@ def _exec(code):
Log.error("Could not execute {{code|quote}}", code=code, cause=e)
from jx_base.schema import Schema
from jx_python import jx
@ -300,6 +302,5 @@ from jx_python import jx
DUAL = ListContainer(
name="dual",
data=[{}],
schema=Schema(table_name="dual", columns=UniqueIndex(keys=("names.\\.",)))
schema=Schema(table_name="dual", columns=UniqueIndex(keys=("name",)))
)

12
vendor/jx_python/cubes/aggs.py vendored
View File

@ -7,19 +7,17 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import itertools
from jx_base.domains import DefaultDomain, SimpleSetDomain
from jx_python import windows
from mo_dots import listwrap
from mo_logs import Log
from jx_base.domains import SimpleSetDomain, DefaultDomain
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import listwrap
from mo_logs import Log
def cube_aggs(frum, query):

10
vendor/jx_python/expression_compiler.py vendored
View File

@ -7,16 +7,15 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import re
from pyLibrary import convert
from mo_dots import Data, coalesce, is_data, listwrap, wrap_leaves
from mo_logs import Log
from mo_dots import coalesce, Data, listwrap, wrap_leaves
from mo_times.dates import Date
from pyLibrary import convert
true = True
false = False
@ -42,6 +41,7 @@ def compile_expression(source):
_ = EMPTY_DICT
_ = re
_ = wrap_leaves
_ = is_data
fake_locals = {}
try:

1011
vendor/jx_python/expressions.py vendored

File diff not shown because of its large size. Load Diff

20
vendor/jx_python/flat_list.py vendored
View File

@ -8,15 +8,15 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from collections import Mapping
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import functools
from mo_math import MIN
from mo_dots import Data, FlatList, coalesce, is_data, is_list, split_field, wrap
from mo_future import is_text
from mo_logs import Log
from mo_dots import split_field, coalesce, Data, FlatList, wrap
from mo_math import MIN
class PartFlatList(list):
@ -52,10 +52,10 @@ class PartFlatList(list):
yield r
def select(self, fields):
if isinstance(fields, Mapping):
if is_data(fields):
fields=fields.value
if isinstance(fields, text_type):
if is_text(fields):
# RETURN LIST OF VALUES
if len(split_field(fields)) == 1:
if self.path[0] == fields:
@ -71,7 +71,7 @@ class PartFlatList(list):
_select1((wrap(d[depth]) for d in self.data), short_key, 0, output)
return output
if isinstance(fields, list):
if is_list(fields):
output = FlatList()
meta = []
@ -131,7 +131,7 @@ def _select1(data, field, depth, output):
if d == None:
output.append(None)
break
elif isinstance(d, list):
elif is_list(d):
_select1(d, field, i + 1, output)
break
else:

19
vendor/jx_python/group_by.py vendored
View File

@ -8,22 +8,19 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import math
import sys
from mo_dots import listwrap, Null, Data
from mo_future import text_type, binary_type
from mo_logs import Log
from jx_base.container import Container
from jx_base.expressions import jx_expression, Expression
from jx_base.expressions import jx_expression
from jx_base.language import is_expression
from jx_python.expressions import jx_expression_to_function
from mo_collections.multiset import Multiset
from mo_dots.lists import FlatList
from mo_dots import Data, FlatList, Null, listwrap
from mo_future import binary_type, text_type
from mo_logs import Log
from mo_logs.exceptions import Except
@ -57,7 +54,7 @@ def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous
if not data:
return Null
if any(isinstance(k, Expression) for k in keys):
if any(is_expression(k) for k in keys):
Log.error("can not handle expressions")
else:
accessor = jx_expression_to_function(jx_expression({"tuple": keys})) # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__
@ -146,7 +143,7 @@ def groupby_min_max_size(data, min_size=0, max_size=None, ):
if max_size == None:
max_size = sys.maxint
if isinstance(data, (bytearray, text_type, binary_type, list)):
if data.__class__ in (bytearray, text_type, binary_type, list, FlatList):
def _iter():
num = int(math.ceil(len(data)/max_size))
for i in range(num):

356
vendor/jx_python/jx.py vendored
View File

@ -8,30 +8,14 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
_range = range
from mo_times import Date
from collections import Mapping
from jx_base import query
from jx_python import expressions as _expressions
from jx_python import flat_list, group_by
from mo_dots import listwrap, wrap, unwrap, FlatList, NullType
from mo_dots import set_default, Null, Data, split_field, coalesce, join_field
from mo_future import text_type, boolean_type, none_type, long, generator_types, sort_using_cmp, PY2
from mo_logs import Log
from mo_math import Math
from mo_math import UNION, MIN
from pyLibrary import convert
import mo_dots
from jx_base.container import Container
from jx_base.expressions import TRUE, FALSE, NullOp
from jx_base.expressions import FALSE, TRUE
from jx_base.query import QueryOp, _normalize_selects
from jx_base.language import is_op, value_compare
from jx_python import expressions as _expressions, flat_list, group_by
from jx_python.containers.cube import Cube
from jx_python.cubes.aggs import cube_aggs
from jx_python.expression_compiler import compile_expression
@ -39,7 +23,14 @@ from jx_python.expressions import jx_expression_to_function
from jx_python.flat_list import PartFlatList
from mo_collections.index import Index
from mo_collections.unique_index import UniqueIndex
import mo_dots
from mo_dots import Data, FlatList, Null, coalesce, is_container, is_data, is_list, is_many, join_field, listwrap, set_default, split_field, unwrap, wrap
from mo_dots.objects import DataObject
from mo_future import is_text, sort_using_cmp
from mo_logs import Log
import mo_math
from mo_math import MIN, UNION
from pyLibrary import convert
# A COLLECTION OF DATABASE OPERATORS (RELATIONAL ALGEBRA OPERATORS)
# JSON QUERY EXPRESSION DOCUMENTATION: https://github.com/klahnakoski/jx/tree/master/docs
@ -47,6 +38,7 @@ from mo_dots.objects import DataObject
# TODO: USE http://docs.sqlalchemy.org/en/latest/core/tutorial.html AS DOCUMENTATION FRAMEWORK
builtin_tuple = tuple
_range = range
_Column = None
_merge_type = None
_ = _expressions
@ -65,29 +57,32 @@ def run(query, container=Null):
BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
"""
if container == None:
container = wrap(query)['from']
container = wrap(query)["from"]
query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
else:
query_op = QueryOp.wrap(query, container, container.namespace)
if container == None:
from jx_python.containers.list_usingPythonList import DUAL
return DUAL.query(query_op)
elif isinstance(container, Container):
return container.query(query_op)
elif isinstance(container, (list, set) + generator_types):
elif is_many(container):
container = wrap(list(container))
elif isinstance(container, Cube):
if is_aggs(query_op):
return cube_aggs(container, query_op)
elif isinstance(container, QueryOp):
elif is_op(container, QueryOp):
container = run(container)
elif isinstance(container, Mapping):
elif is_data(container):
query = container
container = query['from']
container = query["from"]
container = run(QueryOp.wrap(query, container, container.namespace), container)
else:
Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)
Log.error(
"Do not know how to handle {{type}}", type=container.__class__.__name__
)
if is_aggs(query_op):
container = list_aggs(container, query_op)
@ -115,10 +110,7 @@ def run(query, container=Null):
container = convert.list2table(container)
container.meta.format = "table"
else:
container = wrap({
"meta": {"format": "list"},
"data": container
})
container = wrap({"meta": {"format": "list"}, "data": container})
return container
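# A hedged usage sketch of run(); the records and query are made up, and the
# query shape follows the JSON query expression docs linked above:
from jx_python import jx
result = jx.run({
    "from": [{"a": 1}, {"a": 2}, {"a": 3}],
    "where": {"gt": {"a": 1}},
    "format": "list",
})
# result.data IS EXPECTED TO HOLD THE ROWS WHERE a > 1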
@ -127,14 +119,19 @@ groupby = group_by.groupby
def index(data, keys=None):
# return dict that uses keys to index data
# return dict that uses keys to index data
o = Index(keys)
if isinstance(data, Cube):
if data.edges[0].name==keys[0]:
#QUICK PATH
if data.edges[0].name == keys[0]:
# QUICK PATH
names = list(data.data.keys())
for d in (set_default(mo_dots.zip(names, r), {keys[0]: p}) for r, p in zip(zip(*data.data.values()), data.edges[0].domain.partitions.value)):
for d in (
set_default(mo_dots.zip(names, r), {keys[0]: p})
for r, p in zip(
zip(*data.data.values()), data.edges[0].domain.partitions.value
)
):
o.add(d)
return o
else:
@ -157,19 +154,20 @@ def unique_index(data, keys=None, fail_on_dup=True):
o.add(d)
except Exception as e:
o.add(d)
Log.error("index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}",
index= keys,
key= select([d], keys)[0],
value1= o[d],
value2= d,
cause=e
Log.error(
"index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}",
index=keys,
key=select([d], keys)[0],
value1=o[d],
value2=d,
cause=e,
)
return o
def map2set(data, relation):
"""
EXPECTING A isinstance(relation, Mapping) THAT MAPS VALUES TO lists
EXPECTING A is_data(relation) THAT MAPS VALUES TO lists
THE LISTS ARE EXPECTED TO POINT TO MEMBERS OF A SET
A set() IS RETURNED
"""
@ -178,7 +176,7 @@ def map2set(data, relation):
if isinstance(relation, Data):
Log.error("Does not accept a Data")
if isinstance(relation, Mapping):
if is_data(relation):
try:
# relation[d] is expected to be a list
# return set(cod for d in data for cod in relation[d])
@ -215,20 +213,20 @@ def tuple(data, field_name):
if isinstance(data, FlatList):
Log.error("not supported yet")
if isinstance(field_name, Mapping) and "value" in field_name:
if is_data(field_name) and "value" in field_name:
# SIMPLIFY {"value":value} AS STRING
field_name = field_name["value"]
# SIMPLE PYTHON ITERABLE ASSUMED
if isinstance(field_name, text_type):
if is_text(field_name):
if len(split_field(field_name)) == 1:
return [(d[field_name], ) for d in data]
return [(d[field_name],) for d in data]
else:
path = split_field(field_name)
output = []
flat_list._tuple1(data, path, 0, output)
return output
elif isinstance(field_name, list):
elif is_list(field_name):
paths = [_select_a_field(f) for f in field_name]
output = FlatList()
_tuple((), unwrap(data), paths, 0, output)
@ -265,16 +263,16 @@ def _tuple_deep(v, field, depth, record):
field = {"name":name, "value":["attribute", "path"]}
r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH
"""
if hasattr(field.value, '__call__'):
return 0, None, record + (field.value(v), )
if hasattr(field.value, "__call__"):
return 0, None, record + (field.value(v),)
for i, f in enumerate(field.value[depth:len(field.value) - 1:]):
for i, f in enumerate(field.value[depth : len(field.value) - 1 :]):
v = v.get(f)
if isinstance(v, list):
if is_list(v):
return depth + i + 1, v, record
f = field.value.last()
return 0, None, record + (v.get(f), )
return 0, None, record + (v.get(f),)
def select(data, field_name):
@ -288,12 +286,14 @@ def select(data, field_name):
return data.select(field_name)
if isinstance(data, UniqueIndex):
data = data._data.values() # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING
data = (
data._data.values()
) # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING
if isinstance(data, Mapping):
if is_data(data):
return select_one(data, field_name)
if isinstance(field_name, Mapping):
if is_data(field_name):
field_name = wrap(field_name)
if field_name.value in ["*", "."]:
return data
@ -303,7 +303,7 @@ def select(data, field_name):
field_name = field_name.value
# SIMPLE PYTHON ITERABLE ASSUMED
if isinstance(field_name, text_type):
if is_text(field_name):
path = split_field(field_name)
if len(path) == 1:
return FlatList([d[field_name] for d in data])
@ -311,7 +311,7 @@ def select(data, field_name):
output = FlatList()
flat_list._select1(data, path, 0, output)
return output
elif isinstance(field_name, list):
elif is_list(field_name):
keys = [_select_a_field(wrap(f)) for f in field_name]
return _select(Data(), unwrap(data), keys, 0)
else:
@ -320,9 +320,9 @@ def select(data, field_name):
def _select_a_field(field):
if isinstance(field, text_type):
if is_text(field):
return wrap({"name": field, "value": split_field(field)})
elif isinstance(wrap(field).value, text_type):
elif is_text(wrap(field).value):
field = wrap(field)
return wrap({"name": field.name, "value": split_field(field.value)})
else:
@ -334,8 +334,8 @@ def _select(template, data, fields, depth):
deep_path = []
deep_fields = UniqueIndex(["name"])
for d in data:
if isinstance(d, Data):
Log.error("programmer error, _select can not handle Data")
if d.__class__ is Data:
Log.error("programmer error, _select can not handle Data, only dict")
record = template.copy()
children = None
@ -364,18 +364,18 @@ def _select_deep(v, field, depth, record):
field = {"name":name, "value":["attribute", "path"]}
r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH
"""
if hasattr(field.value, '__call__'):
if hasattr(field.value, "__call__"):
try:
record[field.name] = field.value(wrap(v))
except Exception as e:
record[field.name] = None
return 0, None
for i, f in enumerate(field.value[depth:len(field.value) - 1:]):
for i, f in enumerate(field.value[depth : len(field.value) - 1 :]):
v = v.get(f)
if v is None:
return 0, None
if isinstance(v, list):
if is_list(v):
return depth + i + 1, v
f = field.value.last()
@ -385,7 +385,9 @@ def _select_deep(v, field, depth, record):
else:
record[field.name] = v.get(f)
except Exception as e:
Log.error("{{value}} does not have {{field}} property", value= v, field=f, cause=e)
Log.error(
"{{value}} does not have {{field}} property", value=v, field=f, cause=e
)
return 0, None
@ -396,26 +398,31 @@ def _select_deep_meta(field, depth):
RETURN FUNCTION THAT PERFORMS THE MAPPING
"""
name = field.name
if hasattr(field.value, '__call__'):
if hasattr(field.value, "__call__"):
try:
def assign(source, destination):
destination[name] = field.value(wrap(source))
return 0, None
return assign
except Exception as e:
def assign(source, destination):
destination[name] = None
return 0, None
return assign
prefix = field.value[depth:len(field.value) - 1:]
prefix = field.value[depth : len(field.value) - 1 :]
if prefix:
def assign(source, destination):
for i, f in enumerate(prefix):
source = source.get(f)
if source is None:
return 0, None
if isinstance(source, list):
if is_list(source):
return depth + i + 1, source
f = field.value.last()
@ -425,23 +432,38 @@ def _select_deep_meta(field, depth):
else:
destination[name] = source.get(f)
except Exception as e:
Log.error("{{value}} does not have {{field}} property", value= source, field=f, cause=e)
Log.error(
"{{value}} does not have {{field}} property",
value=source,
field=f,
cause=e,
)
return 0, None
return assign
else:
f = field.value[0]
if not f: # NO NAME FIELD INDICATES SELECT VALUE
def assign(source, destination):
destination[name] = source
return 0, None
return assign
else:
def assign(source, destination):
try:
destination[name] = source.get(f)
except Exception as e:
Log.error("{{value}} does not have {{field}} property", value= source, field=f, cause=e)
Log.error(
"{{value}} does not have {{field}} property",
value=source,
field=f,
cause=e,
)
return 0, None
return assign
@ -450,7 +472,12 @@ def get_columns(data, leaves=False):
if not leaves:
return wrap([{"name": n} for n in UNION(set(d.keys()) for d in data)])
else:
return wrap([{"name": leaf} for leaf in set(leaf for row in data for leaf, _ in row.leaves())])
return wrap(
[
{"name": leaf}
for leaf in set(leaf for row in data for leaf, _ in row.leaves())
]
)
_ = """
@ -490,23 +517,23 @@ def _deeper_iterator(columns, nested_path, path, data):
c = columns.get(leaf)
if not c:
c = columns[leaf] = _Column(name=leaf, type=type_to_name[v.__class__], table=None, es_column=leaf)
c.type = _merge_type[c.type][type_to_name[v.__class__]]
if c.type == "nested" and not nested_path[0].startswith(leaf + "."):
c.jx_type = _merge_type[c.jx_type][type_to_name[v.__class__]]
if c.jx_type == "nested" and not nested_path[0].startswith(leaf + "."):
if leaf.startswith(nested_path[0] + ".") or leaf == nested_path[0] or not nested_path[0]:
nested_path[0] = leaf
else:
Log.error("nested path conflict: {{leaf}} vs {{nested}}", leaf=leaf, nested=nested_path[0])
if isinstance(v, list) and v:
if is_list(v) and v:
if deep_leaf:
Log.error("nested path conflict: {{leaf}} vs {{nested}}", leaf=leaf, nested=deep_leaf)
deep_leaf = leaf
deep_v = v
elif isinstance(v, Mapping):
elif is_data(v):
for o in _deeper_iterator(columns, nested_path, leaf, [v]):
set_default(output, o)
else:
if c.type not in ["object", "nested"]:
if c.jx_type not in ["object", "nested"]:
output[leaf] = v
if deep_leaf:
@ -517,6 +544,7 @@ def _deeper_iterator(columns, nested_path, path, data):
yield output
"""
def sort(data, fieldnames=None, already_normalized=False):
"""
PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction}
@ -545,101 +573,23 @@ def sort(data, fieldnames=None, already_normalized=False):
Log.error("problem with compare", e)
return 0
if isinstance(data, list):
if is_list(data):
output = FlatList([unwrap(d) for d in sort_using_cmp(data, cmp=comparer)])
elif hasattr(data, "__iter__"):
output = FlatList([unwrap(d) for d in sort_using_cmp(list(data), cmp=comparer)])
output = FlatList(
[unwrap(d) for d in sort_using_cmp(list(data), cmp=comparer)]
)
else:
Log.error("Do not know how to handle")
output = None
return output
except Exception as e:
Log.error("Problem sorting\n{{data}}", data=data, cause=e)
Log.error("Problem sorting\n{{data}}", data=data, cause=e)
def count(values):
return sum((1 if v!=None else 0) for v in values)
def value_compare(left, right, ordering=1):
"""
SORT VALUES, NULL IS THE LEAST VALUE
:param left: LHS
:param right: RHS
:param ordering: (-1, 0, 0) TO AFFECT SORT ORDER
:return: The return value is negative if x < y, zero if x == y and strictly positive if x > y.
"""
try:
if isinstance(left, list) or isinstance(right, list):
if left == None:
return ordering
elif right == None:
return - ordering
left = listwrap(left)
right = listwrap(right)
for a, b in zip(left, right):
c = value_compare(a, b) * ordering
if c != 0:
return c
if len(left) < len(right):
return - ordering
elif len(left) > len(right):
return ordering
else:
return 0
ltype = type(left)
rtype = type(right)
ltype_num = TYPE_ORDER.get(ltype, 10)
rtype_num = TYPE_ORDER.get(rtype, 10)
type_diff = ltype_num - rtype_num
if type_diff != 0:
return ordering if type_diff > 0 else -ordering
if ltype_num == 9:
return 0
elif ltype is builtin_tuple:
for a, b in zip(left, right):
c = value_compare(a, b)
if c != 0:
return c * ordering
return 0
elif ltype in (dict, Data):
for k in sorted(set(left.keys()) | set(right.keys())):
c = value_compare(left.get(k), right.get(k)) * ordering
if c != 0:
return c
return 0
elif left > right:
return ordering
elif left < right:
return -ordering
else:
return 0
except Exception as e:
Log.error("Can not compare values {{left}} to {{right}}", left=left, right=right, cause=e)
TYPE_ORDER = {
boolean_type: 0,
int: 1,
float: 1,
Date: 1,
text_type: 2,
list: 3,
builtin_tuple: 3,
dict: 4,
Data: 4,
none_type: 9,
NullType: 9,
NullOp: 9
}
if PY2:
TYPE_ORDER[long] = 1
return sum((1 if v != None else 0) for v in values)
def pairwise(values):
@ -654,6 +604,7 @@ def pairwise(values):
yield (a, b)
a = b
pairs = pairwise
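# Worked example of the generator above: consecutive overlapping pairs.
assert list(pairwise([1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)]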
@ -667,18 +618,22 @@ def filter(data, where):
if isinstance(data, Container):
return data.filter(where)
if isinstance(data, (list, set)):
if is_container(data):
temp = jx_expression_to_function(where)
dd = wrap(data)
return wrap([unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])
else:
Log.error("Do not know how to handle type {{type}}", type=data.__class__.__name__)
Log.error(
"Do not know how to handle type {{type}}", type=data.__class__.__name__
)
try:
return drill_filter(where, data)
except Exception as _:
# WOW! THIS IS INEFFICIENT!
return wrap([unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])])
return wrap(
[unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])]
)
def drill_filter(esfilter, data):
@ -690,7 +645,9 @@ def drill_filter(esfilter, data):
esfilter = unwrap(esfilter)
primary_nested = [] # track if nested, changes if not
primary_column = [] # only one path allowed
primary_branch = [] # CONTAINS LISTS OF RECORDS TO ITERATE: constantly changing as we dfs the tree
primary_branch = (
[]
) # CONTAINS LISTS OF RECORDS TO ITERATE: constantly changing as we dfs the tree
def parse_field(fieldname, data, depth):
"""
@ -703,21 +660,21 @@ def drill_filter(esfilter, data):
d = d[c]
except Exception as e:
Log.error("{{name}} does not exist", name=fieldname)
if isinstance(d, list) and len(col) > 1:
if len(primary_column) <= depth+i:
if is_list(d) and len(col) > 1:
if len(primary_column) <= depth + i:
primary_nested.append(True)
primary_column.append(c)
primary_branch.append(d)
elif primary_nested[depth] and primary_column[depth+i] != c:
elif primary_nested[depth] and primary_column[depth + i] != c:
Log.error("only one branch of tree allowed")
else:
primary_nested[depth+i] = True
primary_column[depth+i] = c
primary_branch[depth+i] = d
primary_nested[depth + i] = True
primary_column[depth + i] = c
primary_branch[depth + i] = d
return c, join_field(col[i+1:])
return c, join_field(col[i + 1 :])
else:
if len(primary_column) <= depth+i:
if len(primary_column) <= depth + i:
primary_nested.append(False)
primary_column.append(c)
primary_branch.append([d])
@ -737,7 +694,7 @@ def drill_filter(esfilter, data):
if filter["and"]:
result = True
output = FlatList()
for a in filter[u"and"]:
for a in filter["and"]:
f = pe_filter(a, data, depth)
if f is False:
result = False
@ -749,7 +706,7 @@ def drill_filter(esfilter, data):
return result
elif filter["or"]:
output = FlatList()
for o in filter[u"or"]:
for o in filter["or"]:
f = pe_filter(o, data, depth)
if f is True:
return True
@ -843,7 +800,7 @@ def drill_filter(esfilter, data):
else:
return result
elif filter.missing:
if isinstance(filter.missing, text_type):
if is_text(filter.missing):
field = filter["missing"]
else:
field = filter["missing"]["field"]
@ -863,7 +820,7 @@ def drill_filter(esfilter, data):
first, rest = parse_field(col, data, depth)
d = data[first]
if not rest:
if d==None or not d.startswith(val):
if d == None or not d.startswith(val):
result = False
else:
output[rest] = val
@ -873,7 +830,7 @@ def drill_filter(esfilter, data):
return result
elif filter.exists:
if isinstance(filter["exists"], text_type):
if is_text(filter["exists"]):
field = filter["exists"]
else:
field = filter["exists"]["field"]
@ -887,7 +844,7 @@ def drill_filter(esfilter, data):
else:
return {"exists": rest}
else:
Log.error(u"Can not interpret esfilter: {{esfilter}}", {u"esfilter": filter})
Log.error("Can not interpret esfilter: {{esfilter}}", {"esfilter": filter})
output = [] # A LIST OF OBJECTS MAKING THROUGH THE FILTER
@ -912,7 +869,7 @@ def drill_filter(esfilter, data):
# OUTPUT
for i, d in enumerate(data):
if isinstance(d, Mapping):
if is_data(d):
main([], esfilter, wrap(d), 0)
else:
Log.error("filter is expecting a dict, not {{type}}", type=d.__class__)
@ -927,6 +884,7 @@ def drill_filter(esfilter, data):
# OUTPUT IS A LIST OF ROWS,
# WHERE EACH ROW IS A LIST OF VALUES SEEN DURING A WALK DOWN A PATH IN THE HIERARCHY
uniform_output = FlatList()
def recurse(row, depth):
if depth == max:
uniform_output.append(row)
@ -957,21 +915,24 @@ def wrap_function(func):
"""
RETURN A THREE-PARAMETER WINDOW FUNCTION TO MATCH
"""
if isinstance(func, text_type):
if is_text(func):
return compile_expression(func)
numarg = func.__code__.co_argcount
if numarg == 0:
def temp(row, rownum, rows):
return func()
return temp
elif numarg == 1:
def temp(row, rownum, rows):
return func(row)
return temp
elif numarg == 2:
def temp(row, rownum, rows):
return func(row, rownum)
@ -985,13 +946,17 @@ def window(data, param):
MAYBE WE CAN DO THIS WITH NUMPY (no, the edges of windows are not graceful with numpy)
data - list of records
"""
name = param.name # column to assign window function result
edges = param.edges # columns to group by
where = param.where # DO NOT CONSIDER THESE VALUES
sortColumns = param.sort # columns to sort by
calc_value = jx_expression_to_function(param.value) # function that takes a record and returns a value (for aggregation)
name = param.name # column to assign window function result
edges = param.edges  # columns to group by
where = param.where # DO NOT CONSIDER THESE VALUES
sortColumns = param.sort # columns to sort by
calc_value = jx_expression_to_function(
param.value
) # function that takes a record and returns a value (for aggregation)
aggregate = param.aggregate # WindowFunction to apply
_range = param.range # of form {"min":-10, "max":0} to specify the size and relative position of window
_range = (
param.range
) # of form {"min":-10, "max":0} to specify the size and relative position of window
data = filter(data, where)
@ -1014,7 +979,7 @@ def window(data, param):
if not aggregate or aggregate == "none":
for _, values in groupby(data, edge_values):
if not values:
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
if sortColumns:
sequence = sort(values, sortColumns, already_normalized=True)
@ -1027,7 +992,7 @@ def window(data, param):
for keys, values in groupby(data, edge_values):
if not values:
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
sequence = sort(values, sortColumns)
@ -1052,11 +1017,6 @@ def window(data, param):
r["__temp__"] = None # CLEANUP
def intervals(_min, _max=None, size=1):
"""
RETURN (min, max) PAIRS OF GIVEN SIZE, WHICH COVER THE _min, _max RANGE
@ -1066,8 +1026,8 @@ def intervals(_min, _max=None, size=1):
if _max == None:
_max = _min
_min = 0
_max = int(Math.ceiling(_max))
_min = int(Math.floor(_min))
_max = int(mo_math.ceiling(_max))
_min = int(mo_math.floor(_min))
output = ((x, min(x + size, _max)) for x in _range(_min, _max, size))
return output
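# Worked example: pairs of the requested size, with the last pair clipped
# to _max.
assert list(intervals(0, 10, size=3)) == [(0, 3), (3, 6), (6, 9), (9, 10)]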
@ -1076,10 +1036,10 @@ def intervals(_min, _max=None, size=1):
def prefixes(vals):
"""
:param vals: iterable
:return: vals[:1], vals[:1], ... , vals[:n]
:return: vals[:1], vals[:2], ... , vals[:n]
"""
for i in range(len(vals)):
yield vals[:i + 1]
yield vals[: i + 1]
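# Worked example, matching the corrected docstring above:
assert list(prefixes("abc")) == ["a", "ab", "abc"]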
def accumulate(vals):
@ -1092,6 +1052,7 @@ def accumulate(vals):
yield sum, v
sum += v
def reverse(vals):
# TODO: Test how to do this fastest
if not hasattr(vals, "len"):
@ -1105,11 +1066,10 @@ def reverse(vals):
return wrap(output)
def countdown(vals):
remaining = len(vals) - 1
return [(remaining - i, v) for i, v in enumerate(vals)]
from jx_python.lists.aggs import is_aggs, list_aggs

6
vendor/jx_python/jx_usingDataset.py vendored
View File

@ -7,12 +7,10 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import dataset
from jx_python.containers.Table_usingDataset import Table_usingDataset

19
vendor/jx_python/lists/aggs.py vendored
View File

@ -7,23 +7,18 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import itertools
from jx_base.query import _normalize_domain
from jx_base.domains import DefaultDomain, SimpleSetDomain
from jx_python import windows
from mo_dots import listwrap, wrap, coalesce
from mo_logs import Log
from mo_math import UNION
from jx_base.domains import SimpleSetDomain, DefaultDomain
from jx_python.expression_compiler import compile_expression
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import coalesce, listwrap, wrap
from mo_logs import Log
from mo_math import UNION
from mo_times.dates import Date
_ = Date
@ -49,7 +44,7 @@ def list_aggs(frum, query):
else:
pass
s_accessors = [(ss.name, compile_expression(ss.value.to_python())) for ss in select]
s_accessors = [(ss.name, jx_expression_to_function(ss.value)) for ss in select]
result = {
s.name: Matrix(

686
vendor/jx_python/meta.py vendored
View File

@ -7,28 +7,33 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from datetime import date
from datetime import datetime
from decimal import Decimal
from contextlib import contextmanager
import sqlite3
import jx_base
from jx_base import Column, Table
from jx_base.schema import Schema
from jx_python import jx
from mo_collections import UniqueIndex
from mo_dots import Data, concat_field, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap, coalesce
from mo_future import none_type, text_type, long, PY2
from mo_json.typed_encoder import untype_path, unnest_path, python_type_to_json_type, STRUCT
from mo_logs import Log
from mo_threads import Lock
from mo_dots import Data, FlatList, Null, NullType, ROOT_PATH, concat_field, is_container, is_data, is_list, join_field, listwrap, split_field, unwraplist, wrap
from mo_files import File
from mo_future import items, none_type, reduce, text_type, binary_type
from mo_json import (INTEGER, NUMBER, STRING, STRUCT, json2value, python_type_to_json_type, value2json)
from mo_json.typed_encoder import unnest_path, untype_path
from mo_logs import Except, Log
from mo_threads import Lock, Queue, Thread, Till
from mo_times.dates import Date
from pyLibrary.sql import (SQL_AND, SQL_FROM, SQL_ORDERBY, SQL_SELECT, SQL_WHERE, sql_iso, sql_list)
from pyLibrary.sql.sqlite import json_type_to_sqlite_type, quote_column, quote_value
DEBUG = False
singlton = None
db_table_name = quote_column("meta.columns")
INSERT, UPDATE, DELETE, EXECUTE = "insert", "update", "delete", "execute"
class ColumnList(Table, jx_base.Container):
@ -36,14 +41,235 @@ class ColumnList(Table, jx_base.Container):
OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED
"""
def __init__(self):
def __init__(self, name):
Table.__init__(self, "meta.columns")
self.db_file = File("metadata." + name + ".sqlite")
self.data = {} # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS)
self.locker = Lock()
self._schema = None
self.extend(METADATA_COLUMNS)
self.db = sqlite3.connect(
database=self.db_file.abspath, check_same_thread=False, isolation_level=None
)
self.last_load = Null
self.todo = Queue(
"update columns to db"
) # HOLD (action, column) PAIR, WHERE action in ['insert', 'update']
self._db_load()
Thread.run("update " + name, self._db_worker)
def find(self, es_index, abs_column_name):
@contextmanager
def _db_transaction(self):
self.db.execute(str("BEGIN"))
try:
yield
self.db.execute(str("COMMIT"))
except Exception as e:
e = Except.wrap(e)
self.db.execute(str("ROLLBACK"))
Log.error("Transaction failed", cause=e)
def _query(self, query):
result = Data()
curr = self.db.execute(query)
result.meta.format = "table"
result.header = [d[0] for d in curr.description] if curr.description else None
result.data = curr.fetchall()
return result
def _db_create(self):
with self._db_transaction():
self.db.execute(
"CREATE TABLE "
+ db_table_name
+ sql_iso(
sql_list(
[
quote_column(c.name)
+ " "
+ json_type_to_sqlite_type[c.jx_type]
for c in METADATA_COLUMNS
]
+ [
"PRIMARY KEY"
+ sql_iso(
sql_list(map(quote_column, ["es_index", "es_column"]))
)
]
)
)
)
for c in METADATA_COLUMNS:
self._add(c)
self._db_insert_column(c)
def _db_load(self):
self.last_load = Date.now()
result = self._query(
SQL_SELECT
+ "name"
+ SQL_FROM
+ "sqlite_master"
+ SQL_WHERE
+ SQL_AND.join(["name=" + db_table_name, "type=" + quote_value("table")])
)
if not result.data:
self._db_create()
return
result = self._query(
SQL_SELECT
+ all_columns
+ SQL_FROM
+ db_table_name
+ SQL_ORDERBY
+ sql_list(map(quote_column, ["es_index", "name", "es_column"]))
)
with self.locker:
for r in result.data:
c = row_to_column(result.header, r)
self._add(c)
def _db_worker(self, please_stop):
while not please_stop:
try:
with self._db_transaction():
result = self._query(
SQL_SELECT
+ all_columns
+ SQL_FROM
+ db_table_name
+ SQL_WHERE
+ "last_updated > "
+ quote_value(self.last_load)
+ SQL_ORDERBY
+ sql_list(map(quote_column, ["es_index", "name", "es_column"]))
)
with self.locker:
for r in result.data:
c = row_to_column(result.header, r)
self._add(c)
if c.last_updated > self.last_load:
self.last_load = c.last_updated
updates = self.todo.pop_all()
DEBUG and updates and Log.note(
"{{num}} columns to push to db", num=len(updates)
)
for action, column in updates:
while not please_stop:
try:
with self._db_transaction():
DEBUG and Log.note(
"{{action}} db for {{table}}.{{column}}",
action=action,
table=column.es_index,
column=column.es_column,
)
if action is EXECUTE:
self.db.execute(column)
elif action is UPDATE:
self.db.execute(
"UPDATE"
+ db_table_name
+ "SET"
+ sql_list(
[
"count=" + quote_value(column.count),
"cardinality="
+ quote_value(column.cardinality),
"multi=" + quote_value(column.multi),
"partitions="
+ quote_value(
value2json(column.partitions)
),
"last_updated="
+ quote_value(column.last_updated),
]
)
+ SQL_WHERE
+ SQL_AND.join(
[
"es_index = "
+ quote_value(column.es_index),
"es_column = "
+ quote_value(column.es_column),
"last_updated < "
+ quote_value(column.last_updated),
]
)
)
elif action is DELETE:
self.db.execute(
"DELETE FROM"
+ db_table_name
+ SQL_WHERE
+ SQL_AND.join(
[
"es_index = "
+ quote_value(column.es_index),
"es_column = "
+ quote_value(column.es_column),
]
)
)
else:
self._db_insert_column(column)
break
except Exception as e:
e = Except.wrap(e)
if "database is locked" in e:
Log.note("metadata database is locked")
Till(seconds=1).wait()
break
else:
Log.warning("problem updataing database", cause=e)
except Exception as e:
Log.warning("problem updating database", cause=e)
(Till(seconds=10) | please_stop).wait()
def _db_insert_column(self, column):
try:
self.db.execute(
"INSERT INTO"
+ db_table_name
+ sql_iso(all_columns)
+ "VALUES"
+ sql_iso(
sql_list(
[
quote_value(column[c.name])
if c.name not in ("nested_path", "partitions")
else quote_value(value2json(column[c.name]))
for c in METADATA_COLUMNS
]
)
)
)
except Exception as e:
e = Except.wrap(e)
if "UNIQUE constraint failed" in e or " are not unique" in e:
# THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
self.todo.add((UPDATE, column), force=True)
else:
Log.error("do not know how to handle", cause=e)
def __copy__(self):
output = object.__new__(ColumnList)
Table.__init__(output, "meta.columns")
output.data = {
t: {c: list(cs) for c, cs in dd.items()} for t, dd in self.data.items()
}
output.locker = Lock()
output._schema = None
return output
def find(self, es_index, abs_column_name=None):
with self.locker:
if es_index.startswith("meta."):
self._update_meta()
@ -62,19 +288,37 @@ class ColumnList(Table, jx_base.Container):
def add(self, column):
self.dirty = True
with self.locker:
return self._add(column)
canonical = self._add(column)
if canonical == None:
return column # ALREADY ADDED
self.todo.add((INSERT if canonical is column else UPDATE, canonical))
return canonical
def remove_table(self, table_name):
del self.data[table_name]
def _add(self, column):
"""
:param column: ANY COLUMN OBJECT
:return: None IF column IS canonical ALREADY (NET-ZERO EFFECT)
"""
columns_for_table = self.data.setdefault(column.es_index, {})
existing_columns = columns_for_table.setdefault(column.names["."], [])
existing_columns = columns_for_table.setdefault(column.name, [])
for canonical in existing_columns:
if canonical is column:
return canonical
return None
if canonical.es_type == column.es_type:
set_default(column.names, canonical.names)
for key in Column.__slots__:
canonical[key] = column[key]
if column.last_updated > canonical.last_updated:
for key in Column.__slots__:
old_value = canonical[key]
new_value = column[key]
if new_value == None:
pass # DO NOT BOTHER CLEARING OLD VALUES (LIKE cardinality AND partitions)
elif new_value == old_value:
pass # NO NEED TO UPDATE WHEN NO CHANGE MADE (COMMON CASE)
else:
canonical[key] = new_value
return canonical
existing_columns.append(column)
return column
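# A hedged sketch of the merge rule above, using plain dicts in place of
# Column objects: a newer column overwrites only with non-None values.
def merge_newer(canonical, column):
    if column["last_updated"] > canonical["last_updated"]:
        for k, v in column.items():
            if v is not None and v != canonical.get(k):
                canonical[k] = v
    return canonical

canonical = {"cardinality": 10, "last_updated": 1}
merge_newer(canonical, {"cardinality": None, "last_updated": 2})
assert canonical["cardinality"] == 10  # None DID NOT CLOBBER THE OLD VALUE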
@ -90,18 +334,18 @@ class ColumnList(Table, jx_base.Container):
objects = 0
multi = 1
for column in self._all_columns():
value = column[mc.names["."]]
value = column[mc.name]
if value == None:
pass
else:
count += 1
if isinstance(value, list):
if is_list(value):
multi = max(multi, len(value))
try:
values |= set(value)
except Exception:
objects += len(value)
elif isinstance(value, Mapping):
elif is_data(value):
objects += 1
else:
values.add(value)
@ -126,25 +370,42 @@ class ColumnList(Table, jx_base.Container):
return iter(self._all_columns())
def __len__(self):
return self.data['meta.columns']['es_index'].count
return self.data["meta.columns"]["es_index"].count
def update(self, command):
self.dirty = True
try:
command = wrap(command)
DEBUG and Log.note(
"Update {{timestamp}}: {{command|json}}",
command=command,
timestamp=Date(command["set"].last_updated),
)
eq = command.where.eq
if eq.es_index:
all_columns = self.data.get(eq.es_index, {}).values()
if len(eq) == 1:
if unwraplist(command.clear) == ".":
with self.locker:
del self.data[eq.es_index]
self.todo.add(
(
EXECUTE,
"DELETE FROM "
+ db_table_name
+ SQL_WHERE
+ " es_index="
+ quote_value(eq.es_index),
)
)
return
# FASTEST
all_columns = self.data.get(eq.es_index, {}).values()
with self.locker:
columns = [
c
for cs in all_columns
for c in cs
]
columns = [c for cs in all_columns for c in cs]
elif eq.es_column and len(eq) == 2:
# FASTER
all_columns = self.data.get(eq.es_index, {}).values()
with self.locker:
columns = [
c
@ -155,12 +416,15 @@ class ColumnList(Table, jx_base.Container):
else:
# SLOWER
all_columns = self.data.get(eq.es_index, {}).values()
with self.locker:
columns = [
c
for cs in all_columns
for c in cs
if all(c[k] == v for k, v in eq.items()) # THIS LINE IS VERY SLOW
if all(
c[k] == v for k, v in eq.items()
) # THIS LINE IS VERY SLOW
]
else:
columns = list(self)
@ -168,20 +432,30 @@ class ColumnList(Table, jx_base.Container):
with self.locker:
for col in columns:
DEBUG and Log.note(
"update column {{table}}.{{column}}",
table=col.es_index,
column=col.es_column,
)
for k in command["clear"]:
if k == ".":
self.todo.add((DELETE, col))
lst = self.data[col.es_index]
cols = lst[col.names['.']]
cols = lst[col.name]
cols.remove(col)
if len(cols) == 0:
del lst[col.names['.']]
del lst[col.name]
if len(lst) == 0:
del self.data[col.es_index]
break
else:
col[k] = None
else:
# DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
for k, v in command.set.items():
col[k] = v
self.todo.add((UPDATE, col))
for k, v in command.set.items():
col[k] = v
except Exception as e:
Log.error("should not happen", cause=e)
@ -191,10 +465,13 @@ class ColumnList(Table, jx_base.Container):
with self.locker:
self._update_meta()
if not self._schema:
self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs])
self._schema = Schema(
".", [c for cs in self.data["meta.columns"].values() for c in cs]
)
snapshot = self._all_columns()
from jx_python.containers.list_usingPythonList import ListContainer
query.frum = ListContainer("meta.columns", snapshot, self._schema)
return jx.run(query)
@ -208,7 +485,9 @@ class ColumnList(Table, jx_base.Container):
if not self._schema:
with self.locker:
self._update_meta()
self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs])
self._schema = Schema(
".", [c for cs in self.data["meta.columns"].values() for c in cs]
)
return self._schema
@property
@ -229,8 +508,8 @@ class ColumnList(Table, jx_base.Container):
self._update_meta()
output = [
{
"table": concat_field(c.es_index, untype_path(table)),
"name": untype_path(name),
"table": c.es_index,
"name": untype_path(c.name),
"cardinality": c.cardinality,
"es_column": c.es_column,
"es_index": c.es_index,
@ -238,23 +517,20 @@ class ColumnList(Table, jx_base.Container):
"count": c.count,
"nested_path": [unnest_path(n) for n in c.nested_path],
"es_type": c.es_type,
"type": c.jx_type
"type": c.jx_type,
}
for tname, css in self.data.items()
for cname, cs in css.items()
for c in cs
if c.jx_type not in STRUCT # and c.es_column != "_id"
for table, name in c.names.items()
]
from jx_python.containers.list_usingPythonList import ListContainer
return ListContainer(
self.name,
data=output,
schema=jx_base.Schema(
"meta.columns",
SIMPLE_METADATA_COLUMNS
)
schema=jx_base.Schema("meta.columns", SIMPLE_METADATA_COLUMNS),
)
@ -262,7 +538,7 @@ def get_schema_from_list(table_name, frum):
"""
SCAN THE LIST FOR COLUMN TYPES
"""
columns = UniqueIndex(keys=("names.\\.",))
columns = UniqueIndex(keys=("name",))
_get_schema_from_list(frum, ".", parent=".", nested_path=ROOT_PATH, columns=columns)
return Schema(table_name=table_name, columns=list(columns))
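Roughly, get_schema_from_list scans records and accumulates a type per field; a self-contained approximation (the real code also tracks nested paths and merges types through _merge_python_type):

rows = [
    {"a": 1, "b": "x"},
    {"a": 2.5, "b": None, "c": [1, 2]},
]

def infer_types(rows):
    # COLLECT THE SET OF PYTHON TYPE NAMES SEEN FOR EACH TOP-LEVEL FIELD
    seen = {}
    for row in rows:
        for name, value in row.items():
            if value is not None:
                seen.setdefault(name, set()).add(type(value).__name__)
    return seen

assert infer_types(rows) == {"a": {"int", "float"}, "b": {"str"}, "c": {"list"}}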
@ -271,277 +547,205 @@ def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
"""
:param frum: The list
:param table_name: Name of the table this list holds records for
:param prefix_path: parent path
:param parent: parent path
:param nested_path: each nested array, in reverse order
:param columns: map from full name to column definition
:return:
"""
for d in frum:
row_type = _type_to_name[d.__class__]
row_type = python_type_to_json_type[d.__class__]
if row_type != "object":
# EXPECTING PRIMITIVE VALUE
full_name = parent
column = columns[full_name]
if not column:
column = Column(
names={table_name: full_name},
name=concat_field(table_name, full_name),
es_column=full_name,
es_index=".",
jx_type=python_type_to_json_type[d.__class__],
es_type=row_type,
nested_path=nested_path
es_type=d.__class__.__name__,
jx_type=None, # WILL BE SET BELOW
last_updated=Date.now(),
nested_path=nested_path,
)
columns.add(column)
column.es_type = _merge_type[column.es_type][row_type]
column.jx_type = _merge_type[coalesce(column.jx_type, "undefined")][row_type]
column.es_type = _merge_python_type(column.es_type, d.__class__)
column.jx_type = python_type_to_json_type[column.es_type]
else:
for name, value in d.items():
full_name = concat_field(parent, name)
column = columns[full_name]
if not column:
column = Column(
names={table_name: full_name},
name=concat_field(table_name, full_name),
es_column=full_name,
es_index=".",
es_type="undefined",
nested_path=nested_path
es_type=value.__class__.__name__,
jx_type=None, # WILL BE SET BELOW
last_updated=Date.now(),
nested_path=nested_path,
)
columns.add(column)
if isinstance(value, (list, set)): # GET TYPE OF MULTIVALUE
if is_container(value): # GET TYPE OF MULTIVALUE
v = list(value)
if len(v) == 0:
this_type = "undefined"
this_type = none_type.__name__
elif len(v) == 1:
this_type = _type_to_name[v[0].__class__]
this_type = v[0].__class__.__name__
else:
this_type = _type_to_name[v[0].__class__]
if this_type == "object":
this_type = "nested"
this_type = reduce(
_merge_python_type, (vi.__class__.__name__ for vi in value)
)
else:
this_type = _type_to_name[value.__class__]
new_type = _merge_type[column.es_type][this_type]
column.es_type = new_type
this_type = value.__class__.__name__
column.es_type = _merge_python_type(column.es_type, this_type)
column.jx_type = python_type_to_json_type[column.es_type]
if this_type == "object":
_get_schema_from_list([value], table_name, full_name, nested_path, columns)
elif this_type == "nested":
if this_type in {"object", "dict", "Mapping", "Data"}:
_get_schema_from_list(
[value], table_name, full_name, nested_path, columns
)
elif this_type in {"list", "FlatList"}:
np = listwrap(nested_path)
newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
_get_schema_from_list(value, table_name, full_name, newpath, columns)
_get_schema_from_list(
value, table_name, full_name, newpath, columns
)
METADATA_COLUMNS = (
[
Column(
names={".": c},
name=c,
es_index="meta.columns",
es_column=c,
es_type="string",
nested_path=ROOT_PATH
es_type="keyword",
jx_type=STRING,
last_updated=Date.now(),
nested_path=ROOT_PATH,
)
for c in ["es_type", "jx_type", "nested_path", "es_column", "es_index"]
] + [
for c in [
"name",
"es_type",
"jx_type",
"nested_path",
"es_column",
"es_index",
"partitions",
]
]
+ [
Column(
es_index="meta.columns",
names={".": c},
es_column=c,
es_type="object",
nested_path=ROOT_PATH
)
for c in ["names", "partitions"]
] + [
Column(
names={".": c},
name=c,
es_index="meta.columns",
es_column=c,
es_type="long",
nested_path=ROOT_PATH
es_type="integer",
jx_type=INTEGER,
last_updated=Date.now(),
nested_path=ROOT_PATH,
)
for c in ["count", "cardinality", "multi"]
] + [
]
+ [
Column(
names={".": "last_updated"},
name="last_updated",
es_index="meta.columns",
es_column="last_updated",
es_type="time",
nested_path=ROOT_PATH
es_type="double",
jx_type=NUMBER,
last_updated=Date.now(),
nested_path=ROOT_PATH,
)
]
)
SIMPLE_METADATA_COLUMNS = (
def row_to_column(header, row):
return Column(
**{
h: c
if c is None or h not in ("nested_path", "partitions")
else json2value(c)
for h, c in zip(header, row)
}
)
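row_to_column rehydrates a stored row into a Column, JSON-decoding only nested_path and partitions; a standalone sketch using the standard json module in place of json2value (assumed here to behave like json.loads):

import json

def row_to_dict(header, row):
    # PARSE ONLY THE FIELDS STORED AS JSON; PASS OTHERS (AND NULLS) THROUGH
    return {
        h: c if c is None or h not in ("nested_path", "partitions") else json.loads(c)
        for h, c in zip(header, row)
    }

header = ["name", "nested_path"]
assert row_to_dict(header, ["a", '["."]']) == {"name": "a", "nested_path": ["."]}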
all_columns = sql_list([quote_column(c.name) for c in METADATA_COLUMNS])
SIMPLE_METADATA_COLUMNS = ( # FOR PURELY INTERNAL PYTHON LISTS, NOT MAPPING TO ANOTHER DATASTORE
[
Column(
names={".": c},
name=c,
es_index="meta.columns",
es_column=c,
es_type="string",
nested_path=ROOT_PATH
jx_type=STRING,
last_updated=Date.now(),
nested_path=ROOT_PATH,
)
for c in ["table", "name", "type", "nested_path"]
] + [
]
+ [
Column(
names={".": c},
name=c,
es_index="meta.columns",
es_column=c,
es_type="long",
nested_path=ROOT_PATH
jx_type=INTEGER,
last_updated=Date.now(),
nested_path=ROOT_PATH,
)
for c in ["count", "cardinality", "multi"]
] + [
]
+ [
Column(
names={".": "last_updated"},
name="last_updated",
es_index="meta.columns",
es_column="last_updated",
es_type="time",
nested_path=ROOT_PATH
jx_type=NUMBER,
last_updated=Date.now(),
nested_path=ROOT_PATH,
)
]
)
_type_to_name = {
none_type: "undefined",
NullType: "undefined",
bool: "boolean",
str: "string",
text_type: "string",
int: "integer",
float: "double",
Data: "object",
dict: "object",
set: "nested",
list: "nested",
FlatList: "nested",
Date: "double",
Decimal: "double",
datetime: "double",
date: "double"
_merge_order = {
none_type: 0,
NullType: 1,
bool: 2,
int: 3,
Date: 4,
float: 5,
text_type: 6,
binary_type: 6,
object: 7,
dict: 8,
Mapping: 9,
Data: 10,
list: 11,
FlatList: 12,
}
if PY2:
_type_to_name[long] = "integer"
for k, v in items(_merge_order):
_merge_order[k.__name__] = v
_merge_type = {
"undefined": {
"undefined": "undefined",
"boolean": "boolean",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": "object",
"nested": "nested"
},
"boolean": {
"undefined": "boolean",
"boolean": "boolean",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"integer": {
"undefined": "integer",
"boolean": "integer",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"long": {
"undefined": "long",
"boolean": "long",
"integer": "long",
"long": "long",
"float": "double",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"float": {
"undefined": "float",
"boolean": "float",
"integer": "float",
"long": "double",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"double": {
"undefined": "double",
"boolean": "double",
"integer": "double",
"long": "double",
"float": "double",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"number": {
"undefined": "number",
"boolean": "number",
"integer": "number",
"long": "number",
"float": "number",
"double": "number",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"string": {
"undefined": "string",
"boolean": "string",
"integer": "string",
"long": "string",
"float": "string",
"double": "string",
"number": "string",
"string": "string",
"object": None,
"nested": None
},
"object": {
"undefined": "object",
"boolean": None,
"integer": None,
"long": None,
"float": None,
"double": None,
"number": None,
"string": None,
"object": "object",
"nested": "nested"
},
"nested": {
"undefined": "nested",
"boolean": None,
"integer": None,
"long": None,
"float": None,
"double": None,
"number": None,
"string": None,
"object": "nested",
"nested": "nested"
}
}
def _merge_python_type(A, B):
a = _merge_order[A]
b = _merge_order[B]
if a >= b:
output = A
else:
output = B
if isinstance(output, str):
return output
else:
return output.__name__
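_merge_python_type simply keeps whichever type name ranks higher in _merge_order, so merging widens toward text and container types; a trimmed, runnable sketch:

_order = {"NoneType": 0, "bool": 2, "int": 3, "float": 5, "str": 6, "list": 11}

def merge_type(a, b):
    # RETURN THE "WIDER" OF TWO TYPE NAMES
    return a if _order[a] >= _order[b] else b

assert merge_type("int", "float") == "float"
assert merge_type("float", "str") == "str"
assert merge_type("NoneType", "int") == "int"  # NULLS NEVER NARROW A TYPE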

50
vendor/jx_python/namespace/normal.py vendored

@ -7,26 +7,21 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from mo_future import is_text, is_binary
from copy import copy
from mo_dots import Data
from mo_dots import FlatList
from mo_dots import coalesce, Null
from mo_dots import wrap, listwrap
from mo_logs import Log
from mo_math import Math
from jx_base.dimensions import Dimension
from jx_base.domains import Domain
from jx_base.query import QueryOp, get_all_vars
from jx_python.containers import Container
from jx_python.expressions import TRUE
from jx_python.namespace import Namespace, convert_list
from jx_base.query import QueryOp, get_all_vars
from mo_dots import Data, FlatList, Null, coalesce, is_data, is_list, listwrap, wrap
from mo_future import text_type
from mo_logs import Log
import mo_math
DEFAULT_LIMIT = 10
@ -37,7 +32,7 @@ class Normal(Namespace):
"""
def convert(self, expr):
if isinstance(expr, Mapping) and expr["from"]:
if is_data(expr) and expr["from"]:
return self._convert_query(expr)
return expr
@ -47,7 +42,7 @@ class Normal(Namespace):
# Log.error('Expecting from clause to be a Container')
query = wrap(query)
output = QueryOp("from", None)
output = QueryOp(None)
output["from"] = self._convert_from(query["from"])
output.format = query.format
@ -77,7 +72,7 @@ class Normal(Namespace):
output.sort = self._convert_sort(query.sort)
output.limit = coalesce(query.limit, DEFAULT_LIMIT)
if not Math.is_integer(output.limit) or output.limit < 0:
if not mo_math.is_integer(output.limit) or output.limit < 0:
Log.error("Expecting limit >= 0")
output.isLean = query.isLean
@ -94,15 +89,15 @@ class Normal(Namespace):
return output
def _convert_from(self, frum):
if isinstance(frum, text_type):
if is_text(frum):
return Data(name=frum)
elif isinstance(frum, (Container, QueryOp)):
elif is_op(frum, (Container, Variable)):
return frum
else:
Log.error("Expecting from clause to be a name, or a container")
def _convert_select(self, select):
if isinstance(select, text_type):
if is_text(select):
return Data(
name=select.rstrip("."), # TRAILING DOT INDICATES THE VALUE, BUT IS INVALID FOR THE NAME
value=select,
@ -111,7 +106,7 @@ class Normal(Namespace):
else:
select = wrap(select)
output = copy(select)
if not select.value or isinstance(select.value, text_type):
if not select.value or is_text(select.value):
if select.value == ".":
output.name = coalesce(select.name, select.aggregate)
else:
@ -126,7 +121,7 @@ class Normal(Namespace):
return output
def _convert_edge(self, edge):
if isinstance(edge, text_type):
if is_text(edge):
return Data(
name=edge,
value=edge,
@ -134,10 +129,10 @@ class Normal(Namespace):
)
else:
edge = wrap(edge)
if not edge.name and not isinstance(edge.value, text_type):
if not edge.name and not is_text(edge.value):
Log.error("You must name compound edges: {{edge}}", edge= edge)
if isinstance(edge.value, (Mapping, list)) and not edge.domain:
if edge.value.__class__ in (Data, dict, list, FlatList) and not edge.domain:
# COMPLEX EDGE IS SHORT HAND
domain = self._convert_domain()
domain.dimension = Data(fields=edge.value)
@ -158,7 +153,7 @@ class Normal(Namespace):
)
def _convert_group(self, column):
if isinstance(column, text_type):
if is_text(column):
return wrap({
"name": column,
"value": column,
@ -169,7 +164,7 @@ class Normal(Namespace):
if (column.domain and column.domain.type != "default") or column.allowNulls != None:
Log.error("groupby does not accept complicated domains")
if not column.name and not isinstance(column.value, text_type):
if not column.name and not is_text(column.value):
Log.error("You must name compound edges: {{edge}}", edge= column)
return wrap({
@ -191,7 +186,7 @@ class Normal(Namespace):
domain = domain.copy()
domain.name = domain.type
if not isinstance(domain.partitions, list):
if not is_list(domain.partitions):
domain.partitions = list(domain.partitions)
return Domain(**domain)
@ -237,7 +232,7 @@ def normalize_sort(sort=None):
output = FlatList()
for s in listwrap(sort):
if isinstance(s, text_type) or Math.is_integer(s):
if is_text(s) or mo_math.is_integer(s):
output.append({"value": s, "sort": 1})
elif not s.field and not s.value and s.sort == None:
# ASSUME {name: sort} FORM
@ -255,8 +250,7 @@ sort_direction = {
1: 1,
0: 0,
-1: -1,
None: 1,
Null: 1
None: 1
}
canonical_aggregates = {

38
vendor/jx_python/namespace/rename.py vendored

@ -7,22 +7,20 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from copy import copy
from mo_dots import set_default, wrap, coalesce, Data, listwrap, unwraplist
from mo_logs import Log
from mo_math import Math
from mo_times.dates import Date
from jx_base.dimensions import Dimension
from jx_base.queries import is_variable_name
from jx_python.namespace import Namespace, convert_list
from jx_base.utils import is_variable_name
from jx_base.query import QueryOp
from jx_base.language import is_op
from jx_python.namespace import Namespace, convert_list
from mo_dots import Data, coalesce, is_data, is_list, listwrap, set_default, unwraplist, wrap, is_many
from mo_future import is_text
from mo_logs import Log
from mo_math import is_number
from mo_times.dates import Date
class Rename(Namespace):
@ -32,7 +30,7 @@ class Rename(Namespace):
EXPECTING A LIST OF {"name":name, "value":value} OBJECTS TO PERFORM A MAPPING
"""
dimensions = wrap(dimensions)
if isinstance(dimensions, Mapping) and dimensions.name == None:
if is_data(dimensions) and dimensions.name == None:
# CONVERT TO A REAL DIMENSION DEFINITION
dimensions = {"name": ".", "type": "set", "edges":[{"name": k, "field": v} for k, v in dimensions.items()]}
@ -44,19 +42,19 @@ class Rename(Namespace):
"""
if expr is True or expr == None or expr is False:
return expr
elif Math.is_number(expr):
elif is_number(expr):
return expr
elif expr == ".":
return "."
elif is_variable_name(expr):
return coalesce(self.dimensions[expr], expr)
elif isinstance(expr, text_type):
elif is_text(expr):
Log.error("{{name|quote}} is not a valid variable name", name=expr)
elif isinstance(expr, Date):
return expr
elif isinstance(expr, QueryOp):
elif is_op(expr, QueryOp):
return self._convert_query(expr)
elif isinstance(expr, Mapping):
elif is_data(expr):
if expr["from"]:
return self._convert_query(expr)
elif len(expr) >= 2:
@ -66,7 +64,7 @@ class Rename(Namespace):
# ASSUME SINGLE-CLAUSE EXPRESSION
k, v = expr.items()[0]
return converter_map.get(k, self._convert_bop)(self, k, v)
elif isinstance(expr, (list, set, tuple)):
elif is_many(expr):
return wrap([self.convert(value) for value in expr])
else:
return expr
@ -88,7 +86,7 @@ class Rename(Namespace):
def _convert_bop(self, op, term):
if isinstance(term, list):
if is_list(term):
return {op: map(self.convert, term)}
return {op: {self.convert(var): val for var, val in term.items()}}
@ -97,7 +95,7 @@ class Rename(Namespace):
return {k: map(self.convert, v)}
def _convert_from(self, frum):
if isinstance(frum, Mapping):
if is_data(frum):
return Data(name=self.convert(frum.name))
else:
return self.convert(frum)
@ -126,7 +124,7 @@ class Rename(Namespace):
if clause == None:
return None
elif isinstance(clause, Mapping):
elif is_data(clause):
return set_default({"value": self.convert(clause.value)}, clause)
else:
return [set_default({"value": self.convert(c.value)}, c) for c in clause]

6
vendor/jx_python/records.py vendored

@ -7,9 +7,9 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from mo_dots import listwrap

5
vendor/jx_python/table.py vendored

@ -7,10 +7,9 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import jx_base
from mo_dots import Data

16
vendor/jx_python/windows.py vendored

@ -8,20 +8,16 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import functools
from copy import copy
import functools
import mo_math
from mo_collections.multiset import Multiset
from mo_dots.lists import FlatList
from mo_dots import FlatList
from mo_logs import Log
from mo_math import MIN
from mo_math import Math
from mo_math import stats
import mo_math
from mo_math import MIN, stats
from mo_math.stats import ZeroMoment, ZeroMoment2Stats
@ -147,7 +143,7 @@ class _Stats(WindowFunction):
Log.error("Do not know how to handle")
def end(self):
ignore = Math.ceiling(len(self.samples) * (1 - self.middle) / 2)
ignore = mo_math.ceiling(len(self.samples) * (1 - self.middle) / 2)
if ignore * 2 >= len(self.samples):
return stats.Stats()
output = stats.Stats(samples=sorted(self.samples)[ignore:len(self.samples) - ignore:])

7
vendor/mo_collections/__init__.py vendored

@ -7,13 +7,12 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from mo_collections.unique_index import UniqueIndex
def reverse(values):
"""
REVERSE - WITH NO SIDE EFFECTS!

6
vendor/mo_collections/array.py vendored

@ -11,10 +11,9 @@
# REPLACE NUMPY ARRAY FUNCTIONS
# THIS CODE IS FASTER THAN NUMPY WHEN USING PYPY *AND* THE ARRAYS ARE SMALL
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from mo_logs import Log
@ -100,6 +99,7 @@ MORE_MATH = {
"subtract": lambda a, b: a - b,
"sub": lambda a, b: a - b,
"multiply": lambda a, b: a * b,
"mul": lambda a, b: a * b,
"mult": lambda a, b: a * b,
"divide": lambda a, b: a / b,
"div": lambda a, b: a / b

19
vendor/mo_collections/index.py vendored

@ -8,14 +8,11 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from copy import copy
from mo_dots import wrap, unwrap, tuplewrap, get_attr
from mo_dots import get_attr, is_data, is_sequence, tuplewrap, unwrap, wrap
from mo_logs import Log
@ -36,7 +33,7 @@ class Index(object):
def __getitem__(self, key):
try:
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
if is_sequence(key) and len(key) < len(self._keys):
# RETURN ANOTHER Index
raise NotImplementedError()
@ -67,7 +64,7 @@ class Index(object):
def _test_contains(self, key):
try:
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
if is_sequence(key) and len(key) < len(self._keys):
# RETURN ANOTHER Index
length = len(key)
key = value2key(self._keys[0:length:], key)
@ -158,15 +155,15 @@ class Index(object):
def value2key(keys, val):
if len(keys) == 1:
if isinstance(val, Mapping):
if is_data(val):
return get_attr(val, keys[0]),
elif isinstance(val, (list, tuple)):
elif is_sequence(val):
return val[0],
return val,
else:
if isinstance(val, Mapping):
if is_data(val):
return tuple(val[k] for k in keys)
elif isinstance(val, (list, tuple)):
elif is_sequence(val):
return tuple(val)
else:
Log.error("do not know what to do here")

19
vendor/mo_collections/matrix.py vendored

@ -7,15 +7,11 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import text_type, xrange, transpose
from mo_dots import Null, Data, coalesce, get_module
from mo_kwargs import override
from mo_dots import Data, Null, coalesce, get_module, is_sequence
from mo_future import text_type, transpose, xrange
from mo_logs import Log
from mo_logs.exceptions import suppress_exception
class Matrix(object):
@ -40,7 +36,7 @@ class Matrix(object):
self.num = len(dims)
self.dims = tuple(dims)
if zeros != None:
if self.num == 0 or any(d == 0 for d in dims): #NO DIMS, OR HAS A ZERO DIM, THEN IT IS A NULL CUBE
if self.num == 0 or any(d == 0 for d in dims): # NO DIMS, OR HAS A ZERO DIM, THEN IT IS A NULL CUBE
if hasattr(zeros, "__call__"):
self.cube = zeros()
else:
@ -61,7 +57,7 @@ class Matrix(object):
return output
def __getitem__(self, index):
if not isinstance(index, (list, tuple)):
if not is_sequence(index):
if isinstance(index, slice):
sub = self.cube[index]
output = Matrix()
@ -171,10 +167,11 @@ class Matrix(object):
def __iter__(self):
if not self.dims:
return [self.value].__iter__()
yield (tuple(), self.value)
else:
# TODO: MAKE THIS FASTER BY NOT CALLING __getitem__ (MAKES CUBE OBJECTS)
return ((c, self[c]) for c in self._all_combos())
for c in self._all_combos():
yield (c, self[c])
def __float__(self):
return self.value

6
vendor/mo_collections/multiset.py vendored

@ -8,12 +8,10 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
class Multiset(object):
"""
Multiset IS ONE MEMBER IN A FAMILY OF USEFUL CONTAINERS

6
vendor/mo_collections/persistent_queue.py vendored

@ -8,13 +8,11 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import mo_json
from mo_dots import Data, wrap
from mo_files import File
import mo_json
from mo_logs import Log
from mo_logs.exceptions import suppress_exception
from mo_math.randoms import Random

5
vendor/mo_collections/queue.py vendored

@ -9,10 +9,9 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from collections import deque

5
vendor/mo_collections/relation.py vendored

@ -8,10 +8,9 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from mo_logs import Log

22
vendor/mo_collections/unique_index.py vendored

@ -8,13 +8,11 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping, Iterable, Set
from collections import Iterable, Mapping, Set
from mo_dots import unwrap, tuplewrap, wrap
from mo_dots import is_data, is_sequence, tuplewrap, unwrap, wrap
from mo_dots.objects import datawrap
from mo_future import PY2, iteritems
from mo_logs import Log
@ -83,7 +81,11 @@ class UniqueIndex(Set, Mapping):
if key == None:
Log.error("Expecting key to be not None")
d = self._data.get(key)
try:
d = self._data.get(key)
except Exception as e:
key = value2key(self._keys, val)
if d is None:
self._data[key] = unwrap(val)
self.count += 1
@ -175,16 +177,16 @@ class UniqueIndex(Set, Mapping):
def value2key(keys, val):
if len(keys) == 1:
if isinstance(val, Mapping):
if is_data(val):
return val[keys[0]]
elif isinstance(val, (list, tuple)):
elif is_sequence(val):
return val[0]
else:
return val
else:
if isinstance(val, Mapping):
if is_data(val):
return datawrap({k: val[k] for k in keys})
elif isinstance(val, (list, tuple)):
elif is_sequence(val):
return datawrap(dict(zip(keys, val)))
else:
Log.error("do not know what to do here")

98
vendor/mo_dots/__init__.py vendored

@ -7,23 +7,20 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import sys
from collections import Mapping
from mo_dots.utils import get_logger, get_module
from mo_future import text_type, binary_type, generator_types
from mo_future import binary_type, generator_types, is_binary, is_text, text_type
from mo_dots.utils import CLASS, OBJ, get_logger, get_module
none_type = type(None)
ModuleType = type(sys.modules[__name__])
_builtin_zip = zip
SELF_PATH = "."
ROOT_PATH = [SELF_PATH]
ROOT_PATH = ["."]
_get = object.__getattribute__
@ -88,13 +85,29 @@ def unliteral_field(field):
return field.replace("\\.", ".")
def tail_field(field):
"""
RETURN THE FIRST STEP IN PATH, ALONG WITH THE REMAINING TAIL
"""
if field == "." or field==None:
return ".", "."
elif "." in field:
if "\\." in field:
return tuple(k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".", 1))
else:
return field.split(".", 1)
else:
return field, "."
def split_field(field):
"""
RETURN field AS ARRAY OF DOT-SEPARATED FIELDS
"""
if field == "." or field==None:
return []
elif isinstance(field, text_type) and "." in field:
elif is_text(field) and "." in field:
if field.startswith(".."):
remainder = field.lstrip(".")
back = len(field) - len(remainder) - 1
@ -105,14 +118,17 @@ def split_field(field):
return [field]
def join_field(field):
def join_field(path):
"""
RETURN field SEQUENCE AS STRING
"""
potent = [f for f in field if f != "."]
if not potent:
return "."
return ".".join([f.replace(".", "\\.") for f in potent])
output = ".".join([f.replace(".", "\\.") for f in path if f != None])
return output if output else "."
# potent = [f for f in path if f != "."]
# if not potent:
# return "."
# return ".".join([f.replace(".", "\\.") for f in potent])
def concat_field(prefix, suffix):
@ -132,8 +148,14 @@ def startswith_field(field, prefix):
"""
RETURN True IF field PATH STRING STARTS WITH prefix PATH STRING
"""
if prefix == ".":
if prefix.startswith("."):
return True
# f_back = len(field) - len(field.strip("."))
# p_back = len(prefix) - len(prefix.strip("."))
# if f_back > p_back:
# return False
# else:
# return True
if field.startswith(prefix):
if len(field) == len(prefix) or field[len(prefix)] == ".":
@ -164,9 +186,9 @@ def relative_field(field, parent):
def hash_value(v):
if isinstance(v, (set, tuple, list)):
if is_many(v):
return hash(tuple(hash_value(vv) for vv in v))
elif not isinstance(v, Mapping):
elif _get(v, CLASS) not in data_types:
return hash(v)
else:
return hash(tuple(sorted(hash_value(vv) for vv in v.values())))
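Note that hash_value reduces containers element-wise and mappings by their values only (key names do not participate); a simplified equivalent:

def hash_value(v):
    # RECURSIVELY REDUCE LISTS/TUPLES/SETS AND dictS TO A SINGLE HASH
    if isinstance(v, (set, tuple, list)):
        return hash(tuple(hash_value(vv) for vv in v))
    elif not isinstance(v, dict):
        return hash(v)
    else:
        return hash(tuple(sorted(hash_value(vv) for vv in v.values())))

assert hash_value({"a": 1, "b": 2}) == hash_value({"b": 2, "a": 1})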
@ -191,7 +213,7 @@ def set_default(*params):
FOR EACH LEAF, RETURN THE HIGHEST PRIORITY LEAF VALUE
"""
p0 = params[0]
agg = p0 if p0 or isinstance(p0, Mapping) else {}
agg = p0 if p0 or _get(p0, CLASS) in data_types else {}
for p in params[1:]:
p = unwrap(p)
if p is None:
@ -207,10 +229,10 @@ def _all_default(d, default, seen=None):
"""
if default is None:
return
if isinstance(default, Data):
if _get(default, CLASS) is Data:
default = object.__getattribute__(default, SLOT) # REACH IN AND GET THE dict
# Log = _late_import()
# Log.error("strictly dict (or object) allowed: got {{type}}", type=default.__class__.__name__)
# Log.error("strictly dict (or object) allowed: got {{type}}", type=_get(default, CLASS).__name__)
for k, default_value in default.items():
default_value = unwrap(default_value) # TWO DIFFERENT Dicts CAN SHARE id() BECAUSE THEY ARE SHORT LIVED
@ -218,7 +240,7 @@ def _all_default(d, default, seen=None):
if existing_value == None:
if default_value != None:
if isinstance(default_value, Mapping):
if _get(default_value, CLASS) in data_types:
df = seen.get(id(default_value))
if df is not None:
_set_attr(d, [k], df)
@ -234,10 +256,10 @@ def _all_default(d, default, seen=None):
except Exception as e:
if PATH_NOT_FOUND not in e:
get_logger().error("Can not set attribute {{name}}", name=k, cause=e)
elif isinstance(existing_value, list) or isinstance(default_value, list):
elif is_list(existing_value) or is_list(default_value):
_set_attr(d, [k], None)
_set_attr(d, [k], listwrap(existing_value) + listwrap(default_value))
elif (hasattr(existing_value, "__setattr__") or isinstance(existing_value, Mapping)) and isinstance(default_value, Mapping):
elif (hasattr(existing_value, "__setattr__") or _get(existing_value, CLASS) in data_types) and _get(default_value, CLASS) in data_types:
df = seen.get(id(default_value))
if df is not None:
_set_attr(d, [k], df)
@ -390,7 +412,7 @@ def _set_attr(obj_, path, value):
elif value == None:
new_value = None
else:
new_value = old_value.__class__(value) # TRY TO MAKE INSTANCE OF SAME CLASS
new_value = _get(old_value, CLASS)(value) # TRY TO MAKE INSTANCE OF SAME CLASS
except Exception as e:
old_value = None
new_value = value
@ -417,7 +439,7 @@ def wrap(v):
:return: Data INSTANCE
"""
type_ = v.__class__
type_ = _get(v, CLASS)
if type_ is dict:
m = object.__new__(Data)
@ -443,10 +465,12 @@ def wrap_leaves(value):
def _wrap_leaves(value):
if value == None:
return None
if isinstance(value, (text_type, binary_type, int, float)):
class_ = _get(value, CLASS)
if class_ in (text_type, binary_type, int, float):
return value
if isinstance(value, Mapping):
if isinstance(value, Data):
if class_ in data_types:
if class_ is Data:
value = unwrap(value)
output = {}
@ -455,7 +479,7 @@ def _wrap_leaves(value):
if key == "":
get_logger().error("key is empty string. Probably a bad idea")
if isinstance(key, binary_type):
if is_binary(key):
key = key.decode("utf8")
d = output
@ -487,7 +511,7 @@ def _wrap_leaves(value):
def unwrap(v):
_type = _get(v, "__class__")
_type = _get(v, CLASS)
if _type is Data:
d = _get(v, SLOT)
return d
@ -496,8 +520,8 @@ def unwrap(v):
elif _type is NullType:
return None
elif _type is DataObject:
d = _get(v, "_obj")
if isinstance(d, Mapping):
d = _get(v, OBJ)
if _get(d, CLASS) in data_types:
return d
else:
return v
@ -537,7 +561,7 @@ def listwrap(value):
"""
if value == None:
return FlatList()
elif isinstance(value, list):
elif is_list(value):
return wrap(value)
elif isinstance(value, set):
return wrap(list(value))
@ -548,7 +572,7 @@ def unwraplist(v):
"""
LISTS WITH ZERO AND ONE element MAP TO None AND element RESPECTIVELY
"""
if isinstance(v, list):
if is_list(v):
if len(v) == 0:
return None
elif len(v) == 1:
@ -564,11 +588,11 @@ def tuplewrap(value):
INTENDED TO TURN lists INTO tuples FOR USE AS KEYS
"""
if isinstance(value, (list, set, tuple) + generator_types):
return tuple(tuplewrap(v) if isinstance(v, (list, tuple)) else v for v in value)
return tuple(tuplewrap(v) if is_sequence(v) else v for v in value)
return unwrap(value),
from mo_dots.datas import Data, SLOT, data_types, is_data
from mo_dots.nones import Null, NullType
from mo_dots.datas import Data, SLOT
from mo_dots.lists import FlatList
from mo_dots.lists import FlatList, is_list, is_sequence, is_container, is_many
from mo_dots.objects import DataObject

124
vendor/mo_dots/datas.py vendored

@ -7,18 +7,16 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import MutableMapping, Mapping
from copy import deepcopy
from collections import MutableMapping
from copy import copy, deepcopy
from decimal import Decimal
from mo_future import text_type, PY2, iteritems, none_type, generator_types, long
from mo_future import PY2, generator_types, is_binary, iteritems, long, none_type, text_type
from mo_dots import _getdefault, hash_value, literal_field, coalesce, listwrap, get_logger
from mo_dots.lists import FlatList
from mo_dots import _getdefault, coalesce, get_logger, hash_value, listwrap, literal_field
from mo_dots.utils import CLASS
_get = object.__getattribute__
_set = object.__setattr__
@ -46,10 +44,11 @@ class Data(MutableMapping):
else:
if args:
args0 = args[0]
if isinstance(args0, Data):
_set(self, SLOT, _get(args0, SLOT))
elif isinstance(args0, dict):
class_ = _get(args0, CLASS)
if class_ is dict:
_set(self, SLOT, args0)
elif class_ is Data:
_set(self, SLOT, _get(args0, SLOT))
else:
_set(self, SLOT, dict(args0))
elif kwargs:
@ -59,21 +58,21 @@ class Data(MutableMapping):
def __bool__(self):
d = self._internal_dict
if isinstance(d, dict):
if _get(d, CLASS) is dict:
return bool(d)
else:
return d != None
def __nonzero__(self):
d = self._internal_dict
if isinstance(d, dict):
if _get(d, CLASS) is dict:
return True if d else False
else:
return d != None
def __contains__(self, item):
value = Data.__getitem__(self, item)
if isinstance(value, Mapping) or value:
if _get(value, CLASS) in data_types or value:
return True
return False
@ -86,7 +85,7 @@ class Data(MutableMapping):
return Null
if key == ".":
output = self._internal_dict
if isinstance(output, Mapping):
if _get(output, CLASS) in data_types:
return self
else:
return output
@ -97,9 +96,9 @@ class Data(MutableMapping):
if key.find(".") >= 0:
seq = _split_field(key)
for n in seq:
if isinstance(d, NullType):
if _get(d, CLASS) is NullType:
d = NullType(d, n) # OH DEAR, Null TREATS n AS PATH, NOT LITERAL
elif isinstance(d, list):
elif is_list(d):
d = [_getdefault(dd, n) for dd in d]
else:
d = _getdefault(d, n) # EVERYTHING ELSE TREATS n AS LITERAL
@ -151,9 +150,9 @@ class Data(MutableMapping):
raise e
def __getattr__(self, key):
d = self._internal_dict
d = _get(self, SLOT)
v = d.get(key)
t = v.__class__
t = _get(v, CLASS)
# OPTIMIZED wrap()
if t is dict:
@ -198,13 +197,13 @@ class Data(MutableMapping):
return True
d = self._internal_dict
if not isinstance(d, dict):
if _get(d, CLASS) is not dict:
return d == other
if not d and other == None:
return False
if not isinstance(other, Mapping):
if _get(other, CLASS) not in data_types:
return False
e = unwrap(other)
for k, v in d.items():
@ -224,7 +223,7 @@ class Data(MutableMapping):
def items(self):
d = self._internal_dict
return [(k, wrap(v)) for k, v in d.items() if v != None or isinstance(v, Mapping)]
return [(k, wrap(v)) for k, v in d.items() if v != None or _get(v, CLASS) in data_types]
def leaves(self, prefix=None):
"""
@ -253,11 +252,18 @@ class Data(MutableMapping):
return dict.__len__(d)
def copy(self):
return Data(**self)
d = self._internal_dict
if _get(d, CLASS) is dict:
return Data(**self)
else:
return copy(d)
def __copy__(self):
d = self._internal_dict
return Data(**d)
if _get(d, CLASS) is dict:
return Data(**self)
else:
return copy(d)
def __deepcopy__(self, memo):
d = self._internal_dict
@ -311,7 +317,7 @@ def leaves(value, prefix=None):
output = []
for k, v in value.items():
try:
if isinstance(v, Mapping):
if _get(v, CLASS) in data_types:
output.extend(leaves(v, prefix=prefix + literal_field(k) + "."))
else:
output.append((prefix + literal_field(k), unwrap(v)))
@ -342,7 +348,7 @@ class _DictUsingSelf(dict):
def __getitem__(self, key):
if key == None:
return Null
if isinstance(key, str):
if is_binary(key):
key = key.decode("utf8")
d=self
@ -385,7 +391,7 @@ class _DictUsingSelf(dict):
raise e
def __getattr__(self, key):
if isinstance(key, str):
if is_binary(key):
ukey = key.decode("utf8")
else:
ukey = key
@ -397,7 +403,7 @@ class _DictUsingSelf(dict):
return wrap(o)
def __setattr__(self, key, value):
if isinstance(key, str):
if is_binary(key):
ukey = key.decode("utf8")
else:
ukey = key
@ -421,7 +427,7 @@ class _DictUsingSelf(dict):
if not d and other == None:
return True
if not isinstance(other, Mapping):
if not _get(other, CLASS) in data_types:
return False
e = unwrap(other)
for k, v in dict.items(d):
@ -439,7 +445,7 @@ class _DictUsingSelf(dict):
return wrap(dict.get(self, key, default))
def items(self):
return [(k, wrap(v)) for k, v in dict.items(self) if v != None or isinstance(v, Mapping)]
return [(k, wrap(v)) for k, v in dict.items(self) if v != None or _get(v, CLASS) in data_types]
def leaves(self, prefix=None):
"""
@ -448,7 +454,7 @@ class _DictUsingSelf(dict):
prefix = coalesce(prefix, "")
output = []
for k, v in self.items():
if isinstance(v, Mapping):
if _get(v, CLASS) in data_types:
output.extend(wrap(v).leaves(prefix=prefix + literal_field(k) + "."))
else:
output.append((prefix + literal_field(k), v))
@ -487,7 +493,7 @@ class _DictUsingSelf(dict):
return wrap(dict.__deepcopy__(self, memo))
def __delitem__(self, key):
if isinstance(key, str):
if is_binary(key):
key = key.decode("utf8")
if key.find(".") == -1:
@ -529,11 +535,11 @@ def _str(value, depth):
FOR DEBUGGING POSSIBLY RECURSIVE STRUCTURES
"""
output = []
if depth >0 and isinstance(value, Mapping):
if depth >0 and _get(value, CLASS) in data_types:
for k, v in value.items():
output.append(str(k) + "=" + _str(v, depth - 1))
return "{" + ",\n".join(output) + "}"
elif depth >0 and isinstance(value, list):
elif depth >0 and is_list(value):
for v in value:
output.append(_str(v, depth-1))
return "[" + ",\n".join(output) + "]"
@ -542,7 +548,7 @@ def _str(value, depth):
def _iadd(self, other):
if not isinstance(other, Mapping):
if not _get(other, CLASS) in data_types:
get_logger().error("Expecting a Mapping")
d = unwrap(self)
for ok, ov in other.items():
@ -550,39 +556,61 @@ def _iadd(self, other):
if sv == None:
d[ok] = deepcopy(ov)
elif isinstance(ov, (Decimal, float, long, int)):
if isinstance(sv, Mapping):
if _get(sv, CLASS) in data_types:
get_logger().error(
"can not add {{stype}} with {{otype}",
stype=sv.__class__.__name__,
otype=ov.__class__.__name__
stype=_get(sv, CLASS).__name__,
otype=_get(ov, CLASS).__name__
)
elif isinstance(sv, list):
elif is_list(sv):
d[ok].append(ov)
else:
d[ok] = sv + ov
elif isinstance(ov, list):
elif is_list(ov):
d[ok] = listwrap(sv) + ov
elif isinstance(ov, Mapping):
if isinstance(sv, Mapping):
elif _get(ov, CLASS) in data_types:
if _get(sv, CLASS) in data_types:
_iadd(sv, ov)
elif isinstance(sv, list):
elif is_list(sv):
d[ok].append(ov)
else:
get_logger().error(
"can not add {{stype}} with {{otype}",
stype=sv.__class__.__name__,
otype=ov.__class__.__name__
stype=_get(sv, CLASS).__name__,
otype=_get(ov, CLASS).__name__
)
else:
if isinstance(sv, Mapping):
if _get(sv, CLASS) in data_types:
get_logger().error(
"can not add {{stype}} with {{otype}",
stype=sv.__class__.__name__,
otype=ov.__class__.__name__
stype=_get(sv, CLASS).__name__,
otype=_get(ov, CLASS).__name__
)
else:
d[ok].append(ov)
return self
data_types = (Data, dict) # TYPES TO HOLD DATA
def register_data(type_):
"""
:param type_: ADD OTHER TYPE THAT HOLDS DATA
:return:
"""
global data_types
data_types = tuple(set(data_types + (type_,)))
def is_data(d):
"""
:param d:
:return: True IF d IS A TYPE THAT HOLDS DATA
"""
return d.__class__ in data_types
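register_data is how other mapping-like classes (DataObject below, and the classes produced by DataClass) get treated as data by is_data; hypothetical usage:

class Record(object):
    # HYPOTHETICAL MAPPING-LIKE CLASS, NOT PART OF mo_dots
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

register_data(Record)          # NOW Record COUNTS AS A DATA TYPE
assert is_data(Record(a=1))
assert is_data({"a": 1})
assert not is_data([1, 2])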
from mo_dots.nones import Null, NullType
from mo_dots.lists import is_list, FlatList
from mo_dots import unwrap, wrap

57
vendor/mo_dots/lists.py vendored

@ -7,20 +7,21 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from copy import deepcopy
from mo_dots import wrap, unwrap, coalesce
from mo_future import generator_types, text_type
from mo_dots import CLASS, coalesce, unwrap, wrap
from mo_dots.nones import Null
LIST = text_type("list")
_get = object.__getattribute__
_get_list = lambda self: _get(self, "list")
_get_list = lambda self: _get(self, LIST)
_set = object.__setattr__
_emit_slice_warning = True
_datawrap = None
Log = None
@ -29,6 +30,7 @@ def _late_import():
global _datawrap
global Log
from mo_dots.objects import datawrap as _datawrap
try:
from mo_logs import Log
@ -51,13 +53,13 @@ class FlatList(list):
# list.__init__(self)
if vals == None:
self.list = []
elif isinstance(vals, FlatList):
elif vals.__class__ is FlatList:
self.list = vals.list
else:
self.list = vals
def __getitem__(self, index):
if isinstance(index, slice):
if _get(index, CLASS) is slice:
# IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None)
if index.step is not None:
if not Log:
@ -77,7 +79,7 @@ class FlatList(list):
j = max(min(j, length), 0)
return FlatList(_get_list(self)[i:j])
if index < 0 or len(_get_list(self)) <= index:
if not isinstance(index, int) or index < 0 or len(_get_list(self)) <= index:
return Null
return wrap(_get_list(self)[index])
@ -109,7 +111,6 @@ class FlatList(list):
"""
if not Log:
_late_import()
return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get_list(self)])
def select(self, key):
@ -185,17 +186,18 @@ class FlatList(list):
return wrap(_get_list(self).pop(index))
def __eq__(self, other):
if isinstance(other, FlatList):
other = _get_list(other)
lst = _get_list(self)
if other == None and len(lst) == 0:
return True
if not isinstance(other, list):
other_class = _get(other, CLASS)
if other_class is FlatList:
other = _get_list(other)
try:
if len(lst) != len(other):
return False
return all([s == o for s, o in zip(lst, other)])
except Exception:
return False
if len(lst) != len(other):
return False
return all([s == o for s, o in zip(lst, other)])
def __add__(self, value):
if value == None:
@ -215,7 +217,7 @@ class FlatList(list):
return FlatList(vals=output)
def __iadd__(self, other):
if isinstance(other, list):
if is_list(other):
self.extend(other)
else:
self.append(other)
@ -282,3 +284,22 @@ class FlatList(list):
FlatList.EMPTY = Null
list_types = (list, FlatList)
container_types = (list, FlatList, set)
sequence_types = (list, FlatList, tuple)
many_types = tuple(set(list_types + container_types + sequence_types + generator_types))
def is_list(l):
return l.__class__ in list_types
def is_container(l):
return l.__class__ in container_types
def is_sequence(l):
return l.__class__ in sequence_types
def is_many(l):
return l.__class__ in many_types
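The four predicates are exact-class checks over overlapping tuples, so subclasses do not qualify; for example:

assert is_list([1, 2])
assert not is_list((1, 2))
assert is_sequence((1, 2))       # list, FlatList, tuple
assert is_container({1, 2})      # list, FlatList, set
assert is_many(x for x in [1])   # ALSO ACCEPTS GENERATORS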

37
vendor/mo_dots/nones.py vendored

@ -7,16 +7,17 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_dots import _setdefault, wrap, split_field
from mo_future import text_type, binary_type
from mo_future import is_binary, text_type
from mo_dots import _setdefault, wrap
from mo_dots.utils import CLASS, OBJ
_get = object.__getattribute__
_set = object.__setattr__
_zero_list = []
_null_hash = hash(None)
class NullType(object):
@ -35,7 +36,7 @@ class NullType(object):
key - THE dict ITEM REFERENCE (DOT(.) IS NOT ESCAPED)
"""
d = _get(self, "__dict__")
d["_obj"] = obj
d[OBJ] = obj
d["__key__"] = key
def __bool__(self):
@ -45,7 +46,7 @@ class NullType(object):
return False
def __add__(self, other):
if isinstance(other, list):
if is_list(other):
return other
return Null
@ -58,7 +59,7 @@ class NullType(object):
def __iadd__(self, other):
try:
d = _get(self, "__dict__")
o = d["_obj"]
o = d[OBJ]
if o is None:
return self
key = d["__key__"]
@ -108,10 +109,10 @@ class NullType(object):
return Null
def __eq__(self, other):
return other == None or isinstance(other, NullType)
return other is None or _get(other, CLASS) is NullType or other == None
def __ne__(self, other):
return other is not None and not isinstance(other, NullType)
return other is not None and _get(other, CLASS) is not NullType and other != None
def __or__(self, other):
if other is True:
@ -153,7 +154,7 @@ class NullType(object):
def __getitem__(self, key):
if isinstance(key, slice):
return Null
elif isinstance(key, binary_type):
elif is_binary(key):
key = key.decode("utf8")
elif isinstance(key, int):
return NullType(self, key)
@ -168,11 +169,11 @@ class NullType(object):
key = text_type(key)
d = _get(self, "__dict__")
o = wrap(d["_obj"])
o = wrap(d[OBJ])
k = d["__key__"]
if o is None:
return Null
elif isinstance(o, NullType):
elif _get(o, CLASS) is NullType:
return NullType(self, key)
v = o.get(k)
if v == None:
@ -187,7 +188,7 @@ class NullType(object):
key = text_type(key)
d = _get(self, "__dict__")
o = wrap(d["_obj"])
o = wrap(d[OBJ])
k = d["__key__"]
seq = [k] + [key]
@ -195,7 +196,7 @@ class NullType(object):
def __setitem__(self, key, value):
d = _get(self, "__dict__")
o = d["_obj"]
o = d[OBJ]
if o is None:
return
k = d["__key__"]
@ -225,7 +226,7 @@ class NullType(object):
return "Null"
def __hash__(self):
return hash(None)
return _null_hash
Null = NullType() # INSTEAD OF None!!!
@ -240,9 +241,9 @@ def _assign_to_null(obj, path, value, force=True):
try:
if obj is Null:
return
if isinstance(obj, NullType):
if _get(obj, CLASS) is NullType:
d = _get(obj, "__dict__")
o = d["_obj"]
o = d[OBJ]
p = d["__key__"]
s = [p]+path
return _assign_to_null(o, s, value)
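Taken together, these NullType methods keep missing paths chainable and equal to None; assuming the mo_dots semantics shown above, usage would look roughly like:

from mo_dots import Null, wrap

d = wrap({"a": {"b": 1}})
assert d.a.b == 1
assert d.x.y.z == None   # MISSING PATHS CHAIN TO Null INSTEAD OF RAISING
assert d.x == None and d.x is not None
d.x.y = 3                # ASSIGNING THROUGH Null CREATES THE PATH
assert d.x.y == 3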

50
vendor/mo_dots/objects.py vendored

@ -7,16 +7,17 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from datetime import date, datetime
from decimal import Decimal
from mo_dots import wrap, unwrap, Data, FlatList, NullType, get_attr, set_attr, SLOT
from mo_future import text_type, binary_type, get_function_defaults, get_function_arguments, none_type, generator_types
from mo_future import binary_type, generator_types, get_function_arguments, get_function_defaults, none_type, text_type
from mo_dots import Data, FlatList, NullType, SLOT, get_attr, set_attr, unwrap, wrap
from mo_dots.datas import register_data
from mo_dots.utils import CLASS, OBJ
_get = object.__getattribute__
_set = object.__setattr__
@ -29,31 +30,31 @@ class DataObject(Mapping):
"""
def __init__(self, obj):
_set(self, "_obj", obj)
_set(self, OBJ, obj)
def __getattr__(self, item):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
output = get_attr(obj, item)
return datawrap(output)
def __setattr__(self, key, value):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
set_attr(obj, key, value)
def __getitem__(self, item):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
output = get_attr(obj, item)
return datawrap(output)
def keys(self):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
try:
return obj.__dict__.keys()
except Exception as e:
raise e
def items(self):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
try:
return obj.__dict__.items()
except Exception as e:
@ -64,7 +65,7 @@ class DataObject(Mapping):
]
def iteritems(self):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
try:
return obj.__dict__.iteritems()
except Exception as e:
@ -82,43 +83,40 @@ class DataObject(Mapping):
return (k for k in self.keys())
def __unicode__(self):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
return text_type(obj)
def __str__(self):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
return str(obj)
def __len__(self):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
return len(obj)
def __call__(self, *args, **kwargs):
obj = _get(self, "_obj")
obj = _get(self, OBJ)
return obj(*args, **kwargs)
register_data(DataObject)
def datawrap(v):
type_ = _get(v, "__class__")
type_ = _get(v, CLASS)
if type_ is dict:
m = Data()
_set(m, SLOT, v) # INJECT m.__dict__=v SO THERE IS NO COPY
return m
elif type_ is Data:
return v
elif type_ is DataObject:
return v
elif type_ is none_type:
return None # So we allow `is None`
elif type_ is list:
return FlatList(v)
elif type_ in (Data, DataObject, none_type, FlatList, text_type, binary_type, int, float, Decimal, datetime, date, NullType, none_type):
return v
elif type_ in generator_types:
return (wrap(vv) for vv in v)
elif isinstance(v, (text_type, binary_type, int, float, Decimal, datetime, date, Data, FlatList, NullType, none_type)):
elif isinstance(v, (text_type, binary_type, int, float, Decimal, datetime, date, FlatList, NullType, Mapping, none_type)):
return v
elif isinstance(v, Mapping):
return DataObject(v)
elif hasattr(v, "__data__"):
return v.__data__()
else:

9
vendor/mo_dots/utils.py vendored

@ -7,14 +7,15 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import importlib
import sys
from mo_future import PY2
from mo_future import PY2, text_type
OBJ = text_type("_obj")
CLASS = text_type("__class__")
_Log = None

37
vendor/mo_files/__init__.py vendored

@ -8,17 +8,18 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import base64
from datetime import datetime
import io
from mimetypes import MimeTypes
import os
import re
import shutil
from datetime import datetime
from mimetypes import MimeTypes
from tempfile import mkdtemp, NamedTemporaryFile
from tempfile import NamedTemporaryFile, mkdtemp
from mo_dots import get_module, coalesce, Null
from mo_future import text_type, binary_type, PY3
from mo_logs import Log, Except
from mo_dots import Null, coalesce, get_module, is_list
from mo_files.url import URL
from mo_future import PY3, binary_type, text_type, is_text
from mo_logs import Except, Log
from mo_logs.exceptions import extract_stack
from mo_threads import Thread, Till
@ -42,12 +43,12 @@ class File(object):
"""
YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES
"""
self._mime_type = mime_type
if filename == None:
Log.error(u"File must be given a filename")
elif isinstance(filename, File):
if isinstance(filename, File):
return
elif isinstance(filename, (binary_type, text_type)):
self._mime_type = mime_type
if isinstance(filename, (binary_type, text_type)):
try:
self.key = None
if filename==".":
@ -272,10 +273,10 @@ class File(object):
if not self.parent.exists:
self.parent.create()
with open(self._filename, "wb") as f:
if isinstance(data, list) and self.key:
if is_list(data) and self.key:
Log.error(u"list of data and keys are not supported, encrypt before sending to file")
if isinstance(data, list):
if is_list(data):
pass
elif isinstance(data, (binary_type, text_type)):
data=[data]
@ -283,7 +284,7 @@ class File(object):
pass
for d in data:
if not isinstance(d, text_type):
if not is_text(d):
Log.error(u"Expecting unicode data only")
if self.key:
from mo_math.crypto import encrypt
@ -317,7 +318,7 @@ class File(object):
if not self.parent.exists:
self.parent.create()
with open(self._filename, "ab") as output_file:
if not isinstance(content, text_type):
if not is_text(content):
Log.error(u"expecting to write unicode only")
output_file.write(content.encode(encoding))
output_file.write(b"\n")
@ -440,7 +441,7 @@ class TempDirectory(File):
WILL BE DELETED WHEN EXITED
"""
def __new__(cls):
return File.__new__(cls, None)
return object.__new__(cls)
def __init__(self):
File.__init__(self, mkdtemp())
@ -460,7 +461,9 @@ class TempFile(File):
def __new__(cls, *args, **kwargs):
return object.__new__(cls)
def __init__(self):
def __init__(self, filename=None):
if isinstance(filename, File):
return
self.temp = NamedTemporaryFile(delete=False)
self.temp.close()
File.__init__(self, self.temp.name)

20
vendor/mo_files/url.py vendored

@ -7,11 +7,9 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from collections import Mapping
from mo_dots import wrap, Data, coalesce, Null
from mo_future import urlparse, text_type, PY2, unichr
from mo_json import value2json, json2value
from mo_dots import Data, Null, coalesce, is_data, is_list, wrap
from mo_future import PY2, is_text, text_type, unichr, urlparse, is_binary
from mo_json import json2value, value2json
from mo_logs import Log
@ -62,7 +60,7 @@ class URL(object):
return False
def __truediv__(self, other):
if not isinstance(other, text_type):
if not is_text(other):
Log.error(u"Expecting text path")
output = self.__copy__()
output.path = output.path.rstrip('/') + "/" + other.lstrip('/')
@ -186,7 +184,7 @@ def url_param2value(param):
u = query.get(k)
if u is None:
query[k] = v
elif isinstance(u, list):
elif is_list(u):
u += [v]
else:
query[k] = [u, v]
@ -202,15 +200,15 @@ def value2url_param(value):
if value == None:
Log.error("Can not encode None into a URL")
if isinstance(value, Mapping):
if is_data(value):
value_ = wrap(value)
output = "&".join([
value2url_param(k) + "=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(value2json(v)))
value2url_param(k) + "=" + (value2url_param(v) if is_text(v) else value2url_param(value2json(v)))
for k, v in value_.leaves()
])
elif isinstance(value, text_type):
elif is_text(value):
output = "".join(_map2url[c] for c in value.encode('utf8'))
elif isinstance(value, str):
elif is_binary(value):
output = "".join(_map2url[c] for c in value)
elif hasattr(value, "__iter__"):
output = ",".join(value2url_param(v) for v in value)

28
vendor/mo_future/__init__.py vendored

@ -7,14 +7,11 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import json
import sys
PY3 = sys.version_info[0] == 3
PY2 = sys.version_info[0] == 2
@ -35,6 +32,8 @@ if PY3:
from functools import cmp_to_key
from configparser import ConfigParser
from itertools import zip_longest
from functools import reduce
import builtins as __builtin__
izip = zip
zip_longest = itertools.zip_longest
@ -93,6 +92,15 @@ if PY3:
def sort_using_key(data, key):
return sorted(data, key=key)
def first(values):
return iter(values).__next__()
def is_text(t):
return t.__class__ is str
def is_binary(b):
return b.__class__ is bytes
utf8_json_encoder = json.JSONEncoder(
skipkeys=False,
ensure_ascii=False, # DIFF FROM DEFAULTS
@ -115,6 +123,7 @@ else:
from __builtin__ import zip as transpose
from itertools import izip
reduce = __builtin__.reduce
text_type = __builtin__.unicode
string_types = (str, unicode)
binary_type = str
@ -162,6 +171,15 @@ else:
# lambda a, b: (1 if (a[0]>b[0]) else (-1 if (a[0]<b[0]) else 0))
# )
def first(values):
return iter(values).next()
def is_text(t):
return t.__class__ is unicode
def is_binary(b):
return b.__class__ is str
utf8_json_encoder = json.JSONEncoder(
skipkeys=False,
ensure_ascii=False, # DIFF FROM DEFAULTS
@ -237,4 +255,4 @@ else:
d[key] = value
return d
_keep_imports = (ConfigParser, zip_longest, reduce, transpose, izip, HTMLParser, urlparse, StringIO, BytesIO, allocate_lock, get_ident, start_new_thread, interrupt_main)
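
The new `is_text`/`is_binary` helpers deliberately test `__class__ is ...` rather than `isinstance(...)`, so subclasses are excluded; on PY2 the same names check `unicode` and `str` instead. For illustration (PY3 semantics):

from mo_future import is_binary, is_text

class MyStr(str):
    pass

print(is_text("abc"))         # True
print(is_text(b"abc"))        # False
print(is_binary(b"abc"))      # True
print(is_text(MyStr("abc")))  # False: EXACT CLASS CHECK, SUBCLASSES DO NOT MATCH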

85
vendor/mo_json/__init__.py vendored

@ -7,27 +7,39 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from datetime import date, datetime, timedelta
from decimal import Decimal
import math
import re
from collections import Mapping
from datetime import date, timedelta, datetime
from decimal import Decimal
from mo_dots import FlatList, NullType, Data, wrap_leaves, wrap, Null, SLOT
from mo_dots import Data, FlatList, Null, NullType, SLOT, is_data, wrap, wrap_leaves
from mo_dots.objects import DataObject
from mo_future import text_type, none_type, long, binary_type, PY2
from mo_logs import Except, strings, Log
from mo_future import PY2, is_binary, is_text, items, long, none_type, text_type
from mo_logs import Except, Log, strings
from mo_logs.strings import expand_template
from mo_times import Date, Duration
FIND_LOOPS = False
SNAP_TO_BASE_10 = True # Identify floats near a round base10 value (has 000 or 999) and shorten
SNAP_TO_BASE_10 = False # Identify floats near a round base10 value (has 000 or 999) and shorten
CAN_NOT_DECODE_JSON = "Can not decode JSON"
IS_NULL = '0'
BOOLEAN = 'boolean'
INTEGER = 'integer'
NUMBER = 'number'
STRING = 'string'
OBJECT = 'object'
NESTED = "nested"
EXISTS = "exists"
ALL_TYPES = {IS_NULL: IS_NULL, BOOLEAN: BOOLEAN, INTEGER: INTEGER, NUMBER: NUMBER, STRING: STRING, OBJECT: OBJECT, NESTED: NESTED, EXISTS: EXISTS}
JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
STRUCT = [EXISTS, OBJECT, NESTED]
_get = object.__getattribute__
@ -81,6 +93,7 @@ def float2json(value):
def _snap_to_base_10(mantissa):
# TODO: https://lists.nongnu.org/archive/html/gcl-devel/2012-10/pdfkieTlklRzN.pdf
digits = mantissa.replace('.', '')
if SNAP_TO_BASE_10:
f9 = strings.find(digits, '999')
@ -159,7 +172,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):
return scrub_number(value)
elif type_ is Data:
return _scrub(_get(value, SLOT), is_done, stack, scrub_text, scrub_number)
elif isinstance(value, Mapping):
elif is_data(value):
_id = id(value)
if _id in is_done:
Log.warning("possible loop in structure detected")
@ -168,16 +181,16 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):
output = {}
for k, v in value.items():
if isinstance(k, text_type):
if is_text(k):
pass
elif isinstance(k, binary_type):
elif is_binary(k):
k = k.decode('utf8')
# elif hasattr(k, "__unicode__"):
# k = text_type(k)
else:
Log.error("keys must be strings")
v = _scrub(v, is_done, stack, scrub_text, scrub_number)
if v != None or isinstance(v, Mapping):
if v != None or is_data(v):
output[k] = v
is_done.discard(_id)
@ -187,7 +200,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):
for v in value:
v = _scrub(v, is_done, stack, scrub_text, scrub_number)
output.append(v)
return output
return output # if output else None
elif type_ is type:
return value.__name__
elif type_.__name__ == "bool_": # DEAR ME! Numpy has its own booleans (value==False could be used, but 0==False in Python. DOH!)
@ -276,7 +289,7 @@ def json2value(json_string, params=Null, flexible=False, leaves=False):
:param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
:return: Python value
"""
if not isinstance(json_string, text_type):
if not is_text(json_string):
Log.error("only unicode json accepted")
try:
@ -367,5 +380,45 @@ def datetime2unix(d):
Log.error("Can not convert {{value}}", value= d, cause=e)
python_type_to_json_type = {
int: NUMBER,
text_type: STRING,
float: NUMBER,
bool: BOOLEAN,
NullType: OBJECT,
none_type: OBJECT,
Data: OBJECT,
dict: OBJECT,
object: OBJECT,
list: NESTED,
set: NESTED,
# tuple: NESTED, # DO NOT INCLUDE, WILL HIDE LOGIC ERRORS
FlatList: NESTED,
Date: NUMBER
}
if PY2:
python_type_to_json_type[str] = STRING
python_type_to_json_type[long] = NUMBER
for k, v in items(python_type_to_json_type):
python_type_to_json_type[k.__name__] = v
_merge_order = {
BOOLEAN: 1,
INTEGER: 2,
NUMBER: 3,
STRING: 4,
OBJECT: 5,
NESTED: 6
}
def _merge_json_type(A, B):
a = _merge_order[A]
b = _merge_order[B]
return A if a >= b else B
from mo_json.decoder import json_decoder
from mo_json.encoder import json_encoder, pypy_json_encode
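
The JSON type constants and `python_type_to_json_type` now live in `mo_json` itself (they were removed from `typed_encoder`, below), and type names are registered as keys alongside the types. `_merge_json_type` widens two types according to `_merge_order`; a quick sketch of the intended behavior:

from mo_json import INTEGER, NUMBER, STRING, _merge_json_type, python_type_to_json_type

print(python_type_to_json_type[int])     # number
print(python_type_to_json_type["list"])  # nested: TYPE NAMES WERE ADDED AS KEYS TOO

# MERGING PICKS THE WIDER TYPE PER _merge_order
print(_merge_json_type(INTEGER, NUMBER))  # number
print(_merge_json_type(STRING, INTEGER))  # string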

5
vendor/mo_json/decoder.py vendored

@ -7,10 +7,9 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import json
json_decoder = json.loads

49
vendor/mo_json/encoder.py vendored

@ -7,24 +7,22 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import json
import math
import time
from collections import Mapping
from datetime import datetime, date, timedelta
from mo_future import is_text, is_binary
from datetime import date, datetime, timedelta
from decimal import Decimal
import json
from json.encoder import encode_basestring
import math
from math import floor
import time
from mo_dots import Data, FlatList, NullType, Null, SLOT
from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange, PYPY
from mo_json import ESCAPE_DCT, scrub, float2json
from mo_dots import Data, FlatList, Null, NullType, SLOT, is_data, is_list
from mo_future import PYPY, binary_type, is_binary, is_text, long, sort_using_key, text_type, utf8_json_encoder, xrange
from mo_json import ESCAPE_DCT, float2json, scrub
from mo_logs import Except
from mo_logs.strings import utf82unicode, quote
from mo_logs.strings import quote, utf82unicode
from mo_times import Timer
from mo_times.dates import Date
from mo_times.durations import Duration
@ -111,8 +109,11 @@ class cPythonJSONEncoder(object):
try:
with Timer("scrub", too_long=0.1):
scrubbed = scrub(value)
with Timer("encode", too_long=0.1):
return text_type(self.encoder(scrubbed))
param = {"size": 0}
with Timer("encode {{size}} characters", param=param, too_long=0.1):
output = text_type(self.encoder(scrubbed))
param["size"] = len(output)
return output
except Exception as e:
from mo_logs.exceptions import Except
from mo_logs import Log
@ -198,7 +199,7 @@ def _value2json(value, _buffer):
append(_buffer, float2json(value.seconds))
elif type is NullType:
append(_buffer, u"null")
elif isinstance(value, Mapping):
elif is_data(value):
if not value:
append(_buffer, u"{}")
else:
@ -250,7 +251,7 @@ def _dict2json(value, _buffer):
for k, v in value.items():
append(_buffer, prefix)
prefix = COMMA_QUOTE
if isinstance(k, binary_type):
if is_binary(k):
k = utf82unicode(k)
for c in k:
append(_buffer, ESCAPE_DCT.get(c, c))
@ -275,21 +276,21 @@ def pretty_json(value):
return "false"
elif value is True:
return "true"
elif isinstance(value, Mapping):
elif is_data(value):
try:
items = sort_using_key(value.items(), lambda r: r[0])
values = [encode_basestring(k) + PRETTY_COLON + indent(pretty_json(v)).strip() for k, v in items if v != None]
values = [encode_basestring(k) + PRETTY_COLON + pretty_json(v) for k, v in items if v != None]
if not values:
return "{}"
elif len(values) == 1:
return "{" + values[0] + "}"
else:
return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}"
return "{\n" + ",\n".join(indent(v) for v in values) + "\n}"
except Exception as e:
from mo_logs import Log
from mo_math import OR
if OR(not isinstance(k, text_type) for k in value.keys()):
if OR(not is_text(k) for k in value.keys()):
Log.error(
"JSON must have string keys: {{keys}}:",
keys=[k for k in value.keys()],
@ -303,8 +304,8 @@ def pretty_json(value):
)
elif value in (None, Null):
return "null"
elif isinstance(value, (text_type, binary_type)):
if isinstance(value, binary_type):
elif value.__class__ in (binary_type, text_type):
if is_binary(value):
value = utf82unicode(value)
try:
return quote(value)
@ -330,9 +331,9 @@ def pretty_json(value):
Log.note("return value of length {{length}}", length=len(output))
return output
except BaseException as f:
Log.warning("can not even explicit convert {{type}}", type=f.__class__.__name__, cause=f)
Log.warning("can not convert {{type}} to json", type=f.__class__.__name__, cause=f)
return "null"
elif isinstance(value, list):
elif is_list(value):
if not value:
return "[]"

13
vendor/mo_json/stream.py vendored

@ -7,15 +7,13 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import json
from collections import Mapping
from types import GeneratorType
from mo_dots import split_field, startswith_field, relative_field, Data, join_field, Null, wrap
from mo_dots import Data, Null, is_data, join_field, relative_field, split_field, startswith_field, wrap
from mo_logs import Log
DEBUG = False
@ -31,7 +29,6 @@ NO_VARS = set()
json_decoder = json.JSONDecoder().decode
def parse(json, query_path, expected_vars=NO_VARS):
"""
INTENDED TO TREAT JSON AS A STREAM; USING MINIMAL MEMORY WHILE IT ITERATES
@ -154,7 +151,7 @@ def parse(json, query_path, expected_vars=NO_VARS):
pass
elif e == ".":
destination[i] = value
elif isinstance(value, Mapping):
elif is_data(value):
destination[i] = value[e]
else:
destination[i] = Null
@ -312,7 +309,7 @@ def parse(json, query_path, expected_vars=NO_VARS):
c = json[index]
return c, index + 1
if isinstance(query_path, Mapping) and query_path.get("items"):
if is_data(query_path) and query_path.get("items"):
path_list = split_field(query_path.get("items")) + ["$items"] # INSERT A MARKER SO THAT OBJECT IS STREAM DECODED
else:
path_list = split_field(query_path)
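
`parse` is still a single-pass streaming decoder: it yields one record per element found on `query_path`, materializing only the `expected_vars`. A hedged usage sketch, assuming rows come back as `Data` with attribute access:

from mo_json.stream import parse

json_text = u'{"meta": {"count": 2}, "data": [{"a": 1}, {"a": 2}]}'

# ONE ROW PER ELEMENT OF "data", WITH MINIMAL MEMORY HELD DURING ITERATION
for row in parse(json_text, "data", expected_vars={"a"}):
    print(row.a)  # -> 1, THEN 2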

131
vendor/mo_json/typed_encoder.py vendored

@ -7,20 +7,19 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
import time
from collections import Mapping
from mo_future import is_text, is_binary
from datetime import date, datetime, timedelta
from decimal import Decimal
from json.encoder import encode_basestring
import time
from mo_dots import Data, FlatList, NullType, join_field, split_field, _get, SLOT, DataObject
from mo_future import text_type, binary_type, sort_using_key, long, PY2, none_type, generator_types
from mo_json import ESCAPE_DCT, float2json
from mo_json.encoder import UnicodeBuilder, COLON, COMMA, problem_serializing, json_encoder
from mo_dots import CLASS, Data, DataObject, FlatList, NullType, SLOT, _get, is_data, join_field, split_field
from mo_dots.objects import OBJ
from mo_future import binary_type, generator_types, is_binary, is_text, long, sort_using_key, text_type
from mo_json import BOOLEAN, ESCAPE_DCT, EXISTS, INTEGER, NESTED, NUMBER, STRING, float2json, python_type_to_json_type
from mo_json.encoder import COLON, COMMA, UnicodeBuilder, json_encoder, problem_serializing
from mo_logs import Log
from mo_logs.strings import quote, utf82unicode
from mo_times import Date, Duration
@ -45,11 +44,19 @@ def untype_path(encoded):
def unnest_path(encoded):
if encoded.startswith(".."):
encoded = encoded.lstrip(".")
if not encoded:
encoded = "."
remainder = encoded.lstrip(".")
back = len(encoded) - len(remainder)
return ("." * back) + unnest_path(remainder)
return join_field(decode_property(c) for c in split_field(encoded) if c != NESTED_TYPE)
path = split_field(encoded)
if not path:
return "."
if path[-1] == NESTED_TYPE:
path = path[:-1]
if not path:
return "."
return join_field([decode_property(c) for c in path[:-1] if not c.startswith(TYPE_PREFIX)] + [decode_property(path[-1])])
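
The rewritten `unnest_path` first preserves leading dots, then strips typed annotations: a trailing nested marker is dropped, interior `~`-prefixed steps are removed, and the remaining steps are decoded. Assuming the usual `~N~` spelling of `NESTED_TYPE`:

from mo_json.typed_encoder import unnest_path

print(unnest_path("a.~N~.b"))    # a.b     INTERIOR TYPE STEPS ARE DROPPED
print(unnest_path("a.b.~N~"))    # a.b     TRAILING NESTED MARKER IS DROPPED
print(unnest_path("..a.~N~.b"))  # ..a.b   LEADING DOTS ARE PRESERVED
print(unnest_path("~N~"))        # .       NOTHING LEFT MEANS ROOT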
def untyped(value):
@ -57,7 +64,7 @@ def untyped(value):
def _untype_list(value):
if any(isinstance(v, Mapping) for v in value):
if any(is_data(v) for v in value):
# MAY BE MORE TYPED OBJECTS IN THIS LIST
output = [_untype_value(v) for v in value]
else:
@ -91,7 +98,7 @@ def _untype_dict(value):
def _untype_value(value):
_type = _get(value, "__class__")
_type = _get(value, CLASS)
if _type is Data:
return _untype_dict(_get(value, SLOT))
elif _type is dict:
@ -103,7 +110,7 @@ def _untype_value(value):
elif _type is NullType:
return None
elif _type is DataObject:
return _untype_value(_get(value, "_obj"))
return _untype_value(_get(value, OBJ))
elif _type in generator_types:
return _untype_list(value)
else:
@ -144,7 +151,7 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
else:
from mo_logs import Log
Log.error("Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.names['.'])
Log.error("Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.name)
sub_schema = {json_type_to_inserter_type[value_json_type]: sub_schema}
@ -186,10 +193,15 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
append(buffer, text_type(len(value)))
append(buffer, '}')
else:
# SINGLETON LISTS OF null SHOULD NOT EXIST
from mo_logs import Log
Log.error("should not happen")
# SINGLETON LIST
append(buffer, '{')
append(buffer, QUOTED_NESTED_TYPE)
append(buffer, '[{')
append(buffer, QUOTED_EXISTS_TYPE)
append(buffer, '1}]')
append(buffer, COMMA)
append(buffer, QUOTED_EXISTS_TYPE)
append(buffer, '1}')
else:
if EXISTS_TYPE not in sub_schema:
sub_schema[EXISTS_TYPE] = {}
@ -200,7 +212,7 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
else:
append(buffer, '{')
append(buffer, QUOTED_EXISTS_TYPE)
append(buffer, '0}')
append(buffer, '1}')
elif _type is binary_type:
if STRING_TYPE not in sub_schema:
sub_schema[STRING_TYPE] = True
@ -246,16 +258,27 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
elif _type in (set, list, tuple, FlatList):
if len(value) == 0:
append(buffer, '{')
append(buffer, QUOTED_NESTED_TYPE)
append(buffer, '[]}')
elif any(isinstance(v, (Mapping, set, list, tuple, FlatList)) for v in value):
if NESTED_TYPE not in sub_schema:
sub_schema[NESTED_TYPE] = {}
net_new_properties.append(path + [NESTED_TYPE])
append(buffer, '{')
append(buffer, QUOTED_NESTED_TYPE)
_list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
append(buffer, '}')
append(buffer, QUOTED_EXISTS_TYPE)
append(buffer, '0}')
elif any(v.__class__ in (Data, dict, set, list, tuple, FlatList) for v in value):
# THIS IS NOT DONE BECAUSE
if len(value) == 1:
if NESTED_TYPE in sub_schema:
append(buffer, '{')
append(buffer, QUOTED_NESTED_TYPE)
_list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
append(buffer, '}')
else:
# NO NEED TO NEST, SO DO NOT DO IT
typed_encode(value[0], sub_schema, path, net_new_properties, buffer)
else:
if NESTED_TYPE not in sub_schema:
sub_schema[NESTED_TYPE] = {}
net_new_properties.append(path + [NESTED_TYPE])
append(buffer, '{')
append(buffer, QUOTED_NESTED_TYPE)
_list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
append(buffer, '}')
else:
# ALLOW PRIMITIVE MULTIVALUES
value = [v for v in value if v != None]
@ -390,9 +413,9 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
continue
append(buffer, prefix)
prefix = COMMA
if isinstance(k, binary_type):
if is_binary(k):
k = utf82unicode(k)
if not isinstance(k, text_type):
if not is_text(k):
Log.error("Expecting property name to be a string")
if k not in sub_schema:
sub_schema[k] = {}
@ -410,40 +433,6 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
append(buffer, '1}')
IS_NULL = '0'
BOOLEAN = 'boolean'
INTEGER = 'integer'
NUMBER = 'number'
STRING = 'string'
OBJECT = 'object'
NESTED = "nested"
EXISTS = "exists"
JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
STRUCT = [EXISTS, OBJECT, NESTED]
python_type_to_json_type = {
int: NUMBER,
text_type: STRING,
float: NUMBER,
None: OBJECT,
bool: BOOLEAN,
NullType: OBJECT,
none_type: OBJECT,
Data: OBJECT,
dict: OBJECT,
object: OBJECT,
Mapping: OBJECT,
list: NESTED,
FlatList: NESTED,
Date: NUMBER
}
if PY2:
python_type_to_json_type[str] = STRING
python_type_to_json_type[long] = NUMBER
TYPE_PREFIX = "~" # u'\u0442\u0443\u0440\u0435-' # "туре"
@ -461,6 +450,12 @@ QUOTED_STRING_TYPE = quote(STRING_TYPE) + COLON
QUOTED_NESTED_TYPE = quote(NESTED_TYPE) + COLON
QUOTED_EXISTS_TYPE = quote(EXISTS_TYPE) + COLON
inserter_type_to_json_type = {
BOOLEAN_TYPE: BOOLEAN,
NUMBER_TYPE: NUMBER,
STRING_TYPE: STRING
}
json_type_to_inserter_type = {
BOOLEAN: BOOLEAN_TYPE,
INTEGER: NUMBER_TYPE,

20
vendor/mo_json_config/__init__.py vendored

@ -8,21 +8,19 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import os
from collections import Mapping
import mo_dots
from mo_dots import set_default, wrap, unwrap
from mo_dots import is_data, is_list, set_default, unwrap, wrap
from mo_files import File
from mo_files.url import URL
from mo_future import text_type
from mo_json import json2value
from mo_json_config.convert import ini2value
from mo_logs import Log, Except
from mo_logs import Except, Log
DEBUG = False
@ -86,7 +84,7 @@ def _replace_ref(node, url):
if url.path.endswith("/"):
url.path = url.path[:-1]
if isinstance(node, Mapping):
if is_data(node):
ref = None
output = {}
for k, v in node.items():
@ -123,7 +121,7 @@ def _replace_ref(node, url):
if not output:
output = new_value
elif isinstance(output, text_type):
elif is_text(output):
Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
else:
output = unwrap(set_default(output, new_value))
@ -131,7 +129,7 @@ def _replace_ref(node, url):
DEBUG and Log.note("Return {{output}}", output=output)
return output
elif isinstance(node, list):
elif is_list(node):
output = [_replace_ref(n, url) for n in node]
# if all(p[0] is p[1] for p in zip(output, node)):
# return node
@ -141,7 +139,7 @@ def _replace_ref(node, url):
def _replace_locals(node, doc_path):
if isinstance(node, Mapping):
if is_data(node):
# RECURS, DEEP COPY
ref = None
output = {}
@ -179,7 +177,7 @@ def _replace_locals(node, doc_path):
else:
return unwrap(set_default(output, new_value))
elif isinstance(node, list):
elif is_list(node):
candidate = [_replace_locals(n, [n] + doc_path) for n in node]
# if all(p[0] is p[1] for p in zip(candidate, node)):
# return node

7
vendor/mo_json_config/convert.py vendored

@ -8,12 +8,11 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import StringIO, ConfigParser
from mo_future import is_text, is_binary
from mo_dots import wrap
from mo_future import ConfigParser, StringIO
def ini2value(ini_content):

30
vendor/mo_kwargs/__init__.py vendored

@ -7,14 +7,11 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from mo_dots import zip as dict_zip, get_logger, wrap
from mo_future import text_type, get_function_arguments, get_function_defaults, get_function_name
from mo_future import is_text, is_binary
from mo_dots import get_logger, is_data, wrap, zip as dict_zip
from mo_future import get_function_arguments, get_function_defaults, get_function_name, text_type
from mo_logs import Except
@ -74,25 +71,26 @@ def override(func):
elif func_name in ("__init__", "__new__"):
def w_constructor(*args, **kwargs):
if "kwargs" in kwargs:
packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults)
elif len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
packed = params_pack(params, dict_zip(params[1:], args[1:]), kwargs, kwargs["kwargs"], defaults)
elif len(args) == 2 and len(kwargs) == 0 and is_data(args[1]):
# ASSUME SECOND UNNAMED PARAM IS kwargs
packed = params_pack(params, args[1], defaults)
else:
# DO NOT INCLUDE self IN kwargs
packed = params_pack(params, kwargs, dict_zip(params, args), defaults)
packed = params_pack(params, dict_zip(params[1:], args[1:]), kwargs, defaults)
try:
return func(**packed)
return func(args[0], **packed)
except TypeError as e:
packed['self'] = args[0] # DO NOT SAY IS MISSING
raise_error(e, packed)
return w_constructor
elif params[0] == "self":
def w_bound_method(*args, **kwargs):
if len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
if len(args) == 2 and len(kwargs) == 0 and is_data(args[1]):
# ASSUME SECOND UNNAMED PARAM IS kwargs
packed = params_pack(params, args[1], defaults)
elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
elif "kwargs" in kwargs and is_data(kwargs["kwargs"]):
# PUT args INTO kwargs
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
else:
@ -105,10 +103,10 @@ def override(func):
else:
def w_kwargs(*args, **kwargs):
if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
if len(args) == 1 and len(kwargs) == 0 and is_data(args[0]):
# ASSUME SINGLE PARAMETER IS kwargs
packed = params_pack(params, args[0], defaults)
elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
elif "kwargs" in kwargs and is_data(kwargs["kwargs"]):
# PUT args INTO kwargs
packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults)
else:
@ -127,6 +125,8 @@ def params_pack(params, *args):
if a == None:
continue
for k, v in a.items():
if v == None:
continue
k = text_type(k)
if k in settings:
continue
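
`override` still accepts arguments positionally, by name, or bundled into a single `kwargs` data object, and `params_pack` now also skips `None` values so they no longer shadow defaults. A sketch of the calling conventions (the function itself is hypothetical):

from mo_kwargs import override

@override
def connect(host, port=9200, kwargs=None):
    # kwargs CARRIES THE FULLY PACKED PARAMETER SET
    return host, port

connect("db.example.com")                                 # POSITIONAL
connect(host="db.example.com", port=9300)                 # NAMED
connect(kwargs={"host": "db.example.com", "port": 9300})  # BUNDLED SETTINGS
connect({"host": "db.example.com"})                       # LONE DATA ARG TREATED AS kwargs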

224
vendor/mo_logs/__init__.py vendored

@ -7,21 +7,19 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from datetime import datetime
import os
import platform
import sys
from collections import Mapping
from datetime import datetime
from mo_dots import coalesce, listwrap, wrap, unwrap, unwraplist, set_default, FlatList
from mo_future import text_type, PY3
from mo_dots import Data, FlatList, coalesce, is_data, is_list, listwrap, unwraplist, wrap
from mo_future import PY3, text_type
from mo_logs import constants
from mo_logs.exceptions import Except, suppress_exception
from mo_logs.strings import indent
from mo_logs.exceptions import Except, LogItem, suppress_exception
from mo_logs.strings import CR, indent
_Thread = None
if PY3:
@ -30,7 +28,6 @@ else:
STDOUT = sys.stdout
class Log(object):
"""
FOR STRUCTURED LOGGING AND EXCEPTION CHAINING
@ -78,7 +75,7 @@ class Log(object):
from mo_threads import profiles
profiles.enable_profilers(settings.cprofile.filename)
if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
if settings.profile is True or (is_data(settings.profile) and settings.profile.enabled):
Log.error("REMOVED 2018-09-02, Activedata revision 3f30ff46f5971776f8ba18")
# from mo_logs import profiles
#
@ -175,38 +172,20 @@ class Log(object):
:param more_params: *any more parameters (which will overwrite default_params)
:return:
"""
if not isinstance(template, text_type):
timestamp = datetime.utcnow()
if not is_text(template):
Log.error("Log.note was expecting a unicode template")
if len(template) > 10000:
template = template[:10000]
params = dict(unwrap(default_params), **more_params)
log_params = set_default({
"template": template,
"params": params,
"timestamp": datetime.utcnow(),
"machine": machine_metadata
}, log_context, {"context": exceptions.NOTE})
if not template.startswith("\n") and template.find("\n") > -1:
template = "\n" + template
if cls.trace:
log_template = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.")
f = sys._getframe(stack_depth + 1)
log_params.location = {
"line": f.f_lineno,
"file": text_type(f.f_code.co_filename.split(os.sep)[-1]),
"method": text_type(f.f_code.co_name)
}
thread = _Thread.current()
log_params.thread = {"name": thread.name, "id": thread.id}
else:
log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")
cls.main_log.write(log_template, log_params)
Log._annotate(
LogItem(
context=exceptions.NOTE,
format=template,
template=template,
params=dict(default_params, **more_params)
),
timestamp,
stack_depth+1
)
@classmethod
def unexpected(
@ -227,22 +206,26 @@ class Log(object):
:param more_params: *any more parameters (which will overwrite default_params)
:return:
"""
timestamp = datetime.utcnow()
if not is_text(template):
Log.error("Log.warning was expecting a unicode template")
if isinstance(default_params, BaseException):
cause = default_params
default_params = {}
params = dict(unwrap(default_params), **more_params)
if "values" in more_params.keys():
Log.error("Can not handle a logging parameter by name `values`")
if cause and not isinstance(cause, Except):
cause = Except(exceptions.UNEXPECTED, text_type(cause), trace=exceptions._extract_traceback(0))
params = Data(dict(default_params, **more_params))
cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
trace = exceptions.extract_stack(stack_depth + 1)
trace = exceptions.extract_stack(1)
e = Except(type=exceptions.UNEXPECTED, template=template, params=params, cause=cause, trace=trace)
Log.note(
"{{error}}",
error=e,
log_context=set_default({"context": exceptions.WARNING}, log_context),
stack_depth=stack_depth + 1
e = Except(exceptions.UNEXPECTED, template=template, params=params, cause=cause, trace=trace)
Log._annotate(
e,
timestamp,
stack_depth+1
)
@classmethod
@ -259,44 +242,23 @@ class Log(object):
:param default_params: *dict* parameters to fill in template
:param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
:param log_context: *dict* extra key:value pairs for your convenience
:param more_params: *any more parameters (which will overwrite default_params)
:param more_params: more parameters (which will overwrite default_params)
:return:
"""
# USE replace() AS POOR MAN'S CHILD TEMPLATE
template = ("*" * 80) + "\n" + indent(template, prefix="** ").strip() + "\n" + ("*" * 80)
Log.note(
template,
default_params=default_params,
stack_depth=stack_depth + 1,
log_context=set_default({"context": exceptions.ALARM}, log_context),
**more_params
timestamp = datetime.utcnow()
format = ("*" * 80) + CR + indent(template, prefix="** ").strip() + CR + ("*" * 80)
Log._annotate(
LogItem(
context=exceptions.ALARM,
format=format,
template=template,
params=dict(default_params, **more_params)
),
timestamp,
stack_depth + 1
)
@classmethod
def alert(
cls,
template,
default_params={},
stack_depth=0,
log_context=None,
**more_params
):
"""
:param template: *string* human readable string with placeholders for parameters
:param default_params: *dict* parameters to fill in template
:param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
:param log_context: *dict* extra key:value pairs for your convenience
:param more_params: *any more parameters (which will overwrite default_params)
:return:
"""
return Log.alarm(
template,
default_params=default_params,
stack_depth=stack_depth + 1,
log_context=set_default({"context": exceptions.ALARM}, log_context),
**more_params
)
alert = alarm
@classmethod
def warning(
@ -317,7 +279,8 @@ class Log(object):
:param more_params: *any more parameters (which will overwrite default_params)
:return:
"""
if not isinstance(template, text_type):
timestamp = datetime.utcnow()
if not is_text(template):
Log.error("Log.warning was expecting a unicode template")
if isinstance(default_params, BaseException):
@ -326,19 +289,18 @@ class Log(object):
if "values" in more_params.keys():
Log.error("Can not handle a logging parameter by name `values`")
params = dict(unwrap(default_params), **more_params)
params = Data(dict(default_params, **more_params))
cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
trace = exceptions.extract_stack(stack_depth + 1)
e = Except(type=exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
Log.note(
"{{error|unicode}}",
error=e,
log_context=set_default({"context": exceptions.WARNING}, log_context),
stack_depth=stack_depth + 1
e = Except(exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
Log._annotate(
e,
timestamp,
stack_depth+1
)
@classmethod
def error(
cls,
@ -359,7 +321,7 @@ class Log(object):
:param more_params: *any more parameters (which will overwrite default_params)
:return:
"""
if not isinstance(template, text_type):
if not is_text(template):
sys.stderr.write(str("Log.error was expecting a unicode template"))
Log.error("Log.error was expecting a unicode template")
@ -367,12 +329,12 @@ class Log(object):
cause = default_params
default_params = {}
params = dict(unwrap(default_params), **more_params)
params = Data(dict(default_params, **more_params))
add_to_trace = False
if cause == None:
causes = None
elif isinstance(cause, list):
elif is_list(cause):
causes = []
for c in listwrap(cause): # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR)
causes.append(Except.wrap(c, stack_depth=1))
@ -388,55 +350,47 @@ class Log(object):
if add_to_trace:
cause[0].trace.extend(trace[1:])
e = Except(type=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
e = Except(context=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
raise_from_none(e)
@classmethod
def fatal(
def _annotate(
cls,
template, # human readable template
default_params={}, # parameters for template
cause=None, # plausible cause
stack_depth=0,
log_context=None,
**more_params
item,
timestamp,
stack_depth
):
"""
SEND TO STDERR
:param template: *string* human readable string with placeholders for parameters
:param default_params: *dict* parameters to fill in template
:param cause: *Exception* for chaining
:param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
:param log_context: *dict* extra key:value pairs for your convenience
:param more_params: *any more parameters (which will overwrite default_params)
:param item: A LogItem, THE TYPE OF MESSAGE
:param stack_depth: FOR TRACKING WHAT LINE THIS CAME FROM
:return:
"""
if default_params and isinstance(listwrap(default_params)[0], BaseException):
cause = default_params
default_params = {}
item.timestamp = timestamp
item.machine = machine_metadata
item.template = strings.limit(item.template, 10000)
params = dict(unwrap(default_params), **more_params)
item.format = strings.limit(item.format, 10000)
if item.format == None:
format = text_type(item)
else:
format = item.format.replace("{{", "{{params.")
if not format.startswith(CR) and format.find(CR) > -1:
format = CR + format
cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
trace = exceptions.extract_stack(stack_depth + 1)
e = Except(type=exceptions.ERROR, template=template, params=params, cause=cause, trace=trace)
error_mode = cls.error_mode
with suppress_exception:
if not error_mode:
cls.error_mode = True
Log.note(
"{{error|unicode}}",
error=e,
log_context=set_default({"context": exceptions.FATAL}, log_context),
stack_depth=stack_depth + 1
)
cls.error_mode = error_mode
sys.stderr.write(str(e))
if cls.trace:
log_format = item.format = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" - ({{location.method}}) - " + format
f = sys._getframe(stack_depth + 1)
item.location = {
"line": f.f_lineno,
"file": text_type(f.f_code.co_filename),
"method": text_type(f.f_code.co_name)
}
thread = _Thread.current()
item.thread = {"name": thread.name, "id": thread.id}
else:
log_format = item.format = "{{timestamp|datetime}} - " + format
cls.main_log.write(log_format, item.__data__())
def write(self):
raise NotImplementedError
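
After this refactor every entry point builds a `LogItem` (or an `Except`) and hands it to `_annotate`, which stamps the timestamp, machine metadata, and, when tracing, the caller's location and thread. Typical calls, with moustache templates expanded from the params:

from mo_logs import Log

Log.note("processed {{num}} records", num=123)
Log.alarm("backup finished")  # alert IS NOW SIMPLY AN ALIAS FOR alarm
try:
    1 / 0
except Exception as e:
    Log.warning("failed on {{name}}", name="job-7", cause=e)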

8
vendor/mo_logs/constants.py vendored

@ -6,14 +6,12 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import sys
from mo_dots import set_attr as mo_dots_set_attr
from mo_dots import wrap, join_field, split_field
from mo_dots import join_field, set_attr as mo_dots_set_attr, split_field, wrap
DEBUG = True

9
vendor/mo_logs/convert.py vendored

@ -8,14 +8,13 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from datetime import date, datetime
import json as _json
from datetime import datetime, date
from mo_future import text_type, PY3
from mo_future import PY3
def unix2datetime(u):

88
vendor/mo_logs/exceptions.py vendored

@ -9,17 +9,14 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import sys
from collections import Mapping
from mo_dots import Data, listwrap, unwraplist, set_default, Null, coalesce
from mo_future import text_type, PY3
from mo_logs.strings import indent, expand_template
from mo_dots import Data, Null, is_data, listwrap, unwraplist
from mo_future import PY3, text_type
from mo_logs.strings import CR, expand_template, indent
FATAL = "FATAL"
ERROR = "ERROR"
@ -29,27 +26,47 @@ UNEXPECTED = "UNEXPECTED"
NOTE = "NOTE"
class Except(Exception):
class LogItem(object):
def __init__(self, context, format, template, params):
self.context = context
self.format = format
self.template = template
self.params = params
def __data__(self):
return Data(self.__dict__)
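
`LogItem` is the new common carrier for log records, and `Except` now inherits from it, so an exception can be written like any other entry. A minimal construction, mirroring what `Log._annotate` receives:

from mo_logs import exceptions
from mo_logs.exceptions import LogItem

item = LogItem(
    context=exceptions.NOTE,
    format=None,  # FILLED IN LATER BY Log._annotate
    template="processed {{num}} records",
    params={"num": 123},
)
print(item.__data__().template)  # -> processed {{num}} records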
class Except(Exception, LogItem):
@staticmethod
def new_instance(desc):
return Except(
type=desc.type,
context=desc.context,
template=desc.template,
params=desc.params,
cause=[Except.new_instance(c) for c in listwrap(desc.cause)],
trace=desc.trace
)
def __init__(self, type=ERROR, template=Null, params=Null, cause=Null, trace=Null, **kwargs):
Exception.__init__(self)
self.type = type
self.template = template
self.params = set_default(kwargs, params)
def __init__(self, context=ERROR, template=Null, params=Null, cause=Null, trace=Null, **_):
if context == None:
raise ValueError("expecting context to not be None")
self.cause = Except.wrap(cause)
Exception.__init__(self)
LogItem.__init__(
self,
context=context,
format=None,
template=template,
params=params
)
if not trace:
self.trace=extract_stack(2)
self.trace = extract_stack(2)
else:
self.trace = trace
@ -66,7 +83,7 @@ class Except(Exception):
return Null
elif isinstance(e, (list, Except)):
return e
elif isinstance(e, Mapping):
elif is_data(e):
e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
return Except(**e)
else:
@ -78,9 +95,9 @@ class Except(Exception):
cause = Except.wrap(getattr(e, '__cause__', None))
if hasattr(e, "message") and e.message:
output = Except(type=ERROR, template=text_type(e.message), trace=trace, cause=cause)
output = Except(context=ERROR, template=text_type(e.message), trace=trace, cause=cause)
else:
output = Except(type=ERROR, template=text_type(e), trace=trace, cause=cause)
output = Except(context=ERROR, template=text_type(e), trace=trace, cause=cause)
trace = extract_stack(stack_depth + 2) # +2 = to remove the caller, and it's call to this' Except.wrap()
output.trace.extend(trace)
@ -91,11 +108,11 @@ class Except(Exception):
return expand_template(self.template, self.params)
def __contains__(self, value):
if isinstance(value, text_type):
if is_text(value):
if self.template.find(value) >= 0 or self.message.find(value) >= 0:
return True
if self.type == value:
if self.context == value:
return True
for c in listwrap(self.cause):
if value in c:
@ -103,7 +120,7 @@ class Except(Exception):
return False
def __unicode__(self):
output = self.type + ": " + self.template + "\n"
output = self.context + ": " + self.template + CR
if self.params:
output = expand_template(output, self.params)
@ -113,8 +130,10 @@ class Except(Exception):
if self.cause:
cause_strings = []
for c in listwrap(self.cause):
with suppress_exception:
try:
cause_strings.append(text_type(c))
except Exception as e:
sys.stderr("Problem serializing cause"+text_type(c))
output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)
@ -128,13 +147,9 @@ class Except(Exception):
return self.__unicode__().encode('latin1', 'replace')
def __data__(self):
return Data(
type=self.type,
template=self.template,
params=self.params,
cause=self.cause,
trace=self.trace
)
output = Data({k: getattr(self, k) for k in vars(self)})
output.cause = unwraplist([c.__data__() for c in listwrap(output.cause)])
return output
def extract_stack(start=0):
@ -195,11 +210,10 @@ def _parse_traceback(tb):
def format_trace(tbs, start=0):
trace = []
for d in tbs[start::]:
item = expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d)
trace.append(item)
return "".join(trace)
return "".join(
expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d)
for d in tbs[start::]
)
class Suppress(object):
@ -208,13 +222,13 @@ class Suppress(object):
"""
def __init__(self, exception_type):
self.type = exception_type
self.context = exception_type
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if not exc_val or isinstance(exc_val, self.type):
if not exc_val or isinstance(exc_val, self.context):
return True
suppress_exception = Suppress(Exception)

121
vendor/mo_logs/log_usingElasticSearch.py vendored

@ -7,59 +7,73 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
from datetime import date, datetime
import sys
import mo_json
from jx_python import jx
from mo_dots import wrap, coalesce, FlatList
from mo_future import text_type, binary_type, number_types
from mo_json import value2json
from mo_dots import coalesce, is_data, is_sequence, listwrap, wrap
from mo_future import is_binary, is_text, number_types, text_type
from mo_json import datetime2unix, json2value, value2json
from mo_kwargs import override
from mo_logs import Log, strings
from mo_logs.exceptions import suppress_exception
from mo_logs.exceptions import Except, suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
from mo_threads import Thread, Queue, Till, THREAD_STOP
from mo_times import MINUTE, Duration
from mo_math.randoms import Random
from mo_threads import Queue, THREAD_STOP, Thread, Till
from mo_times import Duration, MINUTE
from mo_times.dates import datetime2unix
from pyLibrary.convert import bytes2base64
from pyLibrary.env.elasticsearch import Cluster
MAX_BAD_COUNT = 5
LOG_STRING_LENGTH = 2000
PAUSE_AFTER_GOOD_INSERT = 1
PAUSE_AFTER_BAD_INSERT = 60
class StructuredLogger_usingElasticSearch(StructuredLogger):
@override
def __init__(self, host, index, port=9200, type="log", queue_size=1000, batch_size=100, kwargs=None):
def __init__(
self,
host,
index,
port=9200,
type="log",
queue_size=1000,
batch_size=100,
kwargs=None,
):
"""
settings ARE FOR THE ELASTICSEARCH INDEX
"""
kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
kwargs.retry.times = coalesce(kwargs.retry.times, 3)
kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
kwargs.host = Random.sample(listwrap(host), 1)[0]
schema = json2value(value2json(SCHEMA), leaves=True)
schema.mappings[type].properties["~N~"].type = "nested"
self.es = Cluster(kwargs).get_or_create_index(
schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
schema=schema,
limit_replicas=True,
typed=True,
kwargs=kwargs
kwargs=kwargs,
)
self.batch_size = batch_size
self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
self.queue = Queue("debug logs to es", max=queue_size, silent=True)
Thread.run("add debug logs to es", self._insert_loop)
self.worker = Thread.run("add debug logs to es", self._insert_loop)
def write(self, template, params):
if params.get("template"):
# DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
self.queue.add({"value": params})
else:
template = strings.limit(template, 2000)
self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE)
try:
params.template = strings.limit(params.template, 2000)
params.format = None
self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
except Exception as e:
sys.stdout.write(text_type(Except.wrap(e)))
return self
def _insert_loop(self, please_stop=None):
@ -68,7 +82,7 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
try:
messages = wrap(self.queue.pop_all())
if not messages:
Till(seconds=1).wait()
Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
continue
for g, mm in jx.groupby(messages, size=self.batch_size):
@ -76,9 +90,17 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
for i, message in enumerate(mm):
if message is THREAD_STOP:
please_stop.go()
return
continue
try:
scrubbed.append(_deep_json_to_string(message, depth=3))
messages = flatten_causal_chain(message.value)
scrubbed.append(
{
"value": [
_deep_json_to_string(m, depth=3)
for m in messages
]
}
)
except Exception as e:
Log.warning("Problem adding to scrubbed list", cause=e)
@ -88,13 +110,18 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
Log.warning("Problem inserting logs into ES", cause=f)
bad_count += 1
if bad_count > MAX_BAD_COUNT:
Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)
Till(seconds=30).wait()
Log.warning(
"Given up trying to write debug logs to ES index {{index}}",
index=self.es.settings.index,
)
Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()
self.es.flush()
# CONTINUE TO DRAIN THIS QUEUE
while not please_stop:
try:
Till(seconds=1).wait()
Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
self.queue.pop_all()
except Exception as e:
Log.warning("Should not happen", cause=e)
@ -105,6 +132,21 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
with suppress_exception:
self.queue.close()
self.worker.join()
def flatten_causal_chain(log_item, output=None):
output = output or []
if is_text(log_item):
output.append({"template": log_item})
return output
output.append(log_item)
for c in listwrap(log_item.cause):
flatten_causal_chain(c, output)
log_item.cause = None
return output
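
`flatten_causal_chain` linearizes an error's `cause` chain into a flat list, detaching each `cause` so the nesting is not serialized twice; the writer then stores the whole chain as one nested array under `value`. For example:

from mo_logs.exceptions import Except
from mo_logs.log_usingElasticSearch import flatten_causal_chain

inner = Except.wrap(ValueError("bad input"))
outer = Except(template="job failed", cause=inner)

chain = flatten_causal_chain(outer)
print(len(chain))      # -> 2: OUTER ERROR FIRST, THEN ITS CAUSE
print(chain[0].cause)  # -> None: CAUSES ARE DETACHED AFTER FLATTENING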
def _deep_json_to_string(value, depth):
@ -113,31 +155,32 @@ def _deep_json_to_string(value, depth):
:param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
:return: FLATTER STRUCTURE
"""
if isinstance(value, Mapping):
if is_data(value):
if depth == 0:
return strings.limit(value2json(value), LOG_STRING_LENGTH)
return {k: _deep_json_to_string(v, depth - 1) for k, v in value.items()}
elif isinstance(value, (list, FlatList)):
elif is_sequence(value):
return strings.limit(value2json(value), LOG_STRING_LENGTH)
elif isinstance(value, number_types):
return value
elif isinstance(value, text_type):
elif is_text(value):
return strings.limit(value, LOG_STRING_LENGTH)
elif isinstance(value, binary_type):
elif is_binary(value):
return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
elif isinstance(value, (date, datetime)):
return datetime2unix(value)
else:
return strings.limit(value2json(value), LOG_STRING_LENGTH)
SCHEMA = {
"settings": {"index.number_of_shards": 2, "index.number_of_replicas": 2},
"mappings": {"_default_": {
"dynamic_templates": [
{"everything_else": {
"match": "*",
"mapping": {"index": False}
}}
]
}}
"mappings": {
"_default_": {
"dynamic_templates": [
{"everything_else": {"match": "*", "mapping": {"index": False}}}
]
},
},
}
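
Because the constructor is wrapped in `@override`, the whole logger can be configured from one settings block; `host` may now be a list, from which one node is sampled, and the `~N~` property is forced to a nested mapping before the index is created. A configuration sketch (hosts and index name are illustrative):

from mo_logs.log_usingElasticSearch import StructuredLogger_usingElasticSearch

logger = StructuredLogger_usingElasticSearch(kwargs={
    "host": ["http://es1.example.com", "http://es2.example.com"],  # ONE IS SAMPLED
    "index": "debug-logs",
    "queue_size": 1000,
    "batch_size": 100,
    # timeout DEFAULTS TO 30second; retry DEFAULTS TO 3 TRIES, ONE MINUTE APART
})
logger.write("{{status}}", {"status": "ok"})
logger.stop()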

7
vendor/mo_logs/log_usingEmail.py vendored

@ -9,11 +9,10 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_dots import listwrap, literal_field, Data
from mo_future import is_text, is_binary
from mo_dots import Data, listwrap, literal_field
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import ALARM, NOTE

5
vendor/mo_logs/log_usingFile.py vendored

@ -9,10 +9,9 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import time
from mo_future import allocate_lock

8
vendor/mo_logs/log_usingLogger.py vendored

@ -9,18 +9,16 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import logging
from mo_dots import unwrap
from mo_logs import Log
from mo_logs.exceptions import suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream, time_delta_pusher
from mo_dots import unwrap
_THREAD_STOP = None
_Queue = None

9
vendor/mo_logs/log_usingMozLog.py vendored

@ -7,17 +7,16 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from decimal import Decimal
from mo_dots import wrap
from mo_json import value2json, datetime2unix
from mo_json import datetime2unix, value2json
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import ERROR, NOTE, WARNING, ALARM
from mo_logs.exceptions import ALARM, ERROR, NOTE, WARNING
from mo_logs.log_usingElasticSearch import _deep_json_to_string
from mo_logs.log_usingNothing import StructuredLogger

5
vendor/mo_logs/log_usingMulti.py vendored

@ -9,10 +9,9 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from mo_logs import Log
from mo_logs.exceptions import suppress_exception
from mo_logs.log_usingNothing import StructuredLogger

5
vendor/mo_logs/log_usingNothing.py vendored

@ -9,11 +9,10 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
class StructuredLogger(object):
"""
ABSTRACT BASE CLASS FOR JSON LOGGING

44
vendor/mo_logs/log_usingQueue.py vendored

@ -1,44 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.strings import expand_template
from mo_threads import Queue
class StructuredLogger_usingQueue(StructuredLogger):
def __init__(self, name=None):
queue_name = "log messages to queue"
if name:
queue_name += " "+name
self.queue = Queue(queue_name)
def write(self, template, params):
self.queue.add(expand_template(template, params))
def stop(self):
self.queue.close()
def pop(self):
lines = self.queue.pop()
output = []
for l in lines.split("\n"):
if l[19:22] == " - ":
l = l[22:]
if l.strip().startswith("File"):
continue
output.append(l)
return "\n".join(output).strip()

7
vendor/mo_logs/log_usingSES.py vendored

@ -9,13 +9,12 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
from boto.ses import connect_to_region
from mo_dots import listwrap, unwrap, literal_field, Data
from mo_dots import Data, listwrap, literal_field, unwrap
from mo_kwargs import override
from mo_logs import Log, suppress_exception
from mo_logs.exceptions import ALARM, NOTE

9
vendor/mo_logs/log_usingStream.py vendored

@ -9,15 +9,14 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_future import is_text, is_binary
import sys
from mo_future import PY3, allocate_lock
from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.strings import expand_template
from mo_logs.strings import CR, expand_template
class StructuredLogger_usingStream(StructuredLogger):
@ -36,7 +35,7 @@ class StructuredLogger_usingStream(StructuredLogger):
value = expand_template(template, params)
self.locker.acquire()
try:
self.writer(value + "\n")
self.writer(value + CR)
finally:
self.locker.release()

9
vendor/mo_logs/log_usingThread.py vendored

@ -9,13 +9,12 @@
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals
from mo_logs import Log, Except, suppress_exception
from mo_future import is_text, is_binary
from mo_logs import Except, Log, suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
from mo_threads import Thread, Queue, Till, THREAD_STOP
from mo_threads import Queue, THREAD_STOP, Thread, Till
DEBUG = False

Some files were not shown because too many files changed in this diff