lib updates
This commit is contained in:
Parent ba6cec27f8
Commit a718a6db24
@@ -7,14 +7,15 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
 from uuid import uuid4
 
-from mo_dots import wrap, coalesce, listwrap
-from mo_future import text_type
+from jx_base.expressions import jx_expression
+from jx_python.expressions import Literal, Python
+from mo_dots import coalesce, listwrap, wrap
+from mo_dots.datas import register_data
+from mo_future import is_text, text_type
 from mo_json import value2json
 from mo_logs import Log
 from mo_logs.strings import expand_template, quote

@@ -72,18 +73,26 @@ def DataClass(name, columns, constraint=None):
     :return: The class that has been created
     """
 
-    from jx_python.expressions import jx_expression
-
-    columns = wrap([{"name": c, "required": True, "nulls": False, "type": object} if isinstance(c, text_type) else c for c in columns])
+    columns = wrap(
+        [
+            {"name": c, "required": True, "nulls": False, "type": object}
+            if is_text(c)
+            else c
+            for c in columns
+        ]
+    )
     slots = columns.name
-    required = wrap(filter(lambda c: c.required and not c.nulls and not c.default, columns)).name
+    required = wrap(
+        filter(lambda c: c.required and not c.nulls and not c.default, columns)
+    ).name
     nulls = wrap(filter(lambda c: c.nulls, columns)).name
     defaults = {c.name: coalesce(c.default, None) for c in columns}
-    types = {c.name: coalesce(c.type, object) for c in columns}
+    types = {c.name: coalesce(c.jx_type, object) for c in columns}
 
     code = expand_template(
         """
 from __future__ import unicode_literals
+from mo_future import is_text, is_binary
 from collections import Mapping
 
 meta = None

@@ -170,31 +179,32 @@ class {{class_name}}(Mapping):
         "slots": "(" + (", ".join(quote(s) for s in slots)) + ")",
         "required": "{" + (", ".join(quote(s) for s in required)) + "}",
         "nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}",
-        "defaults": jx_expression({"literal": defaults}).to_python(),
+        "defaults": Literal(defaults).to_python(),
         "len_slots": len(slots),
         "dict": "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}",
-        "assign": "; ".join("_set(output, "+quote(s)+", self."+s+")" for s in slots),
-        "types": "{" + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + "}",
-        "constraint_expr": jx_expression(constraint).to_python(),
-        "constraint": value2json(constraint)
-        }
-    )
+        "assign": "; ".join(
+            "_set(output, " + quote(s) + ", self." + s + ")" for s in slots
+        ),
+        "types": "{"
+        + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items()))
+        + "}",
+        "constraint_expr": Python[jx_expression(constraint)].to_python(),
+        "constraint": value2json(constraint),
+        },
+    )
 
-    return _exec(code, name)
+    output = _exec(code, name)
+    register_data(output)
+    return output
 
 
-class TableDesc(DataClass(
-    "Table",
-    [
-        "name",
-        "url",
-        "query_path",
-        "timestamp"
-    ],
-    constraint={"and": [
-        {"eq": [{"last": "query_path"}, {"literal": "."}]}
-    ]}
-)):
+class TableDesc(
+    DataClass(
+        "Table",
+        ["name", "url", "query_path", "timestamp"],
+        constraint={"and": [{"eq": [{"last": "query_path"}, {"literal": "."}]}]},
+    )
+):
     @property
     def columns(self):
         raise NotImplementedError()

@@ -204,23 +214,25 @@ class TableDesc(DataClass(
 Column = DataClass(
     "Column",
     [
         # "table",
-        "names",  # MAP FROM TABLE NAME TO COLUMN NAME (ONE COLUMN CAN HAVE MULTIPLE NAMES)
+        "name",
         "es_column",
         "es_index",
         "es_type",
-        {"name": "jx_type", "nulls": True},
+        "jx_type",
         {"name": "useSource", "default": False},
-        {"name": "nested_path", "nulls": True},  # AN ARRAY OF PATHS (FROM DEEPEST TO SHALLOWEST) INDICATING THE JSON SUB-ARRAYS
+        "nested_path",  # AN ARRAY OF PATHS (FROM DEEPEST TO SHALLOWEST) INDICATING THE JSON SUB-ARRAYS
         {"name": "count", "nulls": True},
         {"name": "cardinality", "nulls": True},
         {"name": "multi", "nulls": True},
         {"name": "partitions", "nulls": True},
-        {"name": "last_updated", "nulls": True}
+        "last_updated",
     ],
-    constraint={"and": [
-        {"eq": [{"last": "nested_path"}, {"literal": "."}]}
-    ]}
+    constraint={
+        "and": [
+            {"not": {"eq": {"es_column": "string"}}},
+            {"eq": [{"last": "nested_path"}, {"literal": "."}]},
+        ]
+    },
 )
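
For orientation, a small sketch of how a DataClass-generated class is used. The "Dog" class and its fields are invented for illustration, and the keyword-argument constructor is assumed from the generated template above; this is not part of the commit:

    # Hypothetical example; constraint syntax copied from the Column definition above
    Dog = DataClass(
        "Dog",
        ["name", {"name": "age", "nulls": True}],
        constraint={"gte": ["age", 0]},
    )
    rex = Dog(name="Rex", age=3)  # generated __init__ is assumed to enforce required fields and the constraint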
@@ -7,15 +7,12 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
-from collections import Mapping
+from mo_future import is_text, is_binary
 from copy import copy
 
-from mo_dots import Data
-from mo_dots import set_default, split_field, wrap, join_field
-from mo_future import generator_types, text_type
+from mo_dots import Data, is_data, join_field, set_default, split_field, wrap, is_many
 from mo_logs import Log

@@ -67,9 +64,9 @@ class Container(object):
             return frum
         elif isinstance(frum, _Query):
             return _run(frum)
-        elif isinstance(frum, (list, set) + generator_types):
+        elif is_many(frum):
             return _ListContainer(frum)
-        elif isinstance(frum, text_type):
+        elif is_text(frum):
             # USE DEFAULT STORAGE TO FIND Container
             if not config.default.settings:
                 Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")

@@ -83,7 +80,7 @@ class Container(object):
             )
             settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
             return type2container["elasticsearch"](settings)
-        elif isinstance(frum, Mapping):
+        elif is_data(frum):
             frum = wrap(frum)
             if frum.type and type2container[frum.type]:
                 return type2container[frum.type](frum.settings)
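
Throughout the commit, isinstance() checks are swapped for type-test helpers. A rough equivalence sketch based on the replacements visible in this diff (helper semantics inferred from usage):

    from mo_dots import is_data, is_many
    from mo_future import is_text

    is_text("hits")           # was: isinstance(x, text_type)
    is_data({"type": "es"})   # was: isinstance(x, Mapping)
    is_many([1, 2, 3])        # was: isinstance(x, (list, set) + generator_types)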
@@ -7,17 +7,12 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
-
-from collections import Mapping
+from __future__ import absolute_import, division, unicode_literals
 
-import mo_dots as dot
-from jx_base.domains import Domain, ALGEBRAIC, KNOWN
-from mo_dots import Null, coalesce, join_field, split_field, Data
-from mo_dots import wrap, listwrap
-from mo_dots.lists import FlatList
+from jx_base.domains import ALGEBRAIC, Domain, KNOWN
+from mo_dots import Data, FlatList, Null, coalesce, is_data, is_list, join_field, listwrap, split_field, wrap
 from mo_future import transpose
 from mo_logs import Log
 from mo_math import SUM
 from mo_times.timer import Timer

@@ -56,7 +51,7 @@ class Dimension(object):
         fields = coalesce(dim.field, dim.fields)
         if not fields:
             return  # NO FIELDS TO SEARCH
-        elif isinstance(fields, Mapping):
+        elif is_data(fields):
             self.fields = wrap(fields)
             edges = wrap([{"name": k, "value": v, "allowNulls": False} for k, v in self.fields.items()])
         else:

@@ -88,7 +83,7 @@ class Dimension(object):
             temp = Data(partitions=[])
             for i, count in enumerate(parts):
                 a = dim.path(d.getEnd(d.partitions[i]))
-                if not isinstance(a, list):
+                if not is_list(a):
                     Log.error("The path function on " + dim.name + " must return an ARRAY of parts")
                 addParts(
                     temp,

@@ -98,7 +93,7 @@ class Dimension(object):
             )
             self.value = coalesce(dim.value, "name")
             self.partitions = temp.partitions
-        elif isinstance(fields, Mapping):
+        elif is_data(fields):
             self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS
 
             partitions = FlatList()

@@ -135,7 +130,7 @@ class Dimension(object):
             array = parts.data.values()[0].cube  # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END)
 
             def edges2value(*values):
-                if isinstance(fields, Mapping):
+                if is_data(fields):
                     output = Data()
                     for e, v in transpose(edges, values):
                         output[e.name] = v

@@ -192,7 +187,7 @@ class Dimension(object):
     def getDomain(self, **kwargs):
         # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
         kwargs = wrap(kwargs)
-        kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if isinstance(self.fields, list) else None)
+        kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if is_list(self.fields) else None)
 
         if not self.partitions and self.edges:
             # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
@@ -7,21 +7,15 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
 import itertools
-from collections import Mapping
 from numbers import Number
 
-from mo_future import text_type
-
 from jx_base.expressions import jx_expression
 from mo_collections.unique_index import UniqueIndex
-from mo_dots import coalesce, Data, set_default, Null, listwrap
-from mo_dots import wrap
-from mo_dots.lists import FlatList
+from mo_dots import Data, FlatList, Null, coalesce, is_container, is_data, listwrap, set_default, unwrap, wrap
+from mo_future import text_type
 from mo_logs import Log
 from mo_math import MAX, MIN
 from mo_times.dates import Date

@@ -210,7 +204,12 @@ class SimpleSetDomain(Domain):
     DOMAIN IS A LIST OF OBJECTS, EACH WITH A value PROPERTY
     """
 
-    __slots__ = ["NULL", "partitions", "map", "order"]
+    __slots__ = [
+        "NULL",  # THE value FOR NULL
+        "partitions",  # LIST OF {name, value, dataIndex} dicts
+        "map",  # MAP FROM value TO name
+        "order"  # MAP FROM value TO dataIndex
+    ]
 
     def __init__(self, **desc):
         Domain.__init__(self, **desc)

@@ -246,15 +245,18 @@ class SimpleSetDomain(Domain):
         if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
             self.key = desc.key
             self.map = UniqueIndex(keys=desc.dimension.fields)
-        elif desc.partitions and isinstance(desc.key, (list, set)):
+        elif desc.partitions and is_container(desc.key):
             # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
             self.key = desc.key
             self.map = UniqueIndex(keys=desc.key)
-        elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
+        elif desc.partitions and is_data(desc.partitions[0][desc.key]):
             # LOOKS LIKE OBJECTS
             # sorted = desc.partitions[desc.key]
+
             self.key = desc.key
             self.map = UniqueIndex(keys=desc.key)
+            # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
+            # self.map = UniqueIndex(keys=self.key)
             self.order = {p[self.key]: p.dataIndex for p in desc.partitions}
             self.partitions = desc.partitions
         elif len(desc.partitions) == 0:
             # CREATE AN EMPTY DOMAIN
             self.key = "value"

@@ -388,11 +390,11 @@ class SetDomain(Domain):
         elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
             self.key = desc.key
             self.map = UniqueIndex(keys=desc.dimension.fields)
-        elif desc.partitions and isinstance(desc.key, (list, set)):
+        elif desc.partitions and is_container(desc.key):
             # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
             self.key = desc.key
             self.map = UniqueIndex(keys=desc.key)
-        elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
+        elif desc.partitions and is_data(desc.partitions[0][desc.key]):
             self.key = desc.key
             self.map = UniqueIndex(keys=desc.key)
             # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)

@@ -663,7 +665,7 @@ class RangeDomain(Domain):
         if not self.key:
             Log.error("Must have a key value")
 
-        parts = list(listwrap(self.partitions))
+        parts = listwrap(self.partitions)
         for i, p in enumerate(parts):
             self.min = MIN([self.min, p.min])
             self.max = MAX([self.max, p.max])

@@ -675,10 +677,10 @@ class RangeDomain(Domain):
 
             # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
             for p, q in itertools.product(parts, parts):
-                if p is not q and p.min <= q.min and q.min < p.max:
+                if p.min <= q.min and q.min < p.max and unwrap(p) is not unwrap(q):
                     Log.error("partitions overlap!")
 
-            self.partitions = parts
+            self.partitions = wrap(parts)
             return
         elif any([self.min == None, self.max == None, self.interval == None]):
             Log.error("Can not handle missing parameter")
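
A standalone sketch of the overlap rule enforced above: partitions p and q overlap when p.min <= q.min < p.max, while touching endpoints (and holes) are fine. Values are illustrative:

    parts = [{"min": 0, "max": 5}, {"min": 5, "max": 9}]  # touching, not overlapping
    for p in parts:
        for q in parts:
            if p is not q and p["min"] <= q["min"] < p["max"]:
                raise ValueError("partitions overlap!")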
File diff suppressed because it is too large
Load Diff
@@ -7,11 +7,10 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
+from mo_future import is_text, is_binary
 
 class Facts(object):
     """
     REPRESENT A HIERARCHICAL DATASTORE: MULTIPLE TABLES IN A DATABASE ALONG
@@ -0,0 +1,233 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import, division, unicode_literals

from copy import copy
from math import isnan

from mo_dots import Data, data_types, listwrap
from mo_dots.lists import list_types
from mo_future import boolean_type, long, none_type, text_type
from mo_logs import Log
from mo_times import Date

builtin_tuple = tuple

Expression = None
expression_module = None
JX = None


_next_id = 0


def next_id():
    global _next_id
    try:
        return _next_id
    finally:
        _next_id += 1


def all_bases(bases):
    for b in bases:
        yield b
        for y in all_bases(b.__bases__):
            yield y


# EVERY OPERATOR WILL HAVE lang WHICH POINTS TO LANGUAGE
class LanguageElement(type):
    def __new__(cls, name, bases, dct):
        x = type.__new__(cls, name, bases, dct)
        x.lang = None
        if x.__module__ == expression_module:
            # ALL OPS IN expression_module ARE GIVEN AN ID, NO OTHERS
            x.id = next_id()
        return x

    def __init__(cls, *args):
        global Expression, expression_module
        type.__init__(cls, *args)
        if not expression_module and cls.__name__ == "Expression":
            # THE expression_module IS DETERMINED BY THE LOCATION OF Expression CLASS
            Expression = cls
            expression_module = cls.__module__


BaseExpression = LanguageElement(str("BaseExpression"), (object,), {})


class Language(object):

    def __init__(self, name):
        self.name = name
        self.ops = None

    def __getitem__(self, item):
        class_ = self.ops[item.id]
        if class_.__name__ != item.__class__.__name__:
            Log.error("programming error")
        item.__class__ = class_
        return item

    def __str__(self):
        return self.name


def define_language(lang_name, module_vars):
    # LET ALL EXPRESSIONS POINT TO lang OBJECT WITH ALL EXPRESSIONS
    # ENSURE THIS IS BELOW ALL SUB_CLASS DEFINITIONS SO var() CAPTURES ALL EXPRESSIONS
    global JX

    if lang_name:
        language = Language(lang_name)
        language.ops = copy(JX.ops)
    else:
        num_ops = 1 + max(
            obj.id
            for obj in module_vars.values() if isinstance(obj, type) and hasattr(obj, 'id')
        )
        language = JX = Language("JX")
        language.ops = [None] * num_ops

    for _, new_op in module_vars.items():
        if isinstance(new_op, type) and hasattr(new_op, 'id'):
            # EXPECT OPERATORS TO HAVE id
            # EXPECT NEW DEFINED OPS IN THIS MODULE TO HAVE lang NOT SET
            curr = getattr(new_op, "lang")
            if not curr:
                old_op = language.ops[new_op.id]
                if old_op is not None and old_op.__name__ != new_op.__name__:
                    Log.error("Logic error")
                language.ops[new_op.id] = new_op
                setattr(new_op, "lang", language)

    if lang_name:
        # ENSURE ALL THE OPS ARE DEFINED ON THE NEW LANGUAGE
        for base_op, new_op in list(zip(JX.ops, language.ops)):
            if new_op is base_op:
                # MISSED DEFINITION, ADD ONE
                new_op = type(base_op.__name__, (base_op,), {})
                language.ops[new_op.id] = new_op
                setattr(new_op, "lang", language)

    return language


def is_op(call, op):
    """
    :param call: The specific operator instance (a method call)
    :param op: The operator we are testing against
    :return: isinstance(call, op), but faster
    """
    try:
        return call.id == op.id
    except Exception as e:
        return False


def is_expression(call):
    try:
        output = getattr(call, 'id', None) != None
    except Exception:
        output = False
    if output != isinstance(call, Expression):
        Log.error("programmer error")
    return output


def value_compare(left, right, ordering=1):
    """
    SORT VALUES, NULL IS THE LEAST VALUE
    :param left: LHS
    :param right: RHS
    :param ordering: (-1, 0, 1) TO AFFECT SORT ORDER
    :return: The return value is negative if x < y, zero if x == y and strictly positive if x > y.
    """

    try:
        ltype = left.__class__
        rtype = right.__class__

        if ltype in list_types or rtype in list_types:
            if left == None:
                return ordering
            elif right == None:
                return - ordering

            left = listwrap(left)
            right = listwrap(right)
            for a, b in zip(left, right):
                c = value_compare(a, b) * ordering
                if c != 0:
                    return c

            if len(left) < len(right):
                return - ordering
            elif len(left) > len(right):
                return ordering
            else:
                return 0

        if ltype is float and isnan(left):
            left = None
            ltype = none_type
        if rtype is float and isnan(right):
            right = None
            rtype = none_type

        null_order = ordering*10
        ltype_num = TYPE_ORDER.get(ltype, null_order)
        rtype_num = TYPE_ORDER.get(rtype, null_order)

        type_diff = ltype_num - rtype_num
        if type_diff != 0:
            return ordering if type_diff > 0 else -ordering

        if ltype_num == null_order:
            return 0
        elif ltype is builtin_tuple:
            for a, b in zip(left, right):
                c = value_compare(a, b)
                if c != 0:
                    return c * ordering
            return 0
        elif ltype in data_types:
            for k in sorted(set(left.keys()) | set(right.keys())):
                c = value_compare(left.get(k), right.get(k)) * ordering
                if c != 0:
                    return c
            return 0
        elif left > right:
            return ordering
        elif left < right:
            return -ordering
        else:
            return 0
    except Exception as e:
        Log.error("Can not compare values {{left}} to {{right}}", left=left, right=right, cause=e)


TYPE_ORDER = {
    boolean_type: 0,
    int: 1,
    float: 1,
    Date: 1,
    long: 1,
    text_type: 2,
    list: 3,
    builtin_tuple: 3,
    dict: 4,
    Data: 4
}
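
A quick sketch of how value_compare behaves; the module path is taken from the `from jx_base.language import ...` line later in this diff, and the results are hand-traced from the branches above, so treat them as assumptions rather than test output:

    from jx_base.language import value_compare

    value_compare(3, 5)               # -1: plain ordered comparison
    value_compare((1, 2), (1, 2))     #  0: tuples compared element-wise
    value_compare([1], [1, 2])        # -1: equal prefix, so the shorter list sorts first
    value_compare(3, 5, ordering=-1)  #  1: ordering flips the sign for descending sorts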
@@ -7,13 +7,11 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
-
-from collections import Mapping
+from __future__ import absolute_import, division, unicode_literals
 
+from mo_future import is_text, is_binary
 from jx_base.query import QueryOp
+from mo_dots import is_data
 
 
 class Namespace(object):

@@ -32,7 +30,7 @@ class Namespace(object):
         raise NotImplementedError()
 
     def _convert_query(self, query):
-        output = QueryOp("from", None)
+        output = QueryOp(None)
         output.select = self._convert_clause(query.select)
         output.where = self.convert(query.where)
         output["from"] = self._convert_from(query["from"])

@@ -60,7 +58,7 @@ class Namespace(object):
 def convert_list(operator, operand):
     if operand == None:
         return None
-    elif isinstance(operand, Mapping):
+    elif is_data(operand):
         return operator(operand)
     else:
         return map(operator, operand)
@@ -5,14 +5,12 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
+from mo_future import is_text, is_binary
 import re
 
-from mo_future import text_type
-
 from mo_logs import Log
 
 keyword_pattern = re.compile(r"(\w|[\\.,$-])+(?:\.(\w|[\\.,$-])+)*")

@@ -23,7 +21,7 @@ def is_variable_name(value):
         Log.warning("not expected")
         return True
 
-    if not value or not isinstance(value, text_type):
+    if not value or not is_text(value):
         return False  # _a._b
     value = value.lstrip(".")
     if not value:
@@ -7,26 +7,24 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
-from collections import Mapping
 from copy import copy
 
 import jx_base
 from jx_base.dimensions import Dimension
-from jx_base.domains import Domain, SetDomain, DefaultDomain
-from jx_base.expressions import jx_expression, Expression, Variable, LeavesOp, ScriptOp, OffsetOp, TRUE, FALSE
-from jx_base.queries import is_variable_name
-from mo_dots import Data, relative_field, concat_field
-from mo_dots import coalesce, Null, set_default, unwraplist, literal_field
-from mo_dots import wrap, unwrap, listwrap
-from mo_dots.lists import FlatList
-from mo_future import text_type
-from mo_json.typed_encoder import untype_path, STRUCT
+from jx_base.domains import DefaultDomain, Domain, SetDomain
+from jx_base.expressions import Expression, FALSE, LeavesOp, QueryOp as QueryOp_, ScriptOp, TRUE, Variable, jx_expression
+from jx_base.utils import is_variable_name
+from jx_base.language import is_expression, is_op
+from mo_dots import Data, FlatList, Null, coalesce, concat_field, is_container, is_data, is_list, listwrap, literal_field, relative_field, set_default, unwrap, unwraplist, wrap
+from mo_future import is_text, text_type
+from mo_json import STRUCT
+from mo_json.typed_encoder import untype_path
 from mo_logs import Log
-from mo_math import AND, UNION, Math
+import mo_math
+from mo_math import AND, UNION, is_number
 
 DEFAULT_LIMIT = 10
 MAX_LIMIT = 10000

@@ -47,8 +45,7 @@ def _late_import():
     _ = _Column
 
 
-
-class QueryOp(Expression):
+class QueryOp(QueryOp_):
     __slots__ = ["frum", "select", "edges", "groupby", "where", "window", "sort", "limit", "having", "format", "isLean"]
 
     # def __new__(cls, op=None, frum=None, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None):

@@ -57,11 +54,11 @@ class QueryOp(Expression):
     #         setattr(output, s, None)
     #     return output
 
-    def __init__(self, op, frum, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None):
+    def __init__(self, frum, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None):
         if isinstance(frum, jx_base.Table):
             pass
         else:
-            Expression.__init__(self, op, frum)
+            Expression.__init__(self, frum)
         self.frum = frum
         self.select = select
         self.edges = edges

@@ -74,7 +71,7 @@ class QueryOp(Expression):
 
     def __data__(self):
         def select___data__():
-            if isinstance(self.select, list):
+            if is_list(self.select):
                 return [s.__data__() for s in self.select]
             else:
                 return self.select.__data__()

@@ -103,16 +100,15 @@ class QueryOp(Expression):
             format=copy(self.format)
         )
 
-
     def vars(self, exclude_where=False, exclude_select=False):
         """
         :return: variables in query
         """
         def edges_get_all_vars(e):
             output = set()
-            if isinstance(e.value, text_type):
+            if is_text(e.value):
                 output.add(e.value)
-            if isinstance(e.value, Expression):
+            if is_expression(e.value):
                 output |= e.value.vars()
             if e.domain.key:
                 output.add(e.domain.key)

@@ -180,13 +176,12 @@ class QueryOp(Expression):
             edge.range.max = e.range.max.map(map_)
             return edge
 
-        if isinstance(self.select, list):
+        if is_list(self.select):
             select = wrap([map_select(s, map_) for s in self.select])
         else:
             select = map_select(self.select, map_)
 
         return QueryOp(
-            "from",
             frum=self.frum.map(map_),
             select=select,
             edges=wrap([map_edge(e, map_) for e in self.edges]),

@@ -206,17 +201,16 @@ class QueryOp(Expression):
         """
         NORMALIZE QUERY SO IT CAN STILL BE JSON
         """
-        if isinstance(query, QueryOp) or query == None:
+        if is_op(query, QueryOp) or query == None:
             return query
 
         query = wrap(query)
         table = container.get_table(query['from'])
         schema = table.schema
         output = QueryOp(
-            op="from",
             frum=table,
             format=query.format,
-            limit=Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
+            limit=mo_math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
         )
 
         if query.select or isinstance(query.select, (Mapping, list)):

@@ -243,7 +237,7 @@ class QueryOp(Expression):
         output.window = [_normalize_window(w) for w in listwrap(query.window)]
         output.having = None
         output.sort = _normalize_sort(query.sort)
-        if not Math.is_integer(output.limit) or output.limit < 0:
+        if not mo_math.is_integer(output.limit) or output.limit < 0:
             Log.error("Expecting limit >= 0")
 
         output.isLean = query.isLean

@@ -263,7 +257,6 @@ class QueryOp(Expression):
     def column_names(self):
         return listwrap(self.select).name + self.edges.name + self.groupby.name
 
-
     def __getitem__(self, item):
         if item == "from":
             return self.frum

@@ -281,6 +274,7 @@ class QueryOp(Expression):
 
 
 canonical_aggregates = wrap({
+    "cardinality": {"name": "cardinality", "default": 0},
     "count": {"name": "count", "default": 0},
     "min": {"name": "minimum"},
     "max": {"name": "maximum"},

@@ -292,14 +286,14 @@ canonical_aggregates = wrap({
 
 def _normalize_selects(selects, frum, schema=None, ):
     if frum == None or isinstance(frum, (list, set, text_type)):
-        if isinstance(selects, list):
+        if is_list(selects):
             if len(selects) == 0:
                 return Null
             else:
                 output = [_normalize_select_no_context(s, schema=schema) for s in selects]
         else:
             return _normalize_select_no_context(selects, schema=schema)
-    elif isinstance(selects, list):
+    elif is_list(selects):
         output = [ss for s in selects for ss in _normalize_select(s, frum=frum, schema=schema)]
     else:
         output = _normalize_select(selects, frum, schema=schema)

@@ -322,7 +316,7 @@ def _normalize_select(select, frum, schema=None):
     if not _Column:
         _late_import()
 
-    if isinstance(select, text_type):
+    if is_text(select):
         canonical = select = Data(value=select)
     else:
         select = wrap(select)

@@ -346,16 +340,16 @@ def _normalize_select(select, frum, schema=None):
             )
             for c in frum.get_leaves()
         ])
-    elif isinstance(select.value, text_type):
+    elif is_text(select.value):
         if select.value.endswith(".*"):
             canonical.name = coalesce(select.name, ".")
             value = jx_expression(select[:-2], schema=schema)
-            if not isinstance(value, Variable):
+            if not is_op(value, Variable):
                 Log.error("`*` over general expression not supported yet")
             output.append([
                 set_default(
                     {
-                        "value": LeavesOp("leaves", value, prefix=select.prefix),
+                        "value": LeavesOp(value, prefix=select.prefix),
                         "format": "dict"  # MARKUP FOR DECODING
                     },
                     canonical

@@ -383,7 +377,7 @@ def _normalize_select_no_context(select, schema=None):
     if not _Column:
         _late_import()
 
-    if isinstance(select, text_type):
+    if is_text(select):
         select = Data(value=select)
     else:
         select = wrap(select)

@@ -395,24 +389,24 @@ def _normalize_select_no_context(select, schema=None):
             output.value = jx_expression(".", schema=schema)
         else:
             return Null
-    elif isinstance(select.value, text_type):
+    elif is_text(select.value):
         if select.value.endswith(".*"):
-            name = select.value[:-2]
+            name = select.value[:-2].lstrip(".")
             output.name = coalesce(select.name, name)
-            output.value = LeavesOp("leaves", Variable(name), prefix=coalesce(select.prefix, name))
+            output.value = LeavesOp(Variable(name), prefix=coalesce(select.prefix, name))
         else:
             if select.value == ".":
                 output.name = coalesce(select.name, select.aggregate, ".")
                 output.value = jx_expression(select.value, schema=schema)
             elif select.value == "*":
                 output.name = coalesce(select.name, select.aggregate, ".")
-                output.value = LeavesOp("leaves", Variable("."))
+                output.value = LeavesOp(Variable("."))
             else:
-                output.name = coalesce(select.name, select.value, select.aggregate)
+                output.name = coalesce(select.name, select.value.lstrip("."), select.aggregate)
                 output.value = jx_expression(select.value, schema=schema)
-    elif isinstance(select.value, (int, float)):
+    elif is_number(output.value):
         if not output.name:
-            output.name = text_type(select.value)
+            output.name = text_type(output.value)
         output.value = jx_expression(select.value, schema=schema)
     else:
         output.value = jx_expression(select.value, schema=schema)

@@ -441,18 +435,19 @@ def _normalize_edge(edge, dim_index, limit, schema=None):
     if not _Column:
         _late_import()
 
-    if edge == None:
+    if not edge:
         Log.error("Edge has no value, or expression is empty")
-    elif isinstance(edge, text_type):
+    elif is_text(edge):
         if schema:
             leaves = unwraplist(list(schema.leaves(edge)))
-            if not leaves or isinstance(leaves, (list, set)):
+            if not leaves or is_container(leaves):
                 return [
                     Data(
                         name=edge,
                         value=jx_expression(edge, schema=schema),
                         allowNulls=True,
-                        dim=dim_index
+                        dim=dim_index,
+                        domain=_normalize_domain(None, limit)
                     )
                 ]
             elif isinstance(leaves, _Column):

@@ -463,7 +458,7 @@ def _normalize_edge(edge, dim_index, limit, schema=None):
                     dim=dim_index,
                     domain=_normalize_domain(domain=leaves, limit=limit, schema=schema)
                 )]
-            elif isinstance(leaves.fields, list) and len(leaves.fields) == 1:
+            elif is_list(leaves.fields) and len(leaves.fields) == 1:
                 return [Data(
                     name=leaves.name,
                     value=jx_expression(leaves.fields[0], schema=schema),

@@ -490,10 +485,10 @@ def _normalize_edge(edge, dim_index, limit, schema=None):
         ]
     else:
         edge = wrap(edge)
-        if not edge.name and not isinstance(edge.value, text_type):
+        if not edge.name and not is_text(edge.value):
             Log.error("You must name compound and complex edges: {{edge}}", edge=edge)
 
-        if isinstance(edge.value, (list, set)) and not edge.domain:
+        if is_container(edge.value) and not edge.domain:
             # COMPLEX EDGE IS SHORT HAND
             domain = _normalize_domain(schema=schema)
             domain.dimension = Data(fields=edge.value)

@@ -521,8 +516,10 @@
 def _normalize_groupby(groupby, limit, schema=None):
     if groupby == None:
         return None
-    output = wrap([n for ie, e in enumerate(listwrap(groupby)) for n in _normalize_group(e, ie, limit, schema=schema)])
-    if any(o==None for o in output):
+    output = wrap([n for e in listwrap(groupby) for n in _normalize_group(e, None, limit, schema=schema)])
+    for i, o in enumerate(output):
+        o.dim = i
+    if any(o == None for o in output):
         Log.error("not expected")
     return output

@@ -534,14 +531,14 @@ def _normalize_group(edge, dim_index, limit, schema=None):
     :param schema: for context
     :return: a normalized groupby
     """
-    if isinstance(edge, text_type):
+    if is_text(edge):
         if edge.endswith(".*"):
             prefix = edge[:-2]
             if schema:
                 output = wrap([
-                    {
-                        "name": concat_field(prefix, literal_field(relative_field(untype_path(c.names["."]), prefix))),
-                        "put": {"name": literal_field(untype_path(c.names["."]))},
+                    {  # BECASUE THIS IS A GROUPBY, EARLY SPLIT INTO LEAVES WORKS JUST FINE
+                        "name": concat_field(prefix, literal_field(relative_field(untype_path(c.name), prefix))),
+                        "put": {"name": literal_field(untype_path(c.name))},
                         "value": jx_expression(c.es_column, schema=schema),
                         "allowNulls": True,
                         "domain": {"type": "default"}

@@ -553,7 +550,7 @@ def _normalize_group(edge, dim_index, limit, schema=None):
             return wrap([{
                 "name": untype_path(prefix),
                 "put": {"name": literal_field(untype_path(prefix))},
-                "value": jx_expression(prefix, schema=schema),
+                "value": LeavesOp(Variable(prefix)),
                 "allowNulls": True,
                 "dim": dim_index,
                 "domain": {"type": "default"}

@@ -571,7 +568,7 @@ def _normalize_group(edge, dim_index, limit, schema=None):
     if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
         Log.error("groupby does not accept complicated domains")
 
-    if not edge.name and not isinstance(edge.value, text_type):
+    if not edge.name and not is_text(edge.value):
         Log.error("You must name compound edges: {{edge}}", edge=edge)
 
     return wrap([{

@@ -593,7 +590,7 @@ def _normalize_domain(domain=None, limit=None, schema=None):
         return DefaultDomain(type="default", limit=limit)
     elif isinstance(domain, Dimension):
         return domain.getDomain()
-    elif schema and isinstance(domain, text_type) and schema[domain]:
+    elif schema and is_text(domain) and schema[domain]:
         return schema[domain].getDomain()
     elif isinstance(domain, Domain):
         return domain

@@ -613,7 +610,7 @@ def _normalize_window(window, schema=None):
     if hasattr(v, "__call__"):
         expr = v
     else:
-        expr = ScriptOp("script", v)
+        expr = ScriptOp(v)
 
     return Data(
         name=coalesce(window.name, window.value),

@@ -653,7 +650,7 @@ def _map_term_using_schema(master, path, term, schema_edges):
     if isinstance(dimension, Dimension):
         domain = dimension.getDomain()
         if dimension.fields:
-            if isinstance(dimension.fields, Mapping):
+            if is_data(dimension.fields):
                 # EXPECTING A TUPLE
                 for local_field, es_field in dimension.fields.items():
                     local_value = v[local_field]

@@ -696,7 +693,7 @@ def _map_term_using_schema(master, path, term, schema_edges):
                 continue
             else:
                 Log.error("not expected")
-        elif isinstance(v, Mapping):
+        elif is_data(v):
             sub = _map_term_using_schema(master, path + [k], v, schema_edges[k])
             output.append(sub)
             continue

@@ -710,7 +707,7 @@ def _where_terms(master, where, schema):
     USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
     master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
     """
-    if isinstance(where, Mapping):
+    if is_data(where):
         if where.term:
             # MAP TERM
             try:

@@ -722,13 +719,13 @@ def _where_terms(master, where, schema):
             # MAP TERM
             output = FlatList()
             for k, v in where.terms.items():
-                if not isinstance(v, (list, set)):
+                if not is_container(v):
                     Log.error("terms filter expects list of values")
                 edge = schema.edges[k]
                 if not edge:
                     output.append({"terms": {k: v}})
                 else:
-                    if isinstance(edge, text_type):
+                    if is_text(edge):
                         # DIRECT FIELD REFERENCE
                         return {"terms": {edge: v}}
                     try:

@@ -736,7 +733,7 @@ def _where_terms(master, where, schema):
                     except Exception as e:
                         Log.error("programmer error", e)
                     fields = domain.dimension.fields
-                    if isinstance(fields, Mapping):
+                    if is_data(fields):
                         or_agg = []
                         for vv in v:
                             and_agg = []

@@ -746,7 +743,7 @@ def _where_terms(master, where, schema):
                                 and_agg.append({"term": {es_field: vvv}})
                             or_agg.append({"and": and_agg})
                         output.append({"or": or_agg})
-                    elif isinstance(fields, list) and len(fields) == 1 and is_variable_name(fields[0]):
+                    elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                         output.append({"terms": {fields[0]: v}})
                     elif domain.partitions:
                         output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})

@@ -770,19 +767,19 @@ def _normalize_sort(sort=None):
 
     output = FlatList()
     for s in listwrap(sort):
-        if isinstance(s, text_type):
+        if is_text(s):
             output.append({"value": jx_expression(s), "sort": 1})
-        elif isinstance(s, Expression):
+        elif is_expression(s):
             output.append({"value": s, "sort": 1})
-        elif Math.is_integer(s):
-            output.append({"value": OffsetOp("offset", s), "sort": 1})
+        elif mo_math.is_integer(s):
+            output.append({"value": jx_expression({"offset": s}), "sort": 1})
         elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
             for v, d in s.items():
                 output.append({"value": jx_expression(v), "sort": sort_direction[d]})
         elif not s.sort and not s.value:
             Log.error("`sort` clause must have a `value` property")
         else:
-            output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": coalesce(sort_direction[s.sort], 1)})
+            output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": sort_direction[s.sort]})
     return output

@@ -795,8 +792,7 @@ sort_direction = {
     1: 1,
     0: 0,
     -1: -1,
-    None: 1,
-    Null: 1
+    None: 1
 }
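
To make the accepted sort shorthands concrete, here are a few inputs _normalize_sort handles per the branches above (field name illustrative; traced by hand, not test output):

    _normalize_sort("timestamp")                         # bare field: ascending
    _normalize_sort({"value": "timestamp", "sort": -1})  # explicit direction
    _normalize_sort([{"timestamp": -1}])                 # {field: direction} shorthand
    _normalize_sort(2)                                   # integer becomes an {"offset": 2} expression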
@@ -7,14 +7,14 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
+from mo_future import is_text, is_binary
 from copy import copy
 
-from mo_dots import Null, startswith_field, set_default, wrap
-from mo_json.typed_encoder import unnest_path, untype_path, STRUCT, EXISTS, OBJECT, NESTED
+from mo_dots import Null, relative_field, set_default, startswith_field, wrap
+from mo_json import EXISTS, NESTED, OBJECT, STRUCT
+from mo_json.typed_encoder import unnest_path, untype_path
 from mo_logs import Log

@@ -56,7 +56,7 @@ class Schema(object):
         :param column:
         :return: NAME OF column
         """
-        return column.names[self.query_path]
+        return relative_field(column.name, query_path)
 
     def values(self, name):
         """

@@ -86,13 +86,13 @@ class Schema(object):
         full_name = self.query_path
         return set_default(
             {
-                c.names[full_name]: c.es_column
+                relative_field(c.name, full_name): c.es_column
                 for k, cs in self.lookup.items()
                 # if startswith_field(k, full_name)
                 for c in cs if c.jx_type not in STRUCT
             },
             {
-                c.names["."]: c.es_column
+                c.name: c.es_column
                 for k, cs in self.lookup.items()
                 # if startswith_field(k, full_name)
                 for c in cs if c.jx_type not in STRUCT

@@ -104,14 +104,13 @@ class Schema(object):
         return copy(self._columns)
 
 
-
 def _indexer(columns, query_path):
-    all_names = set(unnest_path(n) for c in columns for n in c.names.values()) | {"."}
+    all_names = set(unnest_path(c.name) for c in columns) | {"."}
 
     lookup_leaves = {}  # ALL LEAF VARIABLES
     for full_name in all_names:
         for c in columns:
-            cname = c.names[query_path]
+            cname = relative_field(c.name, query_path)
             nfp = unnest_path(cname)
             if (
                 startswith_field(nfp, full_name) and

@@ -126,7 +125,7 @@ def _indexer(columns, query_path):
     lookup_variables = {}  # ALL NOT-NESTED VARIABLES
     for full_name in all_names:
         for c in columns:
-            cname = c.names[query_path]
+            cname = relative_field(c.name, query_path)
             nfp = unnest_path(cname)
             if (
                 startswith_field(nfp, full_name) and

@@ -142,7 +141,7 @@ def _indexer(columns, query_path):
     relative_lookup = {}
     for c in columns:
         try:
-            cname = c.names[query_path]
+            cname = relative_field(c.name, query_path)
             cs = relative_lookup.setdefault(cname, set())
             cs.add(c)
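
The names[query_path] lookups above are replaced by a single absolute column name plus relative_field(). A sketch of the intended equivalence; the argument order and semantics of relative_field are assumed from mo_dots usage, not confirmed by this diff:

    from mo_dots import relative_field

    relative_field("build.platform", ".")      # assumed: "build.platform" (relative to root)
    relative_field("build.platform", "build")  # assumed: "platform" (relative to the query path)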
|
@ -7,11 +7,10 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
class Snowflake(object):
|
||||
"""
|
||||
REPRESENT ONE ALIAS, AND ITS NESTED ARRAYS
|
||||
|
|
|
@ -7,11 +7,10 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
class Table(object):
|
||||
|
||||
def __init__(self, full_name):
|
||||
|
|
|
@@ -0,0 +1,56 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
from __future__ import absolute_import, division, unicode_literals

import re

from mo_future import is_text
from mo_logs import Log

keyword_pattern = re.compile(r"(\w|[\\.,$-])+(?:\.(\w|[\\.,$-])+)*")


def is_variable_name(value):
    if value.__class__.__name__ == "Variable":
        Log.warning("not expected")
        return True

    if not value or not is_text(value):
        return False  # _a._b
    value = value.lstrip(".")
    if not value:
        return True
    match = keyword_pattern.match(value)
    if not match:
        return False
    return match.group(0) == value


def dequote(s):
    """
    If a string has single or double quotes around it, remove them.
    Make sure the pair of quotes match.
    If a matching pair of quotes is not found, return the string unchanged.
    """
    if (s[0] == s[-1]) and s.startswith(("'", '"')):
        return s[1:-1]
    return s


def is_column_name(col):
    if re.match(r"(\$|\w|\\\.)+(?:\.(\$|\w|\\\.)+)*\.\$\w{6}$", col):
        return True
    else:
        return False


def get_property_name(s):
    if s == ".":
        return s
    else:
        return s.lstrip(".")
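
A few hand-traced calls against the helpers above; the results are read off the code, not test output, so treat them as assumptions:

    is_variable_name("build.platform")  # True: matches keyword_pattern exactly
    is_variable_name("a b")             # False: the space breaks the pattern
    dequote('"hello"')                  # 'hello': matching quotes stripped
    dequote('\'a"')                     # unchanged: quotes do not match
    get_property_name(".name")          # 'name'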
@@ -0,0 +1,16 @@
# `jx_elasticsearch`

This library implements [JSON Query Expressions]() atop an Elasticsearch cluster.


## Contribution

New, or old, versions of Elasticsearch should be added by copying the `es52` subdirectory and altering the implementation to deal with the differences.

There are two directories in the git history that may help with old versions:

1. `es09` for Elasticsearch version 0.9.x (with MVEL scripting)
2. `es14` for any version 1.x variant of Elasticsearch (with Groovy scripting)

Both of these directories are too old to be used directly, but they do have code templates for their respective scripting languages, and they do have other hints about how to construct queries within the limitations of the older versions.
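
For readers new to the format, an illustrative JSON query expression of the general shape this library accepts; the index and field names are invented, while the clause names ("from", "select", "where", "groupby", "limit", "format") are taken from the QueryOp normalization code earlier in this diff:

    query = {
        "from": "task",
        "select": [{"value": "run.duration", "aggregate": "average"}],
        "where": {"eq": {"build.branch": "mozilla-central"}},
        "groupby": ["build.platform"],
        "limit": 100,
        "format": "table",
    }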
@@ -7,10 +7,9 @@
 #
 # Author: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
+from mo_future import is_text, is_binary
 from jx_base.container import type2container
 from mo_files.url import URL
 from mo_kwargs import override
@@ -1,106 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from jx_base.expressions import Variable
from jx_base.queries import is_variable_name
from jx_elasticsearch import es09
from jx_elasticsearch.es09.util import aggregates, fix_es_stats, build_es_query
from jx_elasticsearch import post as es_post
# from jx_elasticsearch.es52.expressions import Variable
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import listwrap, unwrap, literal_field
from mo_math import AND


def is_aggop(query):
    if not query.edges:
        return True
    return False


def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if isinstance(s.value, Variable):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value.var
                    },
                    "facet_filter": query.where.to_esfilter()
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": jx_expression_to_function(s.value)
                    },
                    "facet_filter": query.where.to_es_filter()
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es_post(es, FromES, query.limit)

    matricies = {s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]]) for s in select}
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube


def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:

        if is_variable_name(s.value):
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {"exists": {"field": s.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es_post(es, FromES, query.limit)

    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.hits.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
@ -1,730 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http:# mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from collections import Mapping
|
||||
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
from jx_base.queries import keyword_pattern
|
||||
|
||||
from mo_future import text_type
|
||||
from pyLibrary import convert
|
||||
from mo_collections import reverse
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote
|
||||
from mo_math import Math
|
||||
from mo_dots import split_field, Data, Null, join_field, coalesce, listwrap
|
||||
from mo_times.durations import Duration
|
||||
|
||||
|
||||
class _MVEL(object):
|
||||
def __init__(self, fromData, isLean=False):
|
||||
self.fromData = fromData
|
||||
self.isLean = isLean
|
||||
self.prefixMap = []
|
||||
self.functions = {}
|
||||
|
||||
|
||||
def code(self, query):
|
||||
"""
|
||||
RETURN THE MVEL THAT WILL FILTER USING query.where AND TERM-PACK THE query.select CLAUSE
|
||||
"""
|
||||
selectList = listwrap(query.select)
|
||||
fromPath = query.frum.name # FIRST NAME IS THE INDEX
|
||||
sourceVar = "__sourcedoc__"
|
||||
whereClause = query.where
|
||||
|
||||
# PARSE THE fromPath
|
||||
code = self.frum(fromPath, sourceVar, "__loop")
|
||||
select = self.select(selectList, fromPath, "output", sourceVar)
|
||||
|
||||
body = "var output = \"\";\n" + \
|
||||
code.replace(
|
||||
"<CODE>",
|
||||
"if (" + _where(whereClause, lambda v: self._translate(v)) + "){\n" +
|
||||
select.body +
|
||||
"}\n"
|
||||
) + \
|
||||
"output\n"
|
||||
|
||||
# ADD REFERENCED CONTEXT VARIABLES
|
||||
context = self.getFrameVariables(body)
|
||||
|
||||
func = UID()
|
||||
predef = addFunctions(select.head+context+body).head
|
||||
param = "_source" if body.find(sourceVar) else ""
|
||||
|
||||
output = predef + \
|
||||
select.head + \
|
||||
context + \
|
||||
'var ' + func + ' = function('+sourceVar+'){\n' + \
|
||||
body + \
|
||||
'};\n' + \
|
||||
func + '('+param+')\n'
|
||||
|
||||
return Compiled(output)

    def frum(self, fromPath, sourceVar, loopVariablePrefix):
        """
        fromPath            NAME USED TO REFER TO HIGH LEVEL DOCUMENT
        loopVariablePrefix  PREFIX FOR LOOP VARIABLES
        """
        loopCode = "if (<PATH> != null){ for(<VAR> : <PATH>){\n<CODE>\n}}\n"
        self.prefixMap = []
        code = "<CODE>"
        path = split_field(fromPath)

        # ADD LOCAL VARIABLES
        columns = INDEX_CACHE[path[0]].columns
        for i, c in enumerate(columns):
            if c.name.find("\\.") >= 0:
                self.prefixMap.insert(0, {
                    "path": c.name,
                    "variable": "get(" + sourceVar + ", \"" + c.name.replace("\\.", ".") + "\")"
                })
            else:
                self.prefixMap.insert(0, {
                    "path": c.name,
                    "variable": sourceVar + ".?" + c.name
                })

        # ADD LOOP VARIABLES
        currPath = []
        # self.prefixMap.insert(0, {"path": path[0], "variable": path[0]})
        for i, step in enumerate(path[1::]):
            loopVariable = loopVariablePrefix + str(i)
            currPath.append(step)
            pathi = ".".join(currPath)
            shortPath = self._translate(pathi)
            self.prefixMap.insert(0, {"path": pathi, "variable": loopVariable})

            loop = loopCode.replace("<VAR>", loopVariable).replace("<PATH>", shortPath)
            code = code.replace("<CODE>", loop)
        return code

    def _translate(self, variableName):
        shortForm = variableName
        for p in self.prefixMap:
            prefix = p["path"]
            if shortForm == prefix:
                shortForm = p["variable"]
            else:
                shortForm = replacePrefix(shortForm, prefix + ".", p["variable"] + ".?")  # ADD NULL CHECK
                shortForm = replacePrefix(shortForm, prefix + "[", p["variable"] + "[")
        return shortForm

    # CREATE A PIPE DELIMITED RESULT SET
    def select(self, selectList, fromPath, varName, sourceVar):
        path = split_field(fromPath)
        is_deep = len(path) > 1
        heads = []
        list = []
        for s in selectList:
            if is_deep:
                if s.value and is_variable_name(s.value):
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    Log.error("do not know how to handle yet")
            else:
                if s.value and is_variable_name(s.value):
                    list.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n")
                elif s.value:
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    code, decode = self.Parts2Term(s.domain)
                    heads.append(code.head)
                    list.append("Value2Pipe(" + code.body + ")\n")

        if len(split_field(fromPath)) > 1:
            output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + '+"|"+'.join(["Value2Pipe(" + v + ")\n" for v in list]) + ';\n'
        else:
            output = varName + ' = ' + '+"|"+'.join(["Value2Pipe(" + v + ")\n" for v in list]) + ';\n'

        return Data(
            head="".join(heads),
            body=output
        )

    def Parts2Term(self, domain):
        """
        TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)

        CONVERT AN ARRAY OF PARTS{name, esfilter} TO AN MVEL EXPRESSION
        RETURN expression, function PAIR, WHERE
        expression - MVEL EXPRESSION
        function - TAKES RESULT OF expression AND RETURNS PART
        """
        fields = domain.dimension.fields

        term = []
        if len(split_field(self.fromData.name)) == 1 and fields:
            if isinstance(fields, Mapping):
                # CONVERT UNORDERED FIELD DEFS
                jx_fields, es_fields = transpose(*[(k, fields[k]) for k in sorted(fields.keys())])
            else:
                jx_fields, es_fields = transpose(*[(i, e) for i, e in enumerate(fields)])

            # NO LOOPS BECAUSE QUERY IS SHALLOW
            # DOMAIN IS FROM A DIMENSION, USE ITS FIELD DEFS TO PULL
            if len(es_fields) == 1:
                def fromTerm(term):
                    return domain.getPartByKey(term)

                return Data(
                    head="",
                    body='getDocValue(' + quote(domain.dimension.fields[0]) + ')'
                ), fromTerm
            else:
                def fromTerm(term):
                    terms = [convert.pipe2value(t) for t in convert.pipe2value(term).split("|")]

                    candidate = dict(zip(jx_fields, terms))
                    for p in domain.partitions:
                        for k, t in candidate.items():
                            if p.value[k] != t:
                                break
                        else:
                            return p
                    if domain.type in ["uid", "default"]:
                        part = {"value": candidate}
                        domain.partitions.append(part)
                        return part
                    else:
                        return Null

                for f in es_fields:
                    term.append('Value2Pipe(getDocValue(' + quote(f) + '))')

                return Data(
                    head="",
                    body='Value2Pipe(' + ('+"|"+'.join(term)) + ')'
                ), fromTerm
        else:
            for v in domain.partitions:
                term.append("if (" + _where(v.esfilter, lambda x: self._translate(x)) + ") " + value2MVEL(domain.getKey(v)) + "; else ")
            term.append(value2MVEL(domain.getKey(domain.NULL)))

            func_name = "_temp" + UID()
            return self.register_function("+\"|\"+".join(term))

    def Parts2TermScript(self, domain):
        code, decode = self.Parts2Term(domain)
        func = addFunctions(code.head + code.body)
        return func.head + code.head + code.body, decode
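
    # ------------------------------------------------------------------
    # ILLUSTRATIVE SKETCH (not part of the original commit): Parts2Term()
    # returns an (expression, decoder) pair: the MVEL expression emits a
    # pipe-packed term, and the decoder maps that term back to a domain
    # part.  The multi-field decode above is equivalent to this pure-Python
    # fragment (part values assumed to live in a `value` dict):
    #
    #     candidate = dict(zip(jx_fields, decoded_terms))
    #     for p in domain.partitions:
    #         if all(p.value[k] == t for k, t in candidate.items()):
    #             return p          # FIRST PARTITION MATCHING EVERY FIELD
    # ------------------------------------------------------------------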

    def getFrameVariables(self, body):
        contextVariables = []
        columns = self.fromData.columns

        parentVarNames = set()  # ALL PARENTS OF VARIABLES WITH "." IN NAME
        body = body.replace(".?", ".")

        for i, c in enumerate(columns):
            j = body.find(c.name, 0)
            while j >= 0:
                s = j
                j = body.find(c.name, s + 1)

                test0 = body[s - 1: s + len(c.name) + 1:]
                test3 = body[s - 8: s + len(c.name):]

                if test0[:-1] == "\"" + c.name:
                    continue
                if test3 == "_source." + c.name:
                    continue

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name + " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

                body = body.replace(c.name, "-" * len(c.name))

                if self.isLean or c.useSource:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getSourceValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = _source[\"" + c.name + "\"];\n")
                else:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                break

        return "".join(contextVariables)

    def compile_expression(self, expression, constants=None):
        # EXPAND EXPRESSION WITH ANY CONSTANTS
        expression = setValues(expression, constants)

        fromPath = self.fromData.name  # FIRST NAME IS THE INDEX
        indexName = join_field(split_field(fromPath)[:1])

        context = self.getFrameVariables(expression)
        if context == "":
            return addFunctions(expression).head + expression

        func = UID()
        code = addFunctions(context + expression)
        output = code.head + \
            'var ' + func + ' = function(' + indexName + '){\n' + \
            context + \
            expression + ";\n" + \
            '};\n' + \
            func + '(_source)\n'

        return Compiled(output)

    def register_function(self, code):
        for n, c in self.functions.items():
            if c == code:
                break
        else:
            n = "_temp" + UID()
            self.functions[n] = code

        return Data(
            head='var ' + n + ' = function(){\n' + code + '\n};\n',
            body=n + '()\n'
        )


class Compiled(object):
    def __init__(self, code):
        self.code = code

    def __str__(self):
        return self.code

    def __data__(self):
        return self.code


__UID__ = 1000


def UID():
    global __UID__
    output = "_" + str(__UID__)
    __UID__ += 1
    return output


def setValues(expression, constants):
    if not constants:
        return expression

    constants = constants.copy()

    # EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES)
    for c in constants:
        value = c.value
        n = c.name
        if len(split_field(n)) >= 3:
            continue  # DO NOT GO TOO DEEP
        if isinstance(value, list):
            continue  # DO NOT MESS WITH ARRAYS

        if isinstance(value, Mapping):
            for k, v in value.items():
                constants.append({"name": n + "." + k, "value": v})

    for c in reversed(constants):  # REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST
        s = 0
        while True:
            s = expression.find(c.name, s)
            if s == -1:
                break
            if re.match(r"\w", expression[s - 1]):
                break
            if re.match(r"\w", expression[s + len(c.name)]):
                break

            v = value2MVEL(c.value)
            expression = expression[:s] + v + expression[s + len(c.name):]

    return expression


def unpack_terms(facet, selects):
    # INTERPRET THE TERM-PACKED ES RESULTS AND RETURN DATA CUBE
    # ASSUME THE .term IS JSON OBJECT WITH ARRAY OF RESULT OBJECTS
    mod = len(selects)
    output = []
    for t in facet.terms:
        if t.term == "":
            continue  # NO DATA
        value = []
        for i, v in enumerate(t.term.split("|")):
            value.append(convert.pipe2value(v))
            if ((i + 1) % mod) == 0:
                value.append(t.count)
                output.append(value)
                value = []

    return output
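

# ILLUSTRATIVE SKETCH (not part of the original commit): how a term-packed
# facet unpacks. With two selects (mod == 2), every third slot of a flattened
# row is the facet count. Hypothetical facet shown for clarity, assuming
# convert.pipe2value() strips the one-character type prefix and unescapes "|":
#
#     facet = wrap({"terms": [{"term": "swin|s64", "count": 7}]})
#     unpack_terms(facet, ["os", "bits"])   # -> [["win", "64", 7]]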


# PASS esFilter SIMPLIFIED ElasticSearch FILTER OBJECT
# RETURN MVEL EXPRESSION
def _where(esFilter, _translate):
    if not esFilter or esFilter is True:
        return "true"

    keys = list(esFilter.keys())
    if len(keys) != 1:
        Log.error("Expecting only one filter aggregate")

    op = keys[0]
    if op == "and":
        list = esFilter[op]
        if not list:
            return "true"
        if len(list) == 1:
            return _where(list[0], _translate)
        output = "(" + " && ".join(_where(l, _translate) for l in list) + ")"
        return output
    elif op == "or":
        list = esFilter[op]
        if not list:
            return "false"
        if len(list) == 1:
            return _where(list[0], _translate)
        output = "(" + " || ".join(_where(l, _translate) for l in list) + ")"
        return output
    elif op == "not":
        return "!(" + _where(esFilter[op], _translate) + ")"
    elif op == "term":
        pair = esFilter[op]
        if len(pair.keys()) == 1:
            return [_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()][0]
        else:
            return "(" + " && ".join(_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()) + ")"
    elif op == "terms":
        output = []
        for variableName, valueList in esFilter[op].items():
            if not valueList:
                Log.error("Expecting something in 'terms' array")
            if len(valueList) == 1:
                output.append(_translate(variableName) + "==" + value2MVEL(valueList[0]))
            else:
                output.append("(" + " || ".join(_translate(variableName) + "==" + value2MVEL(v) for v in valueList) + ")")
        return " && ".join(output)
    elif op == "exists":
        # "exists":{"field":"myField"}
        pair = esFilter[op]
        variableName = pair.field
        return "(" + _translate(variableName) + "!=null)"
    elif op == "missing":
        fieldName = _translate(esFilter[op].field)
        testExistence = coalesce(esFilter[op].existence, True)
        testNull = coalesce(esFilter[op].null_value, True)

        output = []
        if testExistence and not testNull:
            output.append("(" + fieldName.replace(".?", ".") + " == empty)")  # REMOVE THE .? SO WE REFER TO THE FIELD, NOT GET THE VALUE
        if testNull:
            output.append("(" + fieldName + "==null)")
        return " || ".join(output)
    elif op == "range":
        pair = esFilter[op]
        ranges = []

        for variableName, r in pair.items():
            if r.gte:
                ranges.append(value2MVEL(r.gte) + "<=" + _translate(variableName))
            elif r.gt:
                ranges.append(value2MVEL(r.gt) + "<" + _translate(variableName))
            elif r["from"]:
                if r.include_lower == None or r.include_lower:
                    ranges.append(value2MVEL(r["from"]) + "<=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["from"]) + "<" + _translate(variableName))

            if r.lte:
                ranges.append(value2MVEL(r.lte) + ">=" + _translate(variableName))
            elif r.lt:
                ranges.append(value2MVEL(r.lt) + ">" + _translate(variableName))
            elif r["to"]:
                if r.include_upper == None or r.include_upper:
                    ranges.append(value2MVEL(r["to"]) + ">=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["to"]) + ">" + _translate(variableName))

        return "(" + " && ".join(ranges) + ")"

    elif op == "script":
        script = esFilter[op].script
        return _translate(script)
    elif op == "prefix":
        pair = esFilter[op]
        variableName, value = list(pair.items())[0]
        return _translate(variableName) + ".startsWith(" + quote(value) + ")"
    elif op == "match_all":
        return "true"
    else:
        Log.error("'" + op + "' is an unknown aggregate")

    return ""


VAR_CHAR = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.\""


def value2MVEL(value):
    """
    FROM PYTHON VALUE TO MVEL EQUIVALENT
    """
    if isinstance(value, datetime):
        return str(convert.datetime2milli(value)) + " /*" + value.format("yyNNNdd HHmmss") + "*/"  # TIME
    if isinstance(value, Duration):
        return str(convert.timedelta2milli(value)) + " /*" + str(value) + "*/"  # DURATION

    if Math.is_number(value):
        return str(value)
    return quote(value)


# FROM PYTHON VALUE TO ES QUERY EQUIVALENT
def value2query(value):
    if isinstance(value, datetime):
        return convert.datetime2milli(value)
    if isinstance(value, Duration):
        return value.milli

    if Math.is_number(value):
        return value
    return quote(value)


def value2value(value):
    """
    CONVERT FROM PYTHON VALUE TO ES EQUIVALENT
    """
    if isinstance(value, datetime):
        return convert.datetime2milli(value)
    if isinstance(value, Duration):
        return value.milli  # DURATION
    return value


def addFunctions(mvel):
    """
    PREPEND THE REQUIRED MVEL FUNCTIONS TO THE CODE
    """
    isAdded = Data()  # SOME FUNCTIONS DEPEND ON OTHERS

    head = []
    body = mvel

    keepAdding = True
    while keepAdding:
        keepAdding = False
        for func_name, func_code in FUNCTIONS.items():
            if isAdded[func_name]:
                continue
            if mvel.find(func_name) == -1:
                continue
            keepAdding = True
            isAdded[func_name] = func_code
            head.append(func_code)
            mvel = func_code + mvel
    return Data(
        head="".join(head),
        body=body
    )
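

# ILLUSTRATIVE SKETCH (not part of the original commit): the while-loop above
# keeps rescanning because one helper may mention another. A tiny standalone
# analogue of that dependency resolution (names here are hypothetical):
def _example_collect_helpers(code, helpers):
    added, out = set(), []
    changed = True
    while changed:
        changed = False
        for name, definition in helpers.items():
            if name in added or code.find(name) == -1:
                continue  # ALREADY EMITTED, OR NOT MENTIONED ANYWHERE
            added.add(name)
            out.append(definition)
            code = definition + code  # SO THIS HELPER'S OWN CALLS ARE FOUND TOO
            changed = True
    return out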


FUNCTIONS = {
    "String2Quote":
        "var String2Quote = function(str){\n" +
        "if (!(str is String)){ str; }else{\n" +  # LAST VALUE IS RETURNED. "return" STOPS EXECUTION COMPLETELY!
        "" + value2MVEL("\"") + "+" +
        "str.replace(" + value2MVEL("\\") + "," + value2MVEL("\\\\") +
        ").replace(" + value2MVEL("\"") + "," + value2MVEL("\\\"") +
        ").replace(" + value2MVEL("\'") + "," + value2MVEL("\\\'") + ")+" +
        value2MVEL("\"") + ";\n" +
        "}};\n",

    "Value2Pipe":
        'var Value2Pipe = function(value){\n' +  # SPACES ARE IMPORTANT BETWEEN "=".
        "if (value==null){ \"0\" }else " +
        "if (value is ArrayList || value is org.elasticsearch.common.mvel2.util.FastList){" +
        "var out = \"\";\n" +
        "foreach (v : value) out = (out==\"\") ? v : out + \"|\" + Value2Pipe(v);\n" +
        "'a'+Value2Pipe(out);\n" +
        "}else \n" +
        "if (value is Long || value is Integer || value is Double){ 'n'+value; }else \n" +
        "if (!(value is String)){ 's'+value.getClass().getName(); }else \n" +
        '"s"+value.replace("\\\\", "\\\\\\\\").replace("|", "\\\\p");' +  # CAN NOT USE value+"" TO MAKE A NUMBER INTO A STRING (OR EVEN TO PREPEND A STRING!)
        "};\n",

    # "replaceAll":
    #     "var replaceAll = function(output, find, replace){\n" +
    #     "if (output.length()==0) return output;\n" +
    #     "s = output.indexOf(find, 0);\n" +
    #     "while(s>=0){\n" +
    #     "output=output.replace(find, replace);\n" +
    #     "s=s-find.length()+replace.length();\n" +
    #     "s = output.indexOf(find, s);\n" +
    #     "}\n" +
    #     "output;\n" +
    #     '};\n',

    "floorDay":
        "var floorDay = function(value){ Math.floor(value/86400000)*86400000;};\n",

    "floorInterval":
        "var floorInterval = function(value, interval){ Math.floor((double)value/(double)interval)*interval;};\n",

    "maximum":  # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
        "var maximum = function(a, b){if (a==null) b; else if (b==null) a; else if (a>b) a; else b;\n};\n",

    "minimum":  # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
        "var minimum = function(a, b){if (a==null) b; else if (b==null) a; else if (a<b) a; else b;\n};\n",

    "coalesce":  # PICK FIRST NOT-NULL VALUE
        "var coalesce = function(a, b){if (a==null) b; else a; \n};\n",

    "zero2null":  # ES MAKES IT DIFFICULT TO DETECT NULL/MISSING VALUES, BUT WHEN DEALING WITH NUMBERS, ES DEFAULTS TO RETURNING ZERO FOR missing VALUES!!
        "var zero2null = function(a){if (a==0) null; else a; \n};\n",

    "get":  # MY OWN PERSONAL *FU* TO THE TWISTED MVEL PROPERTY ACCESS
        "var get = function(hash, key){\n" +
        "if (hash==null) null; else hash[key];\n" +
        "};\n",

    "isNumeric":
        "var isNumeric = function(value){\n" +
        "value = value + \"\";\n" +
        # "try{ value-0; }catch(e){ 0; }" +
        "var isNum = value.length()>0;\n" +
        "for (v : value.toCharArray()){\n" +
        "if (\"0123456789\".indexOf(v)==-1) isNum = false;\n" +
        "};\n" +
        "isNum;\n" +
        "};\n",

    "alpha2zero":
        "var alpha2zero = function(value){\n" +
        "var output = 0;\n" +
        "if (isNumeric(value)) output = value-0;\n" +
        "return output;" +
        "};\n",

    # KANBAN SOFTWARE
    # CAN SEE QUEUE BLOCKAGES AND SEE SINGLE BLOCKERS

    "concat":
        "var concat = function(array){\n" +
        "if (array==null) \"\"; else {\n" +
        "var output = \"\";\n" +
        "for (v : array){ output = output+\"|\"+v+\"|\"; };\n" +
        "output;\n" +
        "}};\n",

    # "contains":
    #     "var contains = function(array, value){\n" +
    #     "if (array==null) false; else {\n" +
    #     "var good = false;\n" +
    #     "for (v : array){ if (v==value) good=true; };\n" +
    #     "good;\n" +
    #     "}};\n",

    "getFlagValue":  # SPECIFICALLY FOR cf_* FLAGS: CONCATENATE THE ATTRIBUTE NAME WITH ATTRIBUTE VALUE, IF EXISTS
        "var getFlagValue = function(name){\n" +
        "if (_source[name]!=null)" +
        "\" \"+name+_source[name];\n" +
        "else \n" +
        "\"\";\n" +
        "};\n",

    "getDocValue":
        "var getDocValue = function(name){\n" +
        "var out = [];\n" +
        "var v = doc[name];\n" +
        # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();\n" +
        "if (v==null || v.value==null) { null; } else\n" +
        "if (v.values.size()<=1){ v.value; } else\n" +  # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
        "{for(k : v.values) out.add(k); out;}" +
        "};\n",

    "getSourceValue":
        "var getSourceValue = function(name){\n" +
        "var out = [];\n" +
        "var v = _source[name];\n" +
        # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();\n" +
        "if (v==null) { null; } else\n" +
        "if (v[\"values\"]==null || v.values.size()<=1){ v.value; } else {\n" +  # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
        "for(k : v) out.add(k); out;\n" +  # .size() MUST BE USED INSTEAD OF .length, THE LATTER WILL CRASH IF JITTED (https://github.com/elasticsearch/elasticsearch/issues/3094)
        "}};\n",

    "getDocArray":
        "var getDocArray = function(name){\n" +
        "var out = [];\n" +
        "var v = doc[name];\n" +
        "if (v!=null && v.value!=null) for(k : v.values) out.add(k);" +
        "out;" +
        "};\n",

    "milli2Month":
        "var milli2Month = function(value, milliOffset){\n" +
        "g=new java.util.GregorianCalendar(new java.util.SimpleTimeZone(0, \"GMT\"));\n" +
        "g.setTimeInMillis(value);\n" +
        "g.add(java.util.GregorianCalendar.MILLISECOND, -milliOffset);\n" +
        "m = g.get(java.util.GregorianCalendar.MONTH);\n" +
        "output = \"\"+g.get(java.util.GregorianCalendar.YEAR)+(m>9?\"\":\"0\")+m;\n" +
        "output;\n" +
        "};\n",

    "between":
        "var between = function(value, prefix, suffix){\n" +
        "if (value==null){ null; }else{\n" +
        "var start = value.indexOf(prefix, 0);\n" +
        "if (start==-1){ null; }else{\n" +
        "var end = value.indexOf(suffix, start+prefix.length());\n" +
        "if (end==-1){ null; }else{\n" +
        "value.substring(start+prefix.length(), end);\n" +
        "}}}\n" +
        "};\n"
}
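

# ILLUSTRATIVE SKETCH (not part of the original commit): the Python-side
# inverse of the Value2Pipe helper above. Encoded values carry a one-character
# type prefix ("n"=number, "s"=string, "0"=null); this simplified decoder
# assumes that scheme (the real decoder is convert.pipe2value):
def _example_pipe2value(encoded):
    if encoded == "0":
        return None
    prefix, rest = encoded[0], encoded[1:]
    if prefix == "n":
        return float(rest)
    if prefix == "s":
        # UNDO THE Value2Pipe ESCAPES ("\\" -> "\", "\p" -> "|") LEFT TO RIGHT
        out, i = [], 0
        while i < len(rest):
            if rest[i] == "\\" and i + 1 < len(rest):
                out.append("|" if rest[i + 1] == "p" else rest[i + 1])
                i += 2
            else:
                out.append(rest[i])
                i += 1
        return "".join(out)
    raise ValueError("unknown type prefix: " + prefix)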


def replacePrefix(value, prefix, new_prefix):
    try:
        if value.startswith(prefix):
            return new_prefix + value[len(prefix)::]
        return value
    except Exception as e:
        Log.error("can not replace prefix", e)

@@ -1,248 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from collections import Mapping

from jx_base import domains
from jx_base.expressions import TRUE, jx_expression, Variable, LeavesOp
from jx_base.queries import is_variable_name
from jx_elasticsearch import es09
from jx_elasticsearch.es09.expressions import unpack_terms
from jx_elasticsearch.es09.util import aggregates
from jx_elasticsearch import post as es_post
from jx_python.containers.cube import Cube
from mo_collections.matrix import Matrix
from mo_dots import coalesce, split_field, Data, wrap
from mo_dots import listwrap, unwrap
from mo_dots.lists import FlatList
from mo_future import transpose  # ASSUMED IMPORT: transpose() IS USED IN es_setop() BELOW, BUT WAS MISSING HERE
from mo_logs import Log
from mo_math import AND, SUM, OR


def is_fieldop(query):
    # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)

    select = listwrap(query.select)
    if not query.edges:
        isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        isSimple = AND(s.value != None and (s.value == "*" or is_variable_name(s.value)) for s in select)
        noAgg = AND(s.aggregate == "none" for s in select)

        if not isDeep and isSimple and noAgg:
            return True
    else:
        isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        if isSmooth:
            return True

    return False


def es_fieldop(es, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "bool": {
            "query": {
                "match_all": {}
            },
            "filter": jx_expression(query.where).to_esfilter()
        }
    }
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = FlatList()
    for s in select.value:
        if s == "*":
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es_post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()} for t in T])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception as e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube


def is_setop(query):
    select = listwrap(query.select)

    if not query.edges:
        isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        simpleAgg = AND([s.aggregate in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

        # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
        if simpleAgg or isDeep:
            return True
    else:
        isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        if isSmooth:
            return True

    return False


def es_setop(es, mvel, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if not isDeep and not isComplex:
        if len(select) == 1 and isinstance(select[0].value, LeavesOp):
            FromES = wrap({
                "query": {"bool": {
                    "query": {"match_all": {}},
                    "filter": query.where.to_esfilter()
                }},
                "sort": query.sort,
                "size": 0
            })
        elif all(isinstance(v, Variable) for v in select.value):
            FromES = wrap({
                "query": {"bool": {
                    "query": {"match_all": {}},
                    "filter": query.where.to_esfilter()
                }},
                "fields": select.value,
                "sort": query.sort,
                "size": coalesce(query.limit, 200000)
            })
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": jx_expression(query.where).to_esfilter()
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": jx_expression(query.where).to_esfilter()
        }

    data = es_post(es, FromES, query.limit)

    if len(select) == 1 and isinstance(select[0].value, LeavesOp):
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            output = transpose(*data_list)
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    return Data(
        meta={"esquery": FromES},
        data=cube
    )


def is_deep(query):
    select = listwrap(query.select)
    if len(select) > 1:
        return False

    if aggregates[select[0].aggregate] not in ("none", "count"):
        return False

    if len(query.edges) <= 1:
        return False

    isDeep = len(split_field(query["from"].name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    if not isDeep:
        return False  # BETTER TO USE TERM QUERY

    return True


def es_deepop(es, mvel, query):
    FromES = es09.util.build_es_query(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = FlatList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": jx_expression(query.where).to_esfilter()
    }

    data = es_post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    terms = transpose(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        for r in terms[f]:
            e.domain.getPartByKey(r)

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
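

# ILLUSTRATIVE SKETCH (not part of the original commit): the FILL CUBE step
# above, reduced to plain Python. Each unpacked row is one value per edge plus
# a trailing count; values map to partition indexes that address the matrix:
def _example_fill_cube(rows, value2index_per_edge):
    cube = {}
    for r in rows:
        coord = tuple(v2i[r[i]] for i, v2i in enumerate(value2index_per_edge))
        cube[coord] = cube.get(coord, 0) + r[-1]  # SUM OVER DUPLICATE COORDINATES
    return cube

# _example_fill_cube([["win", "64", 7], ["win", "32", 2]],
#                    [{"win": 0}, {"64": 0, "32": 1}])   # -> {(0, 0): 7, (0, 1): 2}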

@@ -1,152 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from jx_elasticsearch.es09.util import aggregates, build_es_query, compileEdges2Term
from jx_elasticsearch import post as es_post
from jx_python import jx
from jx_python.containers.cube import Cube
from jx_python.expressions import simplify_esfilter  # USED BELOW; IMPORT ADDED TO MATCH THE SIBLING es09 MODULES
from mo_collections.matrix import Matrix
from mo_dots import coalesce
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList
from mo_math import AND


def is_terms(query):
    select = listwrap(query.select)

    isSimple = not query.select or AND(aggregates[s.aggregate] in ("none", "count") for s in select)
    if isSimple:
        return True
    return False


def es_terms(es, mvel, query):
    """
    RETURN LIST OF ALL EDGE QUERIES

    EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
    WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION
    """
    if len(query.edges) == 2:
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    FromES = build_es_query(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    for s in select:
        FromES.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    term2Parts = packed_term.term2parts

    data = es_post(es, FromES, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    for k, f in data.facets.items():
        for t in f.terms:
            term2Parts(t.term)

    # NUMBER ALL EDGES FOR jx INDEXING
    for f, e in enumerate(query.edges):
        e.index = f
        if e.domain.type in ["uid", "default"]:
            # e.domain.partitions = jx.sort(e.domain.partitions, "value")
            for p, part in enumerate(e.domain.partitions):
                part.dataIndex = p
            e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    output = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception as e:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass
    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
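

# ILLUSTRATIVE SKETCH (not part of the original commit): decoding the facet
# names built above. "<select.name>,<c1>,...,<cN>" splits back into the select
# name plus the integer coordinates used to address the cube:
def _example_parse_facet_name(facet_name):
    parts = facet_name.split(",")
    return parts[0], tuple(int(c) for c in parts[1:])

# _example_parse_facet_name("count,3,0")   # -> ("count", (3, 0))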


def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        for i, v in enumerate(values1):
            FromES.facets[s.name + "," + str(i)] = {
                "terms": {
                    "field": query.edges[1].value,
                    "size": coalesce(query.limit, 200000)
                },
                "facet_filter": simplify_esfilter({"and": [
                    query.where,
                    {"term": {query.edges[0].value: v}}
                ]})
            }

    data = es_post(es, FromES, query.limit)

    # UNION ALL TERMS FROM SECOND DIMENSION
    values2 = set()
    for k, f in data.facets.items():
        values2.update(f.terms.term)
    values2 = jx.sort(values2)
    term2index = {v: i for i, v in enumerate(values2)}
    query.edges[1].domain.partitions = FlatList([{"name": v, "value": v} for v in values2])

    # MAKE CUBE
    output = {}
    dims = [len(values1), len(values2)]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        coord = facetName.split(",")
        s = [s for s in select if s.name == coord[0]][0]
        i1 = int(coord[1])
        for term in facet.terms:
            i2 = term2index[term.term]
            output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube

@@ -1,337 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from jx_base.queries import is_variable_name
from jx_elasticsearch.es09.expressions import UID
from jx_elasticsearch.es09.util import aggregates, build_es_query, compileEdges2Term
from jx_base import domains  # ALIGNED WITH THE SIBLING es09 MODULES, WHICH IMPORT domains FROM jx_base
from jx_elasticsearch import es09  # ALIGNED WITH THE SIBLING es09 MODULES; es09.expressions IS USED BELOW
from jx_python.containers.cube import Cube
from jx_python.expressions import simplify_esfilter
from mo_collections.matrix import Matrix
from mo_dots import literal_field, coalesce
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList
from mo_logs import Log
from mo_math import COUNT, PRODUCT


def is_terms_stats(query):
    # ONLY ALLOWED ONE UNKNOWN DOMAIN
    num_unknown = COUNT(1 for e in query.edges if e.domain.type not in domains.KNOWN)

    if num_unknown <= 1:
        if query.sort:
            Log.error("terms_stats can not be sorted")

        return True
    return False


def es_terms_stats(esq, mvel, query):
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = FlatList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: this can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", real_count=len(esFacets), theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error("not implemented yet")  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = FlatList()
            constants = FlatList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_variable_name(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es_post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = FlatList()
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map.get(stats):
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (special.dataIndex,) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube


def register_script_field(FromES, code):
    if not FromES.script_fields:
        FromES.script_fields = {}

    # IF CODE IS IDENTICAL, THEN USE THE EXISTING SCRIPT
    for n, c in FromES.script_fields.items():
        if c.script == code:
            return n

    name = "script" + UID()
    FromES.script_fields[name].script = code
    return name


def getAllEdges(facetEdges):
    if not facetEdges:
        return [()]
    return _getAllEdges(facetEdges, 0)


def _getAllEdges(facetEdges, edgeDepth):
    """
    RETURN ALL PARTITION COMBINATIONS: A LIST OF ORDERED TUPLES
    """
    if edgeDepth == len(facetEdges):
        return [()]
    edge = facetEdges[edgeDepth]

    deeper = _getAllEdges(facetEdges, edgeDepth + 1)

    output = FlatList()
    partitions = edge.domain.partitions
    for part in partitions:
        for deep in deeper:
            output.append((part,) + deep)
    return output
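

# ILLUSTRATIVE SKETCH (not part of the original commit): _getAllEdges() is the
# cross product of every edge's partitions; itertools.product yields the same
# ordered tuples:
#
#     from itertools import product
#     list(product(*[e.domain.partitions for e in facetEdges]))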


def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT
    """
    output = wrap({})  # wrap() SO THE DOT-ASSIGNMENTS BELOW (output.term, output.script, ...) WORK

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(coalesce(partition.where, {"and": []}))

        if partition.min and partition.max and is_variable_name(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_variable_name(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lt": es09.expressions.value2value(partition.max)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
                    )}})

                if is_variable_name(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gt": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + es09.expressions.value2MVEL(partition.min))}})

            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_variable_name(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lte": es09.expressions.value2value(partition.min)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
                    )}})

                if is_variable_name(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gte": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_variable_name(edge.value):
        # USE FAST ES SYNTAX
        if edge.domain.type in domains.ALGEBRAIC:
            output.range = {}
            output.range[edge.value] = {"gte": es09.expressions.value2query(partition.min), "lt": es09.expressions.value2query(partition.max)}
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output.term = {edge.value: partition.value}
            else:
                output.term = {edge.value: edge.domain.getKey(partition)}

        elif edge.domain.type == "default":
            output.term = dict()
            output.term[edge.value] = partition.value
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output.script = {"script": edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max)}
        else:
            output.script = {"script": "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)}

        code = es09.expressions.addFunctions(output.script.script)
        output.script.script = code.head + code.body
        return output

@@ -1,355 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from datetime import datetime

from jx_base.queries import is_variable_name

from mo_logs.strings import quote

from mo_logs import Log, strings
from mo_dots import Data
from mo_dots import coalesce
from mo_dots import wrap
from mo_dots.lists import FlatList
from pyLibrary import convert
from mo_math import COUNT
from mo_math import Math
from mo_math import stats
from jx_base import domains
from jx_elasticsearch.es09.expressions import value2MVEL
from mo_times import durations


DEBUG = False


def build_es_query(query):
    output = wrap({
        "query": {"match_all": {}},
        "from": 0,
        "size": 100 if DEBUG else 0,
        "sort": [],
        "facets": {
        }
    })

    if DEBUG:
        # TO LIMIT RECORDS TO WHAT'S IN FACETS
        output.query = {
            "bool": {
                "query": {
                    "match_all": {}
                },
                "filter": query.where.to_esfilter()
            }
        }

    return output


def compileTime2Term(edge):
    """
    RETURN MVEL CODE THAT MAPS TIME AND DURATION DOMAINS DOWN TO AN INTEGER,
    AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    nullTest = compileNullTest(edge)
    ref = coalesce(edge.domain.min, edge.domain.max, datetime(2000, 1, 1))

    if edge.domain.interval.month > 0:
        offset = ref.subtract(ref.floorMonth(), durations.DAY).milli
        if offset > durations.DAY.milli * 28:
            offset = ref.subtract(ref.ceilingMonth(), durations.DAY).milli
        partition2int = "milli2Month(" + value + ", " + value2MVEL(offset) + ")"
        partition2int = "((" + nullTest + ") ? 0 : " + partition2int + ")"

        def int2Partition(value):
            if Math.round(value) == 0:
                return edge.domain.NULL

            d = datetime(int(str(value)[:4]), int(str(value)[-2:]), 1)
            d = d.addMilli(offset)
            return edge.domain.getPartByKey(d)
    else:
        partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + str(edge.domain.interval.milli) + ")"
        partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"

        def int2Partition(value):
            if Math.round(value) == numPartitions:
                return edge.domain.NULL
            return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
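

# ILLUSTRATIVE SKETCH (not part of the original commit): the non-month branch
# above floors the value into interval-sized buckets, reserving numPartitions
# itself as the out-of-bounds (null) index. The same arithmetic in Python:
def _example_time2partition(value_milli, ref_milli, interval_milli, num_partitions):
    if value_milli is None or value_milli < ref_milli:
        return num_partitions  # NULL PARTITION
    index = (value_milli - ref_milli) // interval_milli
    return num_partitions if index >= num_partitions else index

# _example_time2partition(3600000, 0, 1800000, 4)   # -> 2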


# RETURN MVEL CODE THAT MAPS DURATION DOMAINS DOWN TO AN INTEGER,
# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
def compileDuration2Term(edge):
    if edge.esscript:
        Log.error("edge script not supported yet")

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    ref = coalesce(edge.domain.min, edge.domain.max, durations.ZERO)
    nullTest = compileNullTest(edge)

    ms = edge.domain.interval.milli
    if edge.domain.interval.month > 0:
        ms = durations.YEAR.milli / 12 * edge.domain.interval.month

    partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + str(ms) + ")"
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)


# RETURN MVEL CODE THAT MAPS THE numeric DOMAIN DOWN TO AN INTEGER,
# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
def compileNumeric2Term(edge):
    if edge.script:
        Log.error("edge script not supported yet")

    if edge.domain.type != "numeric" and edge.domain.type != "count":
        Log.error("can only translate numeric domains")

    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    if not edge.domain.max:
        if not edge.domain.min:
            ref = 0
            partition2int = "Math.floor(" + value + "/" + value2MVEL(edge.domain.interval) + ")"
            nullTest = "false"
        else:
            ref = value2MVEL(edge.domain.min)
            partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")"
            nullTest = "" + value + "<" + ref
    elif not edge.domain.min:
        ref = value2MVEL(edge.domain.max)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")"
        nullTest = "" + value + ">=" + ref
    else:
        top = value2MVEL(edge.domain.max)
        ref = value2MVEL(edge.domain.min)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")"
        nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"

    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"
    offset = convert.value2int(ref)

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey((value * edge.domain.interval) + offset)

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)


def compileString2Term(edge):
    if edge.esscript:
        Log.error("edge script not supported yet")

    value = edge.value
    if is_variable_name(value):
        value = strings.expand_template("getDocValue({{path}})", {"path": quote(value)})
    else:
        Log.error("not handled")

    def fromTerm(value):
        return edge.domain.getPartByKey(value)

    return Data(
        toTerm={"head": "", "body": value},
        fromTerm=fromTerm
    )


def compileNullTest(edge):
    """
    RETURN A MVEL EXPRESSION THAT WILL EVALUATE TO true FOR OUT-OF-BOUNDS
    """
    if edge.domain.type not in domains.ALGEBRAIC:
        Log.error("can only translate time and duration domains")

    # IS THERE A LIMIT ON THE DOMAIN?
    value = edge.value
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    if not edge.domain.max:
        if not edge.domain.min:
            return "false"  # NOTHING IS OUT-OF-BOUNDS (RETURN MVEL, NOT A PYTHON BOOL, SO CALLERS CAN SPLICE IT INTO CODE)
        bot = value2MVEL(edge.domain.min)
        nullTest = "" + value + "<" + bot
    elif not edge.domain.min:
        top = value2MVEL(edge.domain.max)
        nullTest = "" + value + ">=" + top
    else:
        top = value2MVEL(edge.domain.max)
        bot = value2MVEL(edge.domain.min)
        nullTest = "(" + value + "<" + bot + ") or (" + value + ">=" + top + ")"

    return nullTest


def compileEdges2Term(mvel_compiler, edges, constants):
    """
    TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)

    GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM
    GIVE LAMBDA THAT WILL CONVERT THE TERM BACK INTO THE TUPLE
    RETURNS TUPLE OBJECT WITH "type" and "value" ATTRIBUTES.
    "type" CAN HAVE A VALUE OF "script", "field" OR "count"
    CAN USE THE constants (name, value pairs)
    """

    # IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING
    edge0 = edges[0]

    if len(edges) == 1 and edge0.domain.type in ["set", "default"]:
        # THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET
        def temp(term):
            return FlatList([edge0.domain.getPartByKey(term)])

        if edge0.value and is_variable_name(edge0.value):
            return Data(
                field=edge0.value,
                term2parts=temp
            )
        elif COUNT(edge0.domain.dimension.fields) == 1:
            return Data(
                field=edge0.domain.dimension.fields[0],
                term2parts=temp
            )
        elif not edge0.value and edge0.domain.partitions:
            script = mvel_compiler.Parts2TermScript(edge0.domain)
            return Data(
                expression=script,
                term2parts=temp
            )
        else:
            return Data(
                expression=mvel_compiler.compile_expression(edge0.value, constants),
                term2parts=temp
            )

    mvel_terms = []  # FUNCTION TO PACK TERMS
    fromTerm2Part = []  # UNPACK TERMS BACK TO PARTS
    for e in edges:
        domain = e.domain
        fields = domain.dimension.fields

        if not e.value and fields:
            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Data(
                toTerm=code,
                fromTerm=decode
            )
        elif fields:
            Log.error("not expected")
        elif e.domain.type == "time":
            t = compileTime2Term(e)
        elif e.domain.type == "duration":
            t = compileDuration2Term(e)
        elif e.domain.type in domains.ALGEBRAIC:
            t = compileNumeric2Term(e)
        elif e.domain.type == "set" and not fields:
            def fromTerm(term):
                return e.domain.getPartByKey(term)

            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Data(
                toTerm=code,
                fromTerm=decode
            )
        else:
            t = compileString2Term(e)

        if not t.toTerm.body:
            mvel_compiler.Parts2Term(e.domain)
            Log.unexpected("what?")

        fromTerm2Part.append(t.fromTerm)
        mvel_terms.append(t.toTerm.body)

    # REGISTER THE DECODE FUNCTION
    def temp(term):
        terms = term.split('|')

        output = FlatList([t2p(t) for t, t2p in zip(terms, fromTerm2Part)])
        return output

    return Data(
        expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants),
        term2parts=temp
    )
|
||||
|
||||
|
||||
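# A minimal sketch of the pipe packing above (hypothetical terms, not part of
# the original file): the compiled MVEL joins one escaped term per edge with
# "|", and term2parts() splits the packed string back into the tuple.
_packed = "2015-01-01" + "|" + "win64"   # WHAT THE COMPILED MVEL EMITS
assert _packed.split("|") == ["2015-01-01", "win64"]
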
def fix_es_stats(s):
    """
    ES RETURNS BAD DEFAULT VALUES FOR STATS
    """
    s = wrap(s)
    if s.count == 0:
        return stats.zero
    return s

# MAP NAME TO SQL FUNCTION
aggregates = {
    "none": "none",
    "one": "count",
    "sum": "total",
    "add": "total",
    "count": "count",
    "maximum": "max",
    "minimum": "min",
    "max": "max",
    "min": "min",
    "mean": "mean",
    "average": "mean",
    "avg": "mean",
    "N": "count",
    "X0": "count",
    "X1": "total",
    "X2": "sum_of_squares",
    "std": "std_deviation",
    "stddev": "std_deviation",
    "var": "variance",
    "variance": "variance"
}

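# Hypothetical lookups (not in the original file) showing how jx aggregate
# names are normalized to the backend statistic names:
assert aggregates["average"] == "mean"
assert aggregates["add"] == "total"
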
@@ -1,238 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from collections import Mapping

from jx_base import container
from jx_base.container import Container
from jx_base.dimensions import Dimension
from jx_base.expressions import jx_expression
from jx_base.queries import is_variable_name
from jx_base.query import QueryOp
from jx_elasticsearch.es14.aggs import es_aggsop, is_aggsop
from jx_elasticsearch.es14.deep import is_deepop, es_deepop
from jx_elasticsearch.es14.setop import is_setop, es_setop
from jx_elasticsearch.es14.util import aggregates
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
from jx_python import jx
from mo_dots import Data, Null, unwrap, coalesce, split_field, literal_field, unwraplist, join_field, wrap, listwrap, FlatList
from mo_json import scrub, value2json
from mo_json.typed_encoder import TYPE_PREFIX, EXISTS_TYPE
from mo_kwargs import override
from mo_logs import Log, Except
from pyLibrary.env import elasticsearch, http


class ES14(Container):
    """
    SEND jx QUERIES TO ElasticSearch
    """

    def __new__(cls, *args, **kwargs):
        if (len(args) == 1 and args[0].get("index") == "meta") or kwargs.get("index") == "meta":
            output = ElasticsearchMetadata.__new__(ElasticsearchMetadata, *args, **kwargs)
            output.__init__(*args, **kwargs)
            return output
        else:
            return Container.__new__(cls)

    @override
    def __init__(
        self,
        host,
        index,
        alias=None,
        type=None,
        name=None,
        port=9200,
        read_only=True,
        timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
        wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
        typed=None,
        kwargs=None
    ):
        Container.__init__(self)
        if not container.config.default:
            container.config.default = {
                "type": "elasticsearch",
                "settings": unwrap(kwargs)
            }
        self.settings = kwargs
        self.name = name = coalesce(name, alias, index)
        if read_only:
            self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
        else:
            self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)

        self._namespace = ElasticsearchMetadata(kwargs=kwargs)
        self.settings.type = self.es.settings.type
        self.edges = Data()
        self.worker = None

        columns = self._namespace.get_snowflake(self.es.settings.alias).columns  # ABSOLUTE COLUMNS
        is_typed = any(c.es_column == EXISTS_TYPE for c in columns)

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = is_typed
        else:
            if is_typed != typed:
                Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed)
            self.typed = typed

    @property
    def snowflake(self):
        return self._namespace.get_snowflake(self.es.settings.alias)

    @property
    def namespace(self):
        return self._namespace

    def get_table(self, full_name):
        return Table(full_name, self)

    def get_schema(self, query_path):
        return self._namespace.get_schema(query_path)

    def __data__(self):
        settings = self.settings.copy()
        settings.settings = None
        return settings

    def __enter__(self):
        Log.error("No longer used")
        return self

    def __exit__(self, type, value, traceback):
        if not self.worker:
            return

        if isinstance(value, Exception):
            self.worker.stop()
            self.worker.join()
        else:
            self.worker.join()

    @property
    def query_path(self):
        return join_field(split_field(self.name)[1:])

    @property
    def url(self):
        return self.es.url

    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, container=self, namespace=self.namespace)

            for s in listwrap(query.select):
                if s.aggregate != None and not aggregates.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self.es, query):
                return es_deepop(self.es, query)
            if is_aggsop(self.es, query):
                return es_aggsop(self.es, frum, query)
            if is_setop(self.es, query):
                return es_setop(self.es, query)
            Log.error("Can not handle")
        except Exception as e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self.es.cluster.url / "_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)

    def addDimension(self, dim):
        if isinstance(dim, list):
            Log.error("Expecting dimension to be an object, not a list:\n{{dim}}", dim=dim)
        self._addDimension(dim, [])

    def _addDimension(self, dim, path):
        dim.full_name = dim.name
        for e in dim.edges:
            d = Dimension(e, dim, self)
            self.edges[d.full_name] = d

    def __getitem__(self, item):
        c = self.get_columns(table_name=self.name, column_name=item)
        if c:
            if len(c) > 1:
                Log.error("Do not know how to handle multiple matches")
            return c[0]

        e = self.edges[item]
        if not c:
            Log.warning("Column with name {{column|quote}} can not be found in {{table}}", column=item, table=self.name)
        return e

    def __getattr__(self, item):
        return self.edges[item]

    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self.es.get_properties()

        # GET IDS OF DOCUMENTS
        results = self.es.search({
            "fields": listwrap(schema._routing.path),
            "query": {"filtered": {
                "filter": jx_expression(command.where).to_esfilter(Null)
            }},
            "size": 10000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for k, v in command.set.items():
            if not is_variable_name(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                v = scrub(v)
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                    updates.append(s)
            content = ("\n".join(value2json(c) for c in updates) + "\n")
            response = self.es.cluster.post(
                self.es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"},
                timeout=self.settings.timeout,
                params={"wait_for_active_shards": self.settings.wait_for_active_shards}
            )
            if response.errors:
                Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])

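# A hedged example of the command shape update() accepts; the field names
# here are hypothetical, not from the original file:
#
#     container.update({
#         "set": {"build.status": "done"},       # NAME -> CONSTANT VALUE
#         "where": {"eq": {"build.id": 1234}}    # jx EXPRESSION, COMPILED TO AN ES FILTER
#     })
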
@@ -1,469 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from jx_base.domains import SetDomain
from jx_base.expressions import TupleOp, NULL
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es14.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder
from jx_elasticsearch.es14.expressions import split_expression_by_depth, AndOp, Variable, NullOp
from jx_elasticsearch.es14.setop import get_pull_stats
from jx_elasticsearch.es14.util import aggregates
from jx_python import jx
from jx_python.expressions import jx_expression_to_function
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import EXISTS
from mo_json.typed_encoder import encode_property
from mo_logs import Log
from mo_math import Math, MAX, UNION
from mo_times.timer import Timer


def is_aggsop(es, query):
    if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate):
        return True
    return False


def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum.schema
    output = FlatList()

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        if edge.value != None and not isinstance(edge.value, NullOp):
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v.var):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        try:
            vars_ |= edge.value.vars()
            depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var))
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",
                    column=unwraplist([v for v in vars_ if schema[v] == None])
                )
            if len(depths) > 1:
                Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
            max_depth = MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception as e:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            max_depth = 0
            output.append([])

        output[max_depth].append(AggsDecoder(edge, query, limit))
    return output


def sort_edges(query, prop):
    ordered_edges = []
    remaining_edges = getattr(query, prop)
    for s in query.sort:
        for e in remaining_edges:
            if e.value == s.value:
                if isinstance(e.domain, SetDomain):
                    pass  # ALREADY SORTED?
                else:
                    e.domain.sort = s.sort
                ordered_edges.append(e)
                remaining_edges.remove(e)
                break
        else:
            Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value)

    ordered_edges.extend(remaining_edges)
    return ordered_edges


def es_aggsop(es, frum, query):
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    select = listwrap(query.select)

    es_query = Data()
    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            if schema.query_path == ".":
                s.pull = jx_expression_to_function("doc_count")
            else:
                s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
        elif isinstance(s.value, Variable):
            if s.aggregate == "count":
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            formula.append(s)

    for canonical_name, many in new_select.items():
        for s in many:
            columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_count")
                    if column.jx_type == EXISTS:
                        canonical_names.append(cn + ".doc_count")
                        es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
                    else:
                        canonical_names.append(cn + ".value")
                        es_query.aggs[cn].value_count.field = column.es_column
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function({"add": canonical_names})
            elif s.aggregate == "median":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = jx_expression_to_function(key + ".values.50\\.0")
            elif s.aggregate == "percentile":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [percent]
                es_query.aggs[key].percentiles.compression = 2
                s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
            elif s.aggregate == "cardinality":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_cardinality")
                    canonical_names.append(cn)
                    es_query.aggs[cn].cardinality.field = column.es_column
                if len(columns) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0] + ".value")
                else:
                    s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
            elif s.aggregate == "stats":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = columns[0].es_column

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + "_percentile")
                es_query.aggs[median_name].percentiles.field = columns[0].es_column
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = get_pull_stats(stats_name, median_name)
            elif s.aggregate == "union":
                pulls = []
                for column in columns:
                    stats_name = encode_property(column.es_column)

                    if column.nested_path[0] == ".":
                        es_query.aggs[stats_name] = {"terms": {
                            "field": column.es_column,
                            "size": Math.min(s.limit, MAX_LIMIT)
                        }}
                        pulls.append(get_bucket_keys(stats_name))

                    else:
                        es_query.aggs[stats_name] = {
                            "nested": {"path": column.nested_path[0]},
                            "aggs": {"_nested": {"terms": {
                                "field": column.es_column,
                                "size": Math.min(s.limit, MAX_LIMIT)
                            }}}
                        }
                        pulls.append(get_bucket_keys(stats_name + "._nested"))
                if len(pulls) == 0:
                    s.pull = NULL
                elif len(pulls) == 1:
                    s.pull = pulls[0]
                else:
                    s.pull = lambda row: UNION(
                        p(row)
                        for p in pulls
                    )
            else:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")

                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
                s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)

        if isinstance(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = "doc_count"
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
        elif s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = jx_expression_to_function(key + ".values.50\\.0")
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(key + ".value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = get_pull_stats(stats_name, median_name)
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(stats_name + ".buckets.key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

    decoders = get_decoders_by_depth(query)
    start = 0

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": schema.query_path[0]}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

        if decoders:
            for d in jx.reverse(decoders[0]):
                es_query = d.append_query(es_query, start)
                start += d.num_columns

        if split_where[0]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter = AndOp("and", split_where[0]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter}, es_query)}
            )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)


EMPTY = {}
EMPTY_LIST = []


def get_bucket_keys(stats_name):
    buckets = jx_expression_to_function(stats_name + ".buckets")

    def output(row):
        return [b['key'] for b in listwrap(buckets(row))]

    return output

def drill(agg):
    deeper = agg.get("_filter") or agg.get("_nested")
    while deeper:
        agg = deeper
        deeper = agg.get("_filter") or agg.get("_nested")
    return agg

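# A minimal sketch of drill() on a hypothetical payload (not from the original
# file): it peels off the synthetic _filter/_nested wrappers woven in by
# es_aggsop, leaving the real bucket data.
_agg = {"_filter": {"_nested": {"doc_count": 7}}}
assert drill(_agg) == {"doc_count": 7}
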
def aggs_iterator(aggs, decoders, coord=True):
    """
    DIG INTO ES'S RECURSIVE aggs DATA-STRUCTURE:
    RETURN AN ITERATOR OVER THE EFFECTIVE ROWS OF THE RESULTS

    :param aggs: ES AGGREGATE OBJECT
    :param decoders:
    :param coord: TURN ON LOCAL COORDINATE LOOKUP
    """
    depth = max(d.start + d.num_columns for d in decoders)

    def _aggs_iterator(agg, d):
        agg = drill(agg)

        if d > 0:
            for k, v in agg.items():
                if k == "_match":
                    v = drill(v)
                    for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
                        b["_index"] = i
                        for a, parts in _aggs_iterator(b, d - 1):
                            yield a, parts + (b,)
                elif k == "_other":
                    for b in v.get("buckets", EMPTY_LIST):
                        for a, parts in _aggs_iterator(b, d - 1):
                            yield a, parts + (Null,)
                elif k == "_missing":
                    b = drill(v)
                    for a, parts in _aggs_iterator(b, d - 1):
                        yield a, parts + (b,)
                elif k.startswith("_join_"):
                    v["key"] = int(k[6:])
                    for a, parts in _aggs_iterator(v, d - 1):
                        yield a, parts + (v,)
        else:
            for k, v in agg.items():
                if k == "_match":
                    v = drill(v)
                    for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
                        b["_index"] = i
                        yield b, (b,)
                elif k == "_other":
                    for b in v.get("buckets", EMPTY_LIST):
                        yield b, (Null,)
                elif k == "_missing":
                    b = drill(v)
                    yield b, (v,)
                elif k.startswith("_join_"):
                    v["_index"] = int(k[6:])
                    yield v, (v,)

    if coord:
        for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
            coord = tuple(d.get_index(parts) for d in decoders)
            if any(c is None for c in coord):
                continue
            yield parts, coord, a
    else:
        for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
            yield parts, None, a


def count_dim(aggs, decoders):
    if any(isinstance(d, (DefaultDecoder, DimFieldListDecoder, ObjectDecoder)) for d in decoders):
        # ENUMERATE THE DOMAINS, IF UNKNOWN AT QUERY TIME
        for row, coord, agg in aggs_iterator(aggs, decoders, coord=False):
            for d in decoders:
                d.count(row)
        for d in decoders:
            d.done_count()
    new_edges = wrap([d.edge for d in decoders])
    return new_edges


format_dispatch = {}
from jx_elasticsearch.es14.format import format_cube

_ = format_cube

@@ -1,753 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from collections import Mapping

from jx_base.dimensions import Dimension
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
from jx_base.expressions import TupleOp, TRUE
from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT
from jx_elasticsearch.es14.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE
from jx_python import jx
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist, transpose
from mo_future import text_type
from mo_json.typed_encoder import STRING, NUMBER, BOOLEAN
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_logs.strings import quote, expand_template
from mo_math import MAX, MIN, Math
from pyLibrary.convert import string2boolean


class AggsDecoder(object):
    def __new__(cls, e=None, query=None, *args, **kwargs):
        e.allowNulls = coalesce(e.allowNulls, True)

        if e.value and e.domain.type == "default":
            # if query.groupby:
            #     return object.__new__(DefaultDecoder, e)

            if isinstance(e.value, text_type):
                Log.error("Expecting Variable or Expression, not plain string")

            if isinstance(e.value, LeavesOp):
                return object.__new__(ObjectDecoder, e)
            elif isinstance(e.value, TupleOp):
                # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
                # JUST PULL THE FIELDS
                if not all(isinstance(t, Variable) for t in e.value.terms):
                    Log.error("Can only handle variables in tuples")

                e.domain = Data(
                    dimension={"fields": e.value.terms}
                )
                return object.__new__(DimFieldListDecoder, e)
            elif isinstance(e.value, Variable):
                schema = query.frum.schema
                cols = schema.leaves(e.value.var)
                if not cols:
                    return object.__new__(DefaultDecoder, e)
                if len(cols) != 1:
                    return object.__new__(ObjectDecoder, e)
                col = cols[0]
                limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

                if col.partitions != None:
                    if col.multi > 1 and len(col.partitions) < 6:
                        return object.__new__(MultivalueDecoder)

                    partitions = col.partitions[:limit:]
                    if e.domain.sort == -1:
                        partitions = list(reversed(sorted(partitions)))
                    else:
                        partitions = sorted(partitions)
                    e.domain = SimpleSetDomain(partitions=partitions, limit=limit)
                else:
                    e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
                    return object.__new__(DefaultDecoder, e)

            else:
                return object.__new__(DefaultDecoder, e)

        if e.value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder, e)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder, e)
        if e.value and e.domain.type == "time":
            return object.__new__(TimeDecoder, e)
        if e.range:
            return object.__new__(GeneralRangeDecoder, e)
        if e.value and e.domain.type == "duration":
            return object.__new__(DurationDecoder, e)
        elif e.value and e.domain.type == "range":
            return object.__new__(RangeDecoder, e)
        elif not e.value and e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            fields = e.domain.dimension.fields
            if isinstance(fields, Mapping):
                Log.error("No longer allowed: All objects are expressions")
            else:
                return object.__new__(DimFieldListDecoder, e)
        elif not e.value and all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder, e)
        else:
            Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)

    def __init__(self, edge, query, limit):
        self.start = None
        self.edge = edge
        self.name = literal_field(self.edge.name)
        self.query = query
        self.limit = limit
        self.schema = self.query.frum.schema

    def append_query(self, es_query, start):
        Log.error("Not supported")

    def count(self, row):
        pass

    def done_count(self):
        pass

    def get_value_from_row(self, row):
        raise NotImplementedError()

    def get_value(self, index):
        raise NotImplementedError()

    def get_index(self, row):
        raise NotImplementedError()

    @property
    def num_columns(self):
        return 0


class SetDecoder(AggsDecoder):

    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        domain = self.domain = edge.domain
        self.sorted = None
        self.pull = pull_functions[STRING]

        # WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM
        # self.sorted = {1: "asc", -1: "desc", None: None}[getattr(edge.domain, 'sort', None)]
        edge_var = set(v.var for v in edge.value.vars())
        if query.sort:
            for s in query.sort:
                if not edge_var - set(v.var for v in s.value.vars()):
                    self.sorted = {1: "asc", -1: "desc"}[s.sort]
                    parts = jx.sort(domain.partitions, {"value": domain.key, "sort": s.sort})
                    edge.domain = self.domain = SimpleSetDomain(key=domain.key, label=domain.label, partitions=parts)

    def append_query(self, es_query, start):
        self.start = start
        domain = self.domain

        domain_key = domain.key
        include, text_include = transpose(*(
            (
                float(v) if isinstance(v, (int, float)) else v,
                text_type(float(v)) if isinstance(v, (int, float)) else v
            )
            for v in (p[domain_key] for p in domain.partitions)
        ))
        value = self.edge.value
        exists = AndOp("and", [
            value.exists(),
            InOp("in", [value, Literal("literal", include)])
        ]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            es_field = self.query.frum.schema.leaves(value.var)[0].es_column  # ALREADY CHECKED THERE IS ONLY ONE
            terms = set_default({"terms": {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }}, es_query)
        else:
            terms = set_default({"terms": {
                "script": value.to_es_script(self.schema).script(self.schema),
                "size": limit
            }}, es_query)

        if self.edge.allowNulls:
            missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
        else:
            missing = None

        return wrap({"aggs": {
            "_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {
                    "_filter": terms
                }
            },
            "_missing": missing
        }})

    def get_value(self, index):
        return self.domain.getKeyByIndex(index)

    def get_value_from_row(self, row):
        return self.pull(row[self.start].get('key'))

    def get_index(self, row):
        try:
            part = row[self.start]
            return self.domain.getIndexByKey(part.get('key'))
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        return 1


def _range_composer(edge, domain, es_query, to_float, schema):
    # USE RANGES
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if edge.allowNulls:
        missing_filter = set_default(
            {
                "filter": NotOp("not", AndOp("and", [
                    edge.value.exists(),
                    InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]),
                    InequalityOp("lt", [edge.value, Literal(None, to_float(_max))])
                ]).partial_eval()).to_esfilter(schema)
            },
            es_query
        )
    else:
        missing_filter = None

    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        calc = {"script": edge.value.to_es_script(schema).script(schema)}

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})

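# An assumed sketch of the aggregation _range_composer emits (the field name
# is hypothetical): one "range" bucket per partition under "_match", plus a
# "_missing" filter when nulls are allowed.
#
#     {"aggs": {
#         "_match": {"range": {
#             "field": "run.timestamp",
#             "ranges": [{"from": 0.0, "to": 86400.0}]
#         }},
#         "_missing": {"filter": {...}}
#     }}
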
class TimeDecoder(AggsDecoder):
    def append_query(self, es_query, start):
        self.start = start
        schema = self.query.frum.schema
        return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.unix, schema)

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
        domain = self.edge.domain
        part = row[self.start]
        if part == None:
            return len(domain.partitions)

        f = coalesce(part.get('from'), part.get('key'))
        t = coalesce(part.get('to'), part.get('key'))
        if f == None or t == None:
            return len(domain.partitions)
        else:
            for p in domain.partitions:
                if p.min.unix <= f < p.max.unix:
                    return p.dataIndex
        sample = part.copy()
        sample.buckets = None
        Log.error("Expecting to find {{part}}", part=sample)

    @property
    def num_columns(self):
        return 1


class GeneralRangeDecoder(AggsDecoder):
    """
    Accept an algebraic domain, and an edge with a `range` attribute
    This class assumes the `snapshot` version - where we only include
    partitions that have their `min` value in the range.
    """

    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        if edge.domain.type == "time":
            self.to_float = lambda x: x.unix
        elif edge.domain.type == "range":
            self.to_float = lambda x: x
        else:
            Log.error("Unknown domain of type {{type}} for range edge", type=edge.domain.type)

    def append_query(self, es_query, start):
        self.start = start

        edge = self.edge
        range = edge.range
        domain = edge.domain

        aggs = {}
        for i, p in enumerate(domain.partitions):
            filter_ = AndOp("and", [
                InequalityOp("lte", [range.min, Literal("literal", self.to_float(p.min))]),
                InequalityOp("gt", [range.max, Literal("literal", self.to_float(p.min))])
            ])
            aggs["_join_" + text_type(i)] = set_default(
                {"filter": filter_.to_esfilter(self.schema)},
                es_query
            )

        return wrap({"aggs": aggs})

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
        domain = self.edge.domain
        part = row[self.start]
        if part == None:
            return len(domain.partitions)
        return part["_index"]

    @property
    def num_columns(self):
        return 1


class GeneralSetDecoder(AggsDecoder):
    """
    EXPECTING ALL PARTS IN partitions TO HAVE A where CLAUSE
    """

    def append_query(self, es_query, start):
        self.start = start

        parts = self.edge.domain.partitions
        filters = []
        notty = []

        for p in parts:
            w = p.where
            filters.append(AndOp("and", [w] + notty).to_esfilter(self.schema))
            notty.append(NotOp("not", w))

        missing_filter = None
        if self.edge.allowNulls:  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            missing_filter = set_default(
                {"filter": AndOp("and", notty).to_esfilter(self.schema)},
                es_query
            )

        return wrap({"aggs": {
            "_match": set_default(
                {"filters": {"filters": filters}},
                es_query
            ),
            "_missing": missing_filter
        }})

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
        domain = self.edge.domain
        part = row[self.start]
        # if part == None:
        #     return len(domain.partitions)
        return part.get("_index", len(domain.partitions))

    @property
    def num_columns(self):
        return 1


class DurationDecoder(AggsDecoder):
    def append_query(self, es_query, start):
        self.start = start
        return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema)

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
        domain = self.edge.domain
        part = row[self.start]
        if part == None:
            return len(domain.partitions)

        f = coalesce(part.get('from'), part.get('key'))
        t = coalesce(part.get('to'), part.get('key'))
        if f == None or t == None:
            return len(domain.partitions)
        else:
            for p in domain.partitions:
                if p.min.seconds <= f < p.max.seconds:
                    return p.dataIndex
        sample = part.copy()
        sample.buckets = None
        Log.error("Expecting to find {{part}}", part=sample)

    @property
    def num_columns(self):
        return 1


class RangeDecoder(AggsDecoder):
    def append_query(self, es_query, start):
        self.start = start
        return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x, self.schema)

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
        domain = self.edge.domain
        part = row[self.start]
        if part == None:
            return len(domain.partitions)

        f = coalesce(part.get('from'), part.get('key'))
        t = coalesce(part.get('to'), part.get('key'))
        if f == None or t == None:
            return len(domain.partitions)
        else:
            for p in domain.partitions:
                if p.min <= f < p.max:
                    return p.dataIndex
        sample = part.copy()
        sample.buckets = None
        Log.error("Expecting to find {{part}}", part=sample)

    @property
    def num_columns(self):
        return 1


class MultivalueDecoder(SetDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        self.var = edge.value.var
        self.values = query.frum.schema[edge.value.var][0].partitions
        self.parts = []

    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query

    def get_value_from_row(self, row):
        values = row[self.start]['key'].replace("||", "\b").split("|")
        if len(values) == 2:
            return None
        return unwraplist([v.replace("\b", "|") for v in values[1:-1]])

    def get_index(self, row):
        find = self.get_value_from_row(row)
        try:
            return self.parts.index(find)
        except Exception:
            self.parts.append(find)
            return len(self.parts) - 1

    @property
    def num_columns(self):
        return 1

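# A minimal sketch of the pipe decoding in get_value_from_row above, using a
# hypothetical packed key: LIST_TO_PIPE emits "|v1|v2|" with any literal "|"
# doubled, so swapping "||" for a sentinel makes the split safe.
_key = "|debug|asan|"
_values = _key.replace("||", "\b").split("|")           # ['', 'debug', 'asan', '']
_decoded = [v.replace("\b", "|") for v in _values[1:-1]]
assert _decoded == ["debug", "asan"]
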
class ObjectDecoder(AggsDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        if isinstance(edge.value, LeavesOp):
            prefix = edge.value.term.var
            flatter = lambda k: literal_field(relative_field(k, prefix))
        else:
            prefix = edge.value.var
            flatter = lambda k: relative_field(k, prefix)

        self.put, self.fields = transpose(*[
            (flatter(untype_path(c.names["."])), c.es_column)
            for c in query.frum.schema.leaves(prefix)
        ])

        self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}})
        self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

    def append_query(self, es_query, start):
        self.start = start
        for i, v in enumerate(self.fields):
            nest = wrap({"aggs": {
                "_match": set_default({"terms": {
                    "field": v,
                    "size": self.domain.limit
                }}, es_query),
                "_missing": set_default(
                    {"filter": {"missing": {"field": v}}},
                    es_query
                )
            }})
            es_query = nest
        return es_query

    def count(self, row):
        value = self.get_value_from_row(row)
        i = self.key2index.get(value)
        if i is None:
            i = self.key2index[value] = len(self.parts)
            self.parts.append(value)

    def done_count(self):
        self.computed_domain = True
        self.edge.domain = self.domain = SimpleSetDomain(
            key="value",
            partitions=[{"value": p, "dataIndex": i} for i, p in enumerate(self.parts)]
        )

    def get_index(self, row):
        value = self.get_value_from_row(row)
        if self.computed_domain:
            return self.domain.getIndexByKey(value)

        if value is None:
            return -1
        i = self.key2index.get(value)
        if i is None:
            i = self.key2index[value] = len(self.parts)
            self.parts.append(value)
        return i

    def get_value_from_row(self, row):
        part = row[self.start:self.start + self.num_columns:]
        if not part[0]['doc_count']:
            return None

        output = Data()
        for k, v in zip(self.put, part):
            output[k] = v.get('key')
        return output

    @property
    def num_columns(self):
        return len(self.fields)


class DefaultDecoder(SetDecoder):
    # FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)

    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        self.domain = edge.domain
        self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False
        self.script = self.edge.value.partial_eval().to_es_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
        if sort_candidates:
            self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]}
        else:
            self.es_order = None

    def append_query(self, es_query, start):
        self.start = start

        if not isinstance(self.edge.value, Variable):
            if self.exists is TRUE:
                # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
                output = wrap({"aggs": {
                    "_match": set_default(
                        {"terms": {
                            "script": self.script.expr,
                            "size": self.domain.limit,
                            "order": self.es_order
                        }},
                        es_query
                    )
                }})
            else:
                output = wrap({"aggs": {
                    "_match": {  # _match AND _filter REVERSED SO _match LINES UP WITH _missing
                        "filter": self.exists.to_esfilter(self.schema),
                        "aggs": {
                            "_filter": set_default(
                                {"terms": {
                                    "script": self.script.expr,
                                    "size": self.domain.limit,
                                    "order": self.es_order
                                }},
                                es_query
                            )
                        }
                    },
                    "_missing": set_default(
                        {"filter": self.missing.to_esfilter(self.schema)},
                        es_query
                    )
                }})
            return output
        else:
            output = wrap({"aggs": {
                "_match": set_default(
                    {"terms": {
                        "field": self.schema.leaves(self.edge.value.var)[0].es_column,
                        "size": self.domain.limit,
                        "order": self.es_order
                    }},
                    es_query
                ),
                "_missing": set_default(
                    {"filter": self.missing.to_esfilter(self.schema)},
                    es_query
                )
            }})
            return output

    def count(self, row):
        part = row[self.start]
        if part['doc_count']:
            if part.get('key') != None:
                self.parts.append(self.pull(part.get('key')))
            else:
                self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        self.edge.domain = self.domain = SimpleSetDomain(
            partitions=jx.sort(set(self.parts))
        )
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        if self.computed_domain:
            try:
                part = row[self.start]
                return self.domain.getIndexByKey(self.pull(part.get('key')))
            except Exception as e:
                Log.error("problem", cause=e)
        else:
            try:
                part = row[self.start]
                key = self.pull(part.get('key'))
                i = self.key2index.get(key)
                if i is None:
                    i = len(self.parts)
                    part = {"key": key, "dataIndex": i}
                    self.parts.append(part)
                    self.key2index[key] = i
                return i
            except Exception as e:
                Log.error("problem", cause=e)

    @property
    def num_columns(self):
        return 1


class DimFieldListDecoder(SetDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        edge.allowNulls = False
        self.fields = edge.domain.dimension.fields
        self.domain = self.edge.domain
        self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()

    def append_query(self, es_query, start):
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start
        for i, v in enumerate(self.fields):
            exists = v.exists().partial_eval()
            nest = wrap({"aggs": {"_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {"_filter": set_default({"terms": {
                    "field": self.schema.leaves(v.var)[0].es_column,
                    "size": self.domain.limit
                }}, es_query)}
            }}})
            nest.aggs._missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
            es_query = nest

        if self.domain.where:
            filter_ = self.domain.where.partial_eval().to_esfilter(self.schema)
            es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}}

        return es_query

    def count(self, row):
        part = row[self.start:self.start + len(self.fields):]
        if part[0]['doc_count']:
            value = tuple(p.get("key") for p in part)
            self.parts.append(value)

    def done_count(self):
        columns = list(map(text_type, range(len(self.fields))))
        parts = wrap([{text_type(i): p for i, p in enumerate(part)} for part in set(self.parts)])
        self.parts = None
        sorted_parts = jx.sort(parts, columns)

        self.edge.domain = self.domain = SimpleSetDomain(
            key="value",
            partitions=[{"value": tuple(v[k] for k in columns), "dataIndex": i} for i, v in enumerate(sorted_parts)]
        )

    def get_index(self, row):
        part = row[self.start:self.start + len(self.fields):]
        if part[0]['doc_count'] == 0:
            return None
        find = tuple(p.get("key") for p in part)
        output = self.domain.getIndexByKey(find)
        return output

    @property
    def num_columns(self):
        return len(self.fields)


pull_functions = {
    STRING: lambda x: x,
    NUMBER: lambda x: float(x) if x != None else None,
    BOOLEAN: string2boolean
}

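# A small, hypothetical illustration of the coercion table above:
assert pull_functions[NUMBER]("42") == 42.0
assert pull_functions[NUMBER](None) is None
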
@@ -1,238 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from jx_base.expressions import NULL
from jx_base.query import DEFAULT_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es14.expressions import split_expression_by_depth, AndOp, Variable, LeavesOp
from jx_elasticsearch.es14.setop import format_dispatch, get_pull_function, get_pull
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template
from jx_python.expressions import compile_expression, jx_expression_to_function
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, EXISTS_TYPE
from mo_logs import Log
from mo_threads import Thread
from mo_times.timer import Timer
from pyLibrary import convert

EXPRESSION_PREFIX = "_expr."

_ = convert


def is_deepop(es, query):
    if query.edges or query.groupby:
        return False
    if all(s.aggregate not in (None, "none") for s in listwrap(query.select)):
        return False
    if len(split_field(query.frum.name)) > 1:
        return True

    # ASSUME IT IS NESTED IF WE ARE ASKING FOR NESTED COLUMNS
    # vars_ = query_get_all_vars(query)
    # columns = query.frum.get_columns()
    # if any(c for c in columns if len(c.nested_path) != 1 and c.name in vars_):
    #     return True
    return False

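# A hedged illustration of the depth test above, with hypothetical table
# names: a dotted FROM path means the query targets a nested table.
assert len(split_field("task.action.timings")) == 3   # NESTED -> DEEP QUERY
assert len(split_field("task")) == 1                  # FLAT -> NOT DEEP
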
def es_deepop(es, query):
|
||||
schema = query.frum.schema
|
||||
query_path = schema.query_path[0]
|
||||
|
||||
# TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
|
||||
# THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS
|
||||
# {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
|
||||
# LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
|
||||
post_expressions = {}
|
||||
es_query, es_filters = es_query_template(query_path)
|
||||
|
||||
# SPLIT WHERE CLAUSE BY DEPTH
|
||||
wheres = split_expression_by_depth(query.where, schema)
|
||||
for i, f in enumerate(es_filters):
|
||||
script = AndOp("and", wheres[i]).partial_eval().to_esfilter(schema)
|
||||
set_default(f, script)
|
||||
|
||||
if not wheres[1]:
|
||||
# WITHOUT NESTED CONDITIONS, WE MUST ALSO RETURN DOCS WITH NO NESTED RECORDS
|
||||
more_filter = {
|
||||
"and": [
|
||||
es_filters[0],
|
||||
{"missing": {"field": untype_path(query_path) + "." + EXISTS_TYPE}}
|
||||
]
|
||||
}
|
||||
else:
|
||||
more_filter = None
|
||||
|
||||
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
|
||||
|
||||
# es_query.sort = jx_sort_to_es_sort(query.sort)
|
||||
map_to_es_columns = schema.map_to_es()
|
||||
# {c.names["."]: c.es_column for c in schema.leaves(".")}
|
||||
query_for_es = query.map(map_to_es_columns)
|
||||
es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)
|
||||
|
||||
es_query.fields = []
|
||||
|
||||
is_list = isinstance(query.select, list)
|
||||
new_select = FlatList()
|
||||
|
||||
i = 0
|
||||
for s in listwrap(query.select):
|
||||
if isinstance(s.value, LeavesOp) and isinstance(s.value.term, Variable):
|
||||
# IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
|
||||
leaves = schema.leaves(s.value.term.var)
|
||||
col_names = set()
|
||||
for c in leaves:
|
||||
if c.nested_path[0] == ".":
|
||||
if c.jx_type == NESTED:
|
||||
continue
|
||||
es_query.fields += [c.es_column]
|
||||
c_name = untype_path(c.names[query_path])
|
||||
col_names.add(c_name)
|
||||
new_select.append({
|
||||
"name": concat_field(s.name, c_name),
|
||||
"nested_path": c.nested_path[0],
|
||||
"put": {"name": concat_field(s.name, literal_field(c_name)), "index": i, "child": "."},
|
||||
"pull": get_pull_function(c)
|
||||
})
|
||||
i += 1
|
||||
|
||||
# REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
|
||||
for n in new_select:
|
||||
if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
|
||||
n.put.name = n.name = n.name.lstrip(".")
|
||||
col_names.add(n.name)
|
||||
elif isinstance(s.value, Variable):
|
||||
net_columns = schema.leaves(s.value.var)
|
||||
if not net_columns:
|
||||
new_select.append({
|
||||
"name": s.name,
|
||||
"nested_path": ".",
|
||||
"put": {"name": s.name, "index": i, "child": "."},
|
||||
"pull": NULL
|
||||
})
|
||||
else:
|
||||
for n in net_columns:
|
||||
pull = get_pull_function(n)
|
||||
if n.nested_path[0] == ".":
|
||||
if n.jx_type == NESTED:
|
||||
continue
|
||||
es_query.fields += [n.es_column]
|
||||
|
||||
# WE MUST FIGURE OUT WHICH NAMESPACE s.value.var IS USING SO WE CAN EXTRACT THE child
|
||||
for np in n.nested_path:
|
||||
c_name = untype_path(n.names[np])
|
||||
if startswith_field(c_name, s.value.var):
|
||||
child = relative_field(c_name, s.value.var)
|
||||
break
|
||||
else:
|
||||
child = relative_field(untype_path(n.names[n.nested_path[0]]), s.value.var)
|
||||
|
||||
new_select.append({
|
||||
"name": s.name,
|
||||
"pull": pull,
|
||||
"nested_path": n.nested_path[0],
|
||||
"put": {
|
||||
"name": s.name,
|
||||
"index": i,
|
||||
"child": child
|
||||
}
|
||||
})
|
||||
i += 1
|
||||
else:
|
||||
expr = s.value
|
||||
for v in expr.vars():
|
||||
for c in schema[v.var]:
|
||||
if c.nested_path[0] == ".":
|
||||
es_query.fields += [c.es_column]
|
||||
# else:
|
||||
# Log.error("deep field not expected")
|
||||
|
||||
pull_name = EXPRESSION_PREFIX + s.name
|
||||
map_to_local = MapToLocal(schema)
|
||||
pull = jx_expression_to_function(pull_name)
|
||||
post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())
|
||||
|
||||
new_select.append({
|
||||
"name": s.name if is_list else ".",
|
||||
"pull": pull,
|
||||
"value": expr.__data__(),
|
||||
"put": {"name": s.name, "index": i, "child": "."}
|
||||
})
|
||||
i += 1
|
||||
|
||||
# <COMPLICATED> ES needs two calls to get all documents
|
||||
more = []
|
||||
def get_more(please_stop):
|
||||
more.append(es_post(
|
||||
es,
|
||||
Data(
|
||||
query={"filtered": {"filter": more_filter}},
|
||||
fields=es_query.fields
|
||||
),
|
||||
query.limit
|
||||
))
|
||||
if more_filter:
|
||||
need_more = Thread.run("get more", target=get_more)
|
||||
|
||||
with Timer("call to ES") as call_timer:
|
||||
data = es_post(es, es_query, query.limit)
|
||||
|
||||
# EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
|
||||
def inners():
|
||||
for t in data.hits.hits:
|
||||
for i in t.inner_hits[literal_field(query_path)].hits.hits:
|
||||
t._inner = i._source
|
||||
for k, e in post_expressions.items():
|
||||
t[k] = e(t)
|
||||
yield t
|
||||
if more_filter:
|
||||
Thread.join(need_more)
|
||||
for t in more[0].hits.hits:
|
||||
yield t
|
||||
#</COMPLICATED>
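The second call exists because inner_hits only returns parents that have at least one matching nested record; parents with none must be fetched separately. A hedged sketch of that second filter (field names are examples; the "~e~" exists-marker suffix is assumed from mo_json.typed_encoder):

second_filter = {"and": [
    {"term": {"status": "done"}},             # example stand-in for es_filters[0]
    {"missing": {"field": "repo.files.~e~"}}  # no exists marker => no nested records
]}
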
|
||||
|
||||
try:
|
||||
formatter, groupby_formatter, mime_type = format_dispatch[query.format]
|
||||
|
||||
output = formatter(inners(), new_select, query)
|
||||
output.meta.timing.es = call_timer.duration
|
||||
output.meta.content_type = mime_type
|
||||
output.meta.es_query = es_query
|
||||
return output
|
||||
except Exception as e:
|
||||
Log.error("problem formatting", e)
|
||||
|
||||
|
||||
class MapToLocal(object):
    """
    MAP FROM RELATIVE/ABSOLUTE NAMESPACE TO PYTHON THAT WILL EXTRACT RESULT
    """

    def __init__(self, map_to_columns):
        self.map_to_columns = map_to_columns

    def __getitem__(self, item):
        return self.get(item)

    def get(self, item):
        cs = self.map_to_columns[item]
        if len(cs) == 0:
            return "Null"
        elif len(cs) == 1:
            return get_pull(cs[0])
        else:
            return "coalesce(" + (",".join(get_pull(c) for c in cs)) + ")"
|
||||
|
||||
|
The diff for this file was not shown because it is too large
|
@ -1,316 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http:# mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_base.expressions import TupleOp
|
||||
from jx_elasticsearch.es14.aggs import count_dim, aggs_iterator, format_dispatch, drill
|
||||
from jx_python.containers.cube import Cube
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import Data, set_default, wrap, split_field, coalesce
|
||||
from mo_future import sort_using_key
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote
|
||||
from pyLibrary import convert
|
||||
|
||||
FunctionType = type(lambda: 1)
|
||||
|
||||
def format_cube(decoders, aggs, start, query, select):
|
||||
# decoders = sorted(decoders, key=lambda d: -d.edge.dim) # REVERSE DECODER ORDER, BECAUSE ES QUERY WAS BUILT IN REVERSE ORDER
|
||||
new_edges = count_dim(aggs, decoders)
|
||||
|
||||
dims = []
|
||||
for e in new_edges:
|
||||
if isinstance(e.value, TupleOp):
|
||||
e.allowNulls = False
|
||||
|
||||
extra = 0 if e.allowNulls is False else 1
|
||||
dims.append(len(e.domain.partitions) + extra)
|
||||
|
||||
dims = tuple(dims)
|
||||
matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select]
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
for s, m in matricies:
|
||||
try:
|
||||
v = s.pull(agg)
|
||||
m[coord] = v
|
||||
except Exception as e:
|
||||
# THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
|
||||
if agg.get('doc_count') != 0:
|
||||
Log.error("Programmer error", cause=e)
|
||||
|
||||
cube = Cube(
|
||||
query.select,
|
||||
sort_using_key(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY
|
||||
{s.name: m for s, m in matricies}
|
||||
)
|
||||
cube.frum = query
|
||||
return cube
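
The cube layout in isolation: one Matrix per select, dimensioned by the edge domains (+1 slot when allowNulls adds a null part). A tiny sketch, mirroring the m[coord] = v assignment above:

from mo_collections.matrix import Matrix

m = Matrix(dims=(3,), zeros=0)  # one edge, 3 parts, allowNulls=False
m[(1,)] = 42                    # coord -> aggregate value
print(m[(1,)])                  # 42
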
|
||||
|
||||
|
||||
def format_cube_from_aggop(decoders, aggs, start, query, select):
|
||||
agg = drill(aggs)
|
||||
matricies = [(s, Matrix(dims=[], zeros=s.default)) for s in select]
|
||||
for s, m in matricies:
|
||||
m[tuple()] = s.pull(agg)
|
||||
cube = Cube(query.select, [], {s.name: m for s, m in matricies})
|
||||
cube.frum = query
|
||||
return cube
|
||||
|
||||
|
||||
def format_table(decoders, aggs, start, query, select):
|
||||
new_edges = count_dim(aggs, decoders)
|
||||
header = new_edges.name + select.name
|
||||
|
||||
def data():
|
||||
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
|
||||
is_sent = Matrix(dims=dims, zeros=0)
|
||||
|
||||
if query.sort and not query.groupby:
|
||||
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
missing_coord = all_coord.next()
|
||||
while coord != missing_coord:
|
||||
record = [d.get_value(missing_coord[i]) for i, d in enumerate(decoders)]
|
||||
for s in select:
|
||||
if s.aggregate == "count":
|
||||
record.append(0)
|
||||
else:
|
||||
record.append(None)
|
||||
yield record
|
||||
missing_coord = all_coord.next()
|
||||
|
||||
output = [d.get_value(c) for c, d in zip(coord, decoders)]
|
||||
for s in select:
|
||||
output.append(s.pull(agg))
|
||||
yield output
|
||||
else:
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
is_sent[coord] = 1
|
||||
|
||||
output = [d.get_value(c) for c, d in zip(coord, decoders)]
|
||||
for s in select:
|
||||
output.append(s.pull(agg))
|
||||
yield output
|
||||
|
||||
# EMIT THE MISSING CELLS IN THE CUBE
|
||||
if not query.groupby:
|
||||
for c, v in is_sent:
|
||||
if not v:
|
||||
record = [d.get_value(c[i]) for i, d in enumerate(decoders)]
|
||||
for s in select:
|
||||
if s.aggregate == "count":
|
||||
record.append(0)
|
||||
else:
|
||||
record.append(None)
|
||||
yield record
|
||||
|
||||
return Data(
|
||||
meta={"format": "table"},
|
||||
header=header,
|
||||
data=list(data())
|
||||
)
|
||||
|
||||
|
||||
def format_table_from_groupby(decoders, aggs, start, query, select):
|
||||
header = [d.edge.name.replace("\\.", ".") for d in decoders] + select.name
|
||||
|
||||
def data():
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
if agg.get('doc_count', 0) == 0:
|
||||
continue
|
||||
output = [d.get_value_from_row(row) for d in decoders]
|
||||
for s in select:
|
||||
output.append(s.pull(agg))
|
||||
yield output
|
||||
|
||||
return Data(
|
||||
meta={"format": "table"},
|
||||
header=header,
|
||||
data=list(data())
|
||||
)
|
||||
|
||||
|
||||
def format_table_from_aggop(decoders, aggs, start, query, select):
|
||||
header = select.name
|
||||
agg = drill(aggs)
|
||||
row = []
|
||||
for s in select:
|
||||
row.append(s.pull(agg))
|
||||
|
||||
return Data(
|
||||
meta={"format": "table"},
|
||||
header=header,
|
||||
data=[row]
|
||||
)
|
||||
|
||||
|
||||
def format_tab(decoders, aggs, start, query, select):
|
||||
table = format_table(decoders, aggs, start, query, select)
|
||||
|
||||
def data():
|
||||
yield "\t".join(map(quote, table.header))
|
||||
for d in table.data:
|
||||
yield "\t".join(map(quote, d))
|
||||
|
||||
return data()
|
||||
|
||||
|
||||
def format_csv(decoders, aggs, start, query, select):
|
||||
table = format_table(decoders, aggs, start, query, select)
|
||||
|
||||
def data():
|
||||
yield ", ".join(map(quote, table.header))
|
||||
for d in table.data:
|
||||
yield ", ".join(map(quote, d))
|
||||
|
||||
return data()
|
||||
|
||||
|
||||
def format_list_from_groupby(decoders, aggs, start, query, select):
|
||||
def data():
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
if agg.get('doc_count', 0) == 0:
|
||||
continue
|
||||
output = Data()
|
||||
for g, d in zip(query.groupby, decoders):
|
||||
output[coalesce(g.put.name, g.name)] = d.get_value_from_row(row)
|
||||
|
||||
for s in select:
|
||||
output[s.name] = s.pull(agg)
|
||||
yield output
|
||||
|
||||
for g in query.groupby:
|
||||
g.put.name = coalesce(g.put.name, g.name)
|
||||
|
||||
output = Data(
|
||||
meta={"format": "list"},
|
||||
data=list(data())
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def format_list(decoders, aggs, start, query, select):
|
||||
new_edges = count_dim(aggs, decoders)
|
||||
|
||||
def data():
|
||||
dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
|
||||
|
||||
is_sent = Matrix(dims=dims, zeros=0)
|
||||
if query.sort and not query.groupby:
|
||||
# TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
|
||||
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
|
||||
for _, coord, agg in aggs_iterator(aggs, decoders):
|
||||
missing_coord = all_coord.next()
|
||||
while coord != missing_coord:
|
||||
# INSERT THE MISSING COORDINATE INTO THE GENERATION
|
||||
output = Data()
|
||||
for i, d in enumerate(decoders):
|
||||
output[query.edges[i].name] = d.get_value(missing_coord[i])
|
||||
|
||||
for s in select:
|
||||
if s.aggregate == "count":
|
||||
output[s.name] = 0
|
||||
yield output
|
||||
missing_coord = all_coord.next()
|
||||
|
||||
output = Data()
|
||||
for e, c, d in zip(query.edges, coord, decoders):
|
||||
output[e.name] = d.get_value(c)
|
||||
|
||||
for s in select:
|
||||
output[s.name] = s.pull(agg)
|
||||
yield output
|
||||
else:
|
||||
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
is_sent[coord] = 1
|
||||
|
||||
output = Data()
|
||||
for e, c, d in zip(query.edges, coord, decoders):
|
||||
output[e.name] = d.get_value(c)
|
||||
|
||||
for s in select:
|
||||
output[s.name] = s.pull(agg)
|
||||
yield output
|
||||
|
||||
# EMIT THE MISSING CELLS IN THE CUBE
|
||||
if not query.groupby:
|
||||
for c, v in is_sent:
|
||||
if not v:
|
||||
output = Data()
|
||||
for i, d in enumerate(decoders):
|
||||
output[query.edges[i].name] = d.get_value(c[i])
|
||||
|
||||
for s in select:
|
||||
if s.aggregate == "count":
|
||||
output[s.name] = 0
|
||||
yield output
|
||||
|
||||
output = Data(
|
||||
meta={"format": "list"},
|
||||
data=list(data())
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def format_list_from_aggop(decoders, aggs, start, query, select):
|
||||
agg = drill(aggs)
|
||||
|
||||
if isinstance(query.select, list):
|
||||
item = Data()
|
||||
for s in select:
|
||||
item[s.name] = s.pull(agg)
|
||||
else:
|
||||
item = select[0].pull(agg)
|
||||
|
||||
if query.edges or query.groupby:
|
||||
return wrap({
|
||||
"meta": {"format": "list"},
|
||||
"data": [item]
|
||||
})
|
||||
else:
|
||||
return wrap({
|
||||
"meta": {"format": "value"},
|
||||
"data": item
|
||||
})
|
||||
|
||||
|
||||
def format_line(decoders, aggs, start, query, select):
|
||||
list = format_list(decoders, aggs, start, query, select)
|
||||
|
||||
def data():
|
||||
for d in list.data:
|
||||
yield convert.value2json(d)
|
||||
|
||||
return data()
|
||||
|
||||
|
||||
set_default(format_dispatch, {
|
||||
None: (format_cube, format_table_from_groupby, format_cube_from_aggop, "application/json"),
|
||||
"cube": (format_cube, format_cube, format_cube_from_aggop, "application/json"),
|
||||
"table": (format_table, format_table_from_groupby, format_table_from_aggop, "application/json"),
|
||||
"list": (format_list, format_list_from_groupby, format_list_from_aggop, "application/json"),
|
||||
# "csv": (format_csv, format_csv_from_groupby, "text/csv"),
|
||||
# "tab": (format_tab, format_tab_from_groupby, "text/tab-separated-values"),
|
||||
# "line": (format_line, format_line_from_groupby, "application/json")
|
||||
})
|
||||
|
||||
|
||||
def _get(v, k, d):
    for p in split_field(k):
        try:
            v = v.get(p)
            if v is None:
                return d
        except Exception:
            v = [vv.get(p) for vv in v]
    return v
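
For example (plain dicts are enough, since only .get() is used):

doc = {"a": {"b": 1}}
print(_get(doc, "a.b", None))    # 1
print(_get(doc, "a.x", "none"))  # "none"
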
|
|
@ -1,378 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http:# mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base.domains import ALGEBRAIC
|
||||
from jx_base.expressions import IDENTITY
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
from jx_elasticsearch.es14.expressions import Variable, LeavesOp
|
||||
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
|
||||
from jx_python.containers.cube import Cube
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_json.typed_encoder import NESTED
|
||||
from mo_json.typed_encoder import untype_path, unnest_path, untyped
|
||||
from mo_logs import Log
|
||||
from mo_math import AND
|
||||
from mo_math import MAX
|
||||
from mo_times.timer import Timer
|
||||
|
||||
format_dispatch = {}
|
||||
|
||||
|
||||
def is_setop(es, query):
|
||||
select = listwrap(query.select)
|
||||
|
||||
if not query.edges:
|
||||
isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
|
||||
simpleAgg = AND([s.aggregate in ("count", "none") for s in select]) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
|
||||
|
||||
# NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
|
||||
if simpleAgg or isDeep:
|
||||
return True
|
||||
else:
|
||||
isSmooth = AND((e.domain.type in ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
|
||||
if isSmooth:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def es_setop(es, query):
|
||||
schema = query.frum.schema
|
||||
|
||||
es_query, filters = es_query_template(schema.query_path[0])
|
||||
nested_filter = None
|
||||
set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
|
||||
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
|
||||
es_query.fields = FlatList()
|
||||
|
||||
selects = wrap([s.copy() for s in listwrap(query.select)])
|
||||
new_select = FlatList()
|
||||
schema = query.frum.schema
|
||||
# columns = schema.columns
|
||||
# nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")
|
||||
|
||||
es_query.sort = jx_sort_to_es_sort(query.sort, schema)
|
||||
|
||||
put_index = 0
|
||||
for select in selects:
|
||||
# IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
|
||||
if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
|
||||
term = select.value.term
|
||||
leaves = schema.leaves(term.var)
|
||||
for c in leaves:
|
||||
full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
|
||||
if c.jx_type == NESTED:
|
||||
es_query.fields = ["_source"]
|
||||
new_select.append({
|
||||
"name": full_name,
|
||||
"value": Variable(c.es_column),
|
||||
"put": {"name": literal_field(full_name), "index": put_index, "child": "."},
|
||||
"pull": get_pull_source(c.es_column)
|
||||
})
|
||||
put_index += 1
|
||||
elif c.nested_path[0] != ".":
|
||||
pass # THE NESTED PARENT WILL CAPTURE THIS
|
||||
else:
|
||||
es_query.fields += [c.es_column]
|
||||
new_select.append({
|
||||
"name": full_name,
|
||||
"value": Variable(c.es_column),
|
||||
"put": {"name": literal_field(full_name), "index": put_index, "child": "."}
|
||||
})
|
||||
put_index += 1
|
||||
elif isinstance(select.value, Variable):
|
||||
s_column = select.value.var
|
||||
# LEAVES OF OBJECT
|
||||
leaves = schema.leaves(s_column)
|
||||
nested_selects = {}
|
||||
if leaves:
|
||||
if s_column == '.' or any(c.jx_type == NESTED for c in leaves):
|
||||
# PULL WHOLE NESTED ARRAYS
|
||||
es_query.fields = ["_source"]
|
||||
for c in leaves:
|
||||
if len(c.nested_path) == 1:
|
||||
jx_name = untype_path(c.names["."])
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"value": Variable(c.es_column),
|
||||
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
|
||||
"pull": get_pull_source(c.es_column)
|
||||
})
|
||||
else:
|
||||
# PULL ONLY WHAT'S NEEDED
|
||||
for c in leaves:
|
||||
if len(c.nested_path) == 1:
|
||||
jx_name = untype_path(c.names["."])
|
||||
if c.jx_type == NESTED:
|
||||
es_query.fields = ["_source"]
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"value": Variable(c.es_column),
|
||||
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
|
||||
"pull": get_pull_source(c.es_column)
|
||||
})
|
||||
|
||||
else:
|
||||
es_query.fields += [c.es_column]
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"value": Variable(c.es_column),
|
||||
"put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}
|
||||
})
|
||||
else:
|
||||
if not nested_filter:
|
||||
where = filters[0].copy()
|
||||
nested_filter = [where]
|
||||
for k in filters[0].keys():
|
||||
filters[0][k] = None
|
||||
set_default(
|
||||
filters[0],
|
||||
es_and([where, es_or(nested_filter)])
|
||||
)
|
||||
|
||||
nested_path = c.nested_path[0]
|
||||
if nested_path not in nested_selects:
|
||||
where = nested_selects[nested_path] = Data()
|
||||
nested_filter += [where]
|
||||
where.nested.path = nested_path
|
||||
where.nested.query.match_all = {}
|
||||
where.nested.inner_hits._source = False
|
||||
where.nested.inner_hits.fields += [c.es_column]
|
||||
|
||||
child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
|
||||
pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"value": select.value,
|
||||
"put": {
|
||||
"name": select.name,
|
||||
"index": put_index,
|
||||
"child": child
|
||||
},
|
||||
"pull": pull
|
||||
})
|
||||
else:
|
||||
nested_selects[nested_path].nested.inner_hits.fields += [c.es_column]
|
||||
else:
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"value": Variable("$dummy"),
|
||||
"put": {"name": select.name, "index": put_index, "child": "."}
|
||||
})
|
||||
put_index += 1
|
||||
else:
|
||||
painless = select.value.partial_eval().to_es_script(schema)
|
||||
es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"pull": jx_expression_to_function("fields." + literal_field(select.name)),
|
||||
"put": {"name": select.name, "index": put_index, "child": "."}
|
||||
})
|
||||
put_index += 1
|
||||
|
||||
for n in new_select:
|
||||
if n.pull:
|
||||
continue
|
||||
elif isinstance(n.value, Variable):
|
||||
if es_query.fields[0] == "_source":
|
||||
es_query.fields = ["_source"]
|
||||
n.pull = get_pull_source(n.value.var)
|
||||
else:
|
||||
n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
|
||||
else:
|
||||
Log.error("Do not know what to do")
|
||||
|
||||
with Timer("call to ES") as call_timer:
|
||||
Log.note("{{data}}", data=es_query)
|
||||
data = es_post(es, es_query, query.limit)
|
||||
|
||||
T = data.hits.hits
|
||||
|
||||
try:
|
||||
formatter, groupby_formatter, mime_type = format_dispatch[query.format]
|
||||
|
||||
output = formatter(T, new_select, query)
|
||||
output.meta.timing.es = call_timer.duration
|
||||
output.meta.content_type = mime_type
|
||||
output.meta.es_query = es_query
|
||||
return output
|
||||
except Exception as e:
|
||||
Log.error("problem formatting", e)
|
||||
|
||||
|
||||
def accumulate_nested_doc(nested_path, expr=IDENTITY):
    """
    :param nested_path: THE PATH USED TO EXTRACT THE NESTED RECORDS
    :param expr: FUNCTION USED ON THE NESTED OBJECT TO GET SPECIFIC VALUE
    :return: THE DE_TYPED NESTED OBJECT ARRAY
    """
    name = literal_field(nested_path)

    def output(doc):
        acc = []
        for h in doc.inner_hits[name].hits.hits:
            i = h._nested.offset
            obj = Data()
            for f, v in h.fields.items():
                local_path = untype_path(relative_field(f, nested_path))
                obj[local_path] = unwraplist(v)
            # EXTEND THE LIST TO THE LENGTH WE REQUIRE
            for _ in range(len(acc), i + 1):
                acc.append(None)
            acc[i] = expr(obj)
        return acc

    return output
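
The offset bookkeeping in isolation — inner hits can arrive out of order, so the list is padded before each placement:

acc = []
for i, value in [(2, "c"), (0, "a")]:  # (_nested.offset, extracted value)
    for _ in range(len(acc), i + 1):
        acc.append(None)
    acc[i] = value
print(acc)  # ['a', None, 'c']
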
|
||||
|
||||
|
||||
def format_list(T, select, query=None):
|
||||
data = []
|
||||
if isinstance(query.select, list):
|
||||
for row in T:
|
||||
r = Data()
|
||||
for s in select:
|
||||
v = s.pull(row)
|
||||
r[s.put.name][s.put.child] = unwraplist(v)
|
||||
data.append(r if r else None)
|
||||
elif isinstance(query.select.value, LeavesOp):
|
||||
for row in T:
|
||||
r = Data()
|
||||
for s in select:
|
||||
r[s.put.name][s.put.child] = unwraplist(s.pull(row))
|
||||
data.append(r if r else None)
|
||||
else:
|
||||
for row in T:
|
||||
r = None
|
||||
for s in select:
|
||||
v = unwraplist(s.pull(row))
|
||||
if v is None:
|
||||
continue
|
||||
if s.put.child == ".":
|
||||
r = v
|
||||
else:
|
||||
if r is None:
|
||||
r = Data()
|
||||
r[s.put.child] = v
|
||||
|
||||
data.append(r)
|
||||
|
||||
return Data(
|
||||
meta={"format": "list"},
|
||||
data=data
|
||||
)
|
||||
|
||||
|
||||
def format_table(T, select, query=None):
|
||||
data = []
|
||||
num_columns = (MAX(select.put.index) + 1)
|
||||
for row in T:
|
||||
r = [None] * num_columns
|
||||
for s in select:
|
||||
value = unwraplist(s.pull(row))
|
||||
|
||||
if value == None:
|
||||
continue
|
||||
|
||||
index, child = s.put.index, s.put.child
|
||||
if child == ".":
|
||||
r[index] = value
|
||||
else:
|
||||
if r[index] is None:
|
||||
r[index] = Data()
|
||||
r[index][child] = value
|
||||
|
||||
data.append(r)
|
||||
|
||||
header = [None] * num_columns
|
||||
|
||||
if isinstance(query.select, Mapping) and not isinstance(query.select.value, LeavesOp):
|
||||
for s in select:
|
||||
header[s.put.index] = s.name
|
||||
else:
|
||||
for s in select:
|
||||
if header[s.put.index]:
|
||||
continue
|
||||
if s.name == ".":
|
||||
header[s.put.index] = "."
|
||||
else:
|
||||
header[s.put.index] = s.name
|
||||
|
||||
return Data(
|
||||
meta={"format": "table"},
|
||||
header=header,
|
||||
data=data
|
||||
)
|
||||
|
||||
|
||||
def format_cube(T, select, query=None):
|
||||
table = format_table(T, select, query)
|
||||
|
||||
if len(table.data) == 0:
|
||||
return Cube(
|
||||
select,
|
||||
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": 0, "interval": 1}}],
|
||||
data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
|
||||
)
|
||||
|
||||
cols = transpose(*unwrap(table.data))
|
||||
return Cube(
|
||||
select,
|
||||
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}],
|
||||
data={h: Matrix(list=cols[i]) for i, h in enumerate(table.header)}
|
||||
)
|
||||
|
||||
|
||||
set_default(format_dispatch, {
|
||||
None: (format_cube, None, "application/json"),
|
||||
"cube": (format_cube, None, "application/json"),
|
||||
"table": (format_table, None, "application/json"),
|
||||
"list": (format_list, None, "application/json")
|
||||
})
|
||||
|
||||
|
||||
def get_pull(column):
    if column.nested_path[0] == ".":
        return concat_field("fields", literal_field(column.es_column))
    else:
        depth = len(split_field(column.nested_path[0]))
        rel_name = split_field(column.es_column)[depth:]
        return join_field(["_inner"] + rel_name)


def get_pull_function(column):
    return jx_expression_to_function(get_pull(column))


def get_pull_source(es_column):
    def output(row):
        return untyped(row._source[es_column])
    return output
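
Worked examples (Col is a hypothetical stand-in exposing just the two attributes get_pull() reads):

class Col(object):
    def __init__(self, es_column, nested_path):
        self.es_column = es_column
        self.nested_path = nested_path

print(get_pull(Col("status.~s~", ["."])))
# fields.status\.~s~
print(get_pull(Col("repo.files.name.~s~", ["repo.files"])))
# _inner.name.~s~
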
|
||||
|
||||
|
||||
def get_pull_stats(stats_name, median_name):
|
||||
return jx_expression_to_function({"select": [
|
||||
{"name": "count", "value": stats_name + ".count"},
|
||||
{"name": "sum", "value": stats_name + ".sum"},
|
||||
{"name": "min", "value": stats_name + ".min"},
|
||||
{"name": "max", "value": stats_name + ".max"},
|
||||
{"name": "avg", "value": stats_name + ".avg"},
|
||||
{"name": "sos", "value": stats_name + ".sum_of_squares"},
|
||||
{"name": "std", "value": stats_name + ".std_deviation"},
|
||||
{"name": "var", "value": stats_name + ".variance"},
|
||||
{"name": "median", "value": median_name + ".values.50\\.0"}
|
||||
]})
|
||||
|
|
@ -1,135 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http:# mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_elasticsearch.es14.expressions import Variable
|
||||
from mo_dots import wrap
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
    :return:
    """

    if not isinstance(path, text_type):
        Log.error("expecting path to be a string")

    if path != ".":
        f0 = {}
        f1 = {}
        output = wrap({
            "query": {"filtered": {"filter": es_and([
                f0,
                {"nested": {
                    "path": path,
                    "filter": f1,
                    "inner_hits": {"size": 100000}
                }}
            ])}},
            "from": 0,
            "size": 0,
            "sort": []
        })
        return output, wrap([f0, f1])
    else:
        f0 = {}
        output = wrap({
            "query": {"filtered": {"filter": es_and([f0])}},
            "from": 0,
            "size": 0,
            "sort": []
        })
        return output, wrap([f0])
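
Usage sketch: a flat path yields one filter slot, a nested path yields two — the second slot lands inside the "nested" clause (the filter values here are examples):

es_query, filters = es_query_template("repo.files")
filters[0]["term"] = {"status": "done"}        # top-level condition
filters[1]["term"] = {"repo.files.ext": "py"}  # nested condition
print(es_query["query"]["filtered"]["filter"])
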
|
||||
|
||||
|
||||
def jx_sort_to_es_sort(sort, schema):
|
||||
if not sort:
|
||||
return []
|
||||
|
||||
output = []
|
||||
for s in sort:
|
||||
if isinstance(s.value, Variable):
|
||||
cols = schema.leaves(s.value.var)
|
||||
if s.sort == -1:
|
||||
types = OBJECT, STRING, NUMBER, BOOLEAN
|
||||
else:
|
||||
types = BOOLEAN, NUMBER, STRING, OBJECT
|
||||
|
||||
for type in types:
|
||||
for c in cols:
|
||||
if c.jx_type == type:
|
||||
if s.sort == -1:
|
||||
output.append({c.es_column: "desc"})
|
||||
else:
|
||||
output.append(c.es_column)
|
||||
else:
|
||||
from mo_logs import Log
|
||||
|
||||
Log.error("do not know how to handle")
|
||||
return output
|
||||
|
||||
|
||||
# FOR ELASTICSEARCH aggs
|
||||
aggregates = {
|
||||
"none": "none",
|
||||
"one": "count",
|
||||
"cardinality": "cardinality",
|
||||
"sum": "sum",
|
||||
"add": "sum",
|
||||
"count": "value_count",
|
||||
"maximum": "max",
|
||||
"minimum": "min",
|
||||
"max": "max",
|
||||
"min": "min",
|
||||
"mean": "avg",
|
||||
"average": "avg",
|
||||
"avg": "avg",
|
||||
"median": "median",
|
||||
"percentile": "percentile",
|
||||
"N": "count",
|
||||
"s0": "count",
|
||||
"s1": "sum",
|
||||
"s2": "sum_of_squares",
|
||||
"std": "std_deviation",
|
||||
"stddev": "std_deviation",
|
||||
"union": "union",
|
||||
"var": "variance",
|
||||
"variance": "variance",
|
||||
"stats": "stats"
|
||||
}
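
For example, the alias normalization:

assert aggregates["average"] == "avg"
assert aggregates["add"] == "sum"
assert aggregates["stddev"] == "std_deviation"
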
|
||||
|
||||
NON_STATISTICAL_AGGS = {"none", "one"}
|
||||
|
||||
|
||||
def es_and(terms):
    return wrap({"and": terms})


def es_or(terms):
    return wrap({"or": terms})


def es_not(term):
    return wrap({"not": term})


def es_script(term):
    return wrap({"script": term})


def es_missing(term):
    return {"missing": {"field": term}}
|
|
@ -7,26 +7,28 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from jx_base import container
|
||||
from mo_future import is_text, is_binary
|
||||
from jx_base import Column, container
|
||||
from jx_base.container import Container
|
||||
from jx_base.dimensions import Dimension
|
||||
from jx_base.expressions import jx_expression
|
||||
from jx_base.query import QueryOp
|
||||
from jx_base.language import is_op
|
||||
from jx_elasticsearch.es52.aggs import es_aggsop, is_aggsop
|
||||
from jx_elasticsearch.es52.deep import is_deepop, es_deepop
|
||||
from jx_elasticsearch.es52.setop import is_setop, es_setop
|
||||
from jx_elasticsearch.es52.deep import es_deepop, is_deepop
|
||||
from jx_elasticsearch.es52.setop import es_setop, is_setop
|
||||
from jx_elasticsearch.es52.util import aggregates
|
||||
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
|
||||
from jx_python import jx
|
||||
from mo_dots import Data, unwrap, coalesce, split_field, join_field, wrap, listwrap
|
||||
from mo_json import value2json
|
||||
from mo_dots import Data, coalesce, is_list, join_field, listwrap, split_field, startswith_field, unwrap, wrap
|
||||
from mo_future import sort_using_key
|
||||
from mo_json import EXISTS, OBJECT, value2json
|
||||
from mo_json.typed_encoder import EXISTS_TYPE
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log, Except
|
||||
from mo_logs import Except, Log
|
||||
from mo_times import Date
|
||||
from pyLibrary.env import elasticsearch, http
|
||||
|
||||
|
||||
|
@ -86,6 +88,41 @@ class ES52(Container):
|
|||
Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed)
|
||||
self.typed = typed
|
||||
|
||||
        if not typed:
            # ADD EXISTENCE COLUMNS
            all_paths = {".": None}  # MAP FROM path TO parent TO MAKE A TREE

            def nested_path_of(v):
                if not v:
                    return []
                else:
                    return [v] + nested_path_of(all_paths[v])

            all = sort_using_key(set(step for path in self.snowflake.query_paths for step in path), key=lambda p: len(split_field(p)))
            for step in sorted(all):
                if step in all_paths:
                    continue
                else:
                    best = '.'
                    for candidate in all_paths.keys():
                        if startswith_field(step, candidate):
                            if startswith_field(candidate, best):
                                best = candidate
                    all_paths[step] = best
            for p in all_paths.keys():
                nested_path = nested_path_of(all_paths[p])
                if not nested_path:
                    nested_path = ['.']
                self.namespace.meta.columns.add(Column(
                    name=p,
                    es_column=p,
                    es_index=self.name,
                    es_type=OBJECT,
                    jx_type=EXISTS,
                    nested_path=nested_path,
                    last_updated=Date.now()
                ))
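
The tree construction in isolation (a reduced sketch; the query paths are examples):

from mo_dots import split_field, startswith_field

all_paths = {".": None}
for step in sorted(["a", "a.b", "a.b.c"], key=lambda p: len(split_field(p))):
    best = "."
    for candidate in all_paths:
        if startswith_field(step, candidate) and startswith_field(candidate, best):
            best = candidate
    all_paths[step] = best

def nested_path_of(v):
    return [] if not v else [v] + nested_path_of(all_paths[v])

print(nested_path_of(all_paths["a.b.c"]))  # ['a.b', 'a', '.']
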
|
||||
|
||||
@property
|
||||
def snowflake(self):
|
||||
return self._namespace.get_snowflake(self.es.settings.alias)
|
||||
|
@ -140,7 +177,7 @@ class ES52(Container):
|
|||
)
|
||||
|
||||
frum = query["from"]
|
||||
if isinstance(frum, QueryOp):
|
||||
if is_op(frum, QueryOp):
|
||||
result = self.query(frum)
|
||||
q2 = query.copy()
|
||||
q2.frum = result
|
||||
|
@ -161,7 +198,7 @@ class ES52(Container):
|
|||
Log.error("problem", e)
|
||||
|
||||
def addDimension(self, dim):
|
||||
if isinstance(dim, list):
|
||||
if is_list(dim):
|
||||
Log.error("Expecting dimension to be a object, not a list:\n{{dim}}", dim= dim)
|
||||
self._addDimension(dim, [])
|
||||
|
||||
|
@ -198,12 +235,11 @@ class ES52(Container):
|
|||
es_index = self.es.cluster.get_index(read_only=False, alias=None, kwargs=self.es.settings)
|
||||
|
||||
schema = table.schema
|
||||
es_filter = jx_expression(command.where).to_esfilter(schema)
|
||||
|
||||
# GET IDS OF DOCUMENTS
|
||||
query = {
|
||||
"from": command['update'],
|
||||
"select": ["_id"] + [
|
||||
"select": [{"value": "_id"}] + [
|
||||
{"name": k, "value": v}
|
||||
for k, v in command.set.items()
|
||||
],
|
||||
|
@ -234,7 +270,8 @@ class ES52(Container):
|
|||
Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])
|
||||
|
||||
# DELETE BY QUERY, IF NEEDED
|
||||
if '.' in listwrap(command.clear):
|
||||
if "." in listwrap(command.clear):
|
||||
es_filter = self.es.cluster.lang[jx_expression(command.where)].to_esfilter(schema)
|
||||
self.es.delete_record(es_filter)
|
||||
return
|
||||
|
||||
|
|
|
@ -7,34 +7,45 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import deque
|
||||
|
||||
from jx_base.domains import SetDomain
|
||||
from jx_base.expressions import TupleOp, NULL
|
||||
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
|
||||
from jx_base.expressions import NULL, TupleOp, Variable as Variable_
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_base.language import is_op
|
||||
from jx_elasticsearch import post as es_post
|
||||
from jx_elasticsearch.es52.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder
|
||||
from jx_elasticsearch.es52.expressions import split_expression_by_depth, AndOp, Variable, NullOp
|
||||
from jx_elasticsearch.es52.decoders import AggsDecoder
|
||||
from jx_elasticsearch.es52.es_query import Aggs, ComplexAggs, ExprAggs, FilterAggs, NestedAggs, TermsAggs, simplify
|
||||
from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_path
|
||||
from jx_elasticsearch.es52.painless import Painless
|
||||
from jx_elasticsearch.es52.setop import get_pull_stats
|
||||
from jx_elasticsearch.es52.util import aggregates
|
||||
from jx_python import jx
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import encode_property, EXISTS
|
||||
from mo_dots import Data, Null, coalesce, join_field, listwrap, literal_field, unwrap, unwraplist, wrap
|
||||
from mo_future import first, is_text, text_type
|
||||
from mo_json import EXISTS, NESTED, OBJECT
|
||||
from mo_json.typed_encoder import encode_property
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote, expand_template
|
||||
from mo_math import Math, MAX, UNION
|
||||
from mo_logs.strings import expand_template, quote
|
||||
import mo_math
|
||||
from mo_times.timer import Timer
|
||||
|
||||
DEBUG = False
|
||||
|
||||
COMPARE_TUPLE = """
|
||||
(a, b)->{
|
||||
int i=0;
|
||||
for (dummy in a){ //ONLY THIS FOR LOOP IS ACCEPTED (ALL OTHER FORMS THROW NullPointerException)
|
||||
if (a[i]==null) return -1*({{dir}});
|
||||
if (b[i]==null) return 1*({{dir}});
|
||||
if (a[i]==null){
|
||||
if (b[i]==null){
|
||||
return 0;
|
||||
}else{
|
||||
return -1*({{dir}});
|
||||
}//endif
|
||||
}else if (b[i]==null) return {{dir}};
|
||||
|
||||
if (a[i]!=b[i]) {
|
||||
if (a[i] instanceof Boolean){
|
||||
|
@ -82,12 +93,15 @@ def is_aggsop(es, query):
|
|||
return False
|
||||
|
||||
|
||||
def get_decoders_by_depth(query):
|
||||
def get_decoders_by_path(query):
|
||||
"""
|
||||
RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
|
||||
RETURN MAP FROM QUERY PATH TO LIST OF DECODER ARRAYS
|
||||
|
||||
:param query:
|
||||
:return:
|
||||
"""
|
||||
schema = query.frum.schema
|
||||
output = FlatList()
|
||||
output = Data()
|
||||
|
||||
if query.edges:
|
||||
if query.sort and query.format != "cube":
|
||||
|
@ -99,7 +113,7 @@ def get_decoders_by_depth(query):
|
|||
|
||||
for edge in wrap(coalesce(query.edges, query.groupby, [])):
|
||||
limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
|
||||
if edge.value != None and not isinstance(edge.value, NullOp):
|
||||
if edge.value != None and not edge.value is NULL:
|
||||
edge = edge.copy()
|
||||
vars_ = edge.value.vars()
|
||||
for v in vars_:
|
||||
|
@ -119,32 +133,25 @@ def get_decoders_by_depth(query):
|
|||
for p in edge.domain.partitions:
|
||||
vars_ |= p.where.vars()
|
||||
|
||||
try:
|
||||
vars_ |= edge.value.vars()
|
||||
depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var))
|
||||
if -1 in depths:
|
||||
Log.error(
|
||||
"Do not know of column {{column}}",
|
||||
column=unwraplist([v for v in vars_ if schema[v] == None])
|
||||
)
|
||||
if len(depths) > 1:
|
||||
Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
|
||||
max_depth = MAX(depths)
|
||||
while len(output) <= max_depth:
|
||||
output.append([])
|
||||
except Exception as e:
|
||||
# USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
|
||||
max_depth = 0
|
||||
output.append([])
|
||||
vars_ |= edge.value.vars()
|
||||
depths = set(c.nested_path[0] for v in vars_ for c in schema.leaves(v.var))
|
||||
if not depths:
|
||||
Log.error(
|
||||
"Do not know of column {{column}}",
|
||||
column=unwraplist([v for v in vars_ if schema[v] == None])
|
||||
)
|
||||
if len(depths) > 1:
|
||||
Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
|
||||
|
||||
output[max_depth].append(AggsDecoder(edge, query, limit))
|
||||
decoder = AggsDecoder(edge, query, limit)
|
||||
output[literal_field(first(depths))] += [decoder]
|
||||
return output
|
||||
|
||||
|
||||
def sort_edges(query, prop):
|
||||
ordered_edges = []
|
||||
remaining_edges = getattr(query, prop)
|
||||
for s in query.sort:
|
||||
for s in jx.reverse(query.sort):
|
||||
for e in remaining_edges:
|
||||
if e.value == s.value:
|
||||
if isinstance(e.domain, SetDomain):
|
||||
|
@ -158,45 +165,59 @@ def sort_edges(query, prop):
|
|||
Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value)
|
||||
|
||||
ordered_edges.extend(remaining_edges)
|
||||
for i, o in enumerate(ordered_edges):
|
||||
o.dim = i # REORDER THE EDGES
|
||||
return ordered_edges
|
||||
|
||||
|
||||
def es_aggsop(es, frum, query):
|
||||
query = query.copy() # WE WILL MARK UP THIS QUERY
|
||||
schema = frum.schema
|
||||
query_path = schema.query_path[0]
|
||||
select = listwrap(query.select)
|
||||
|
||||
es_query = Data()
|
||||
new_select = Data() # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
|
||||
formula = []
|
||||
for s in select:
|
||||
if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
|
||||
if schema.query_path == ".":
|
||||
s.pull = jx_expression_to_function("doc_count")
|
||||
else:
|
||||
s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
|
||||
elif isinstance(s.value, Variable):
|
||||
if is_op(s.value, Variable_):
|
||||
s.query_path = query_path
|
||||
if s.aggregate == "count":
|
||||
new_select["count_"+literal_field(s.value.var)] += [s]
|
||||
else:
|
||||
new_select[literal_field(s.value.var)] += [s]
|
||||
elif s.aggregate:
|
||||
split_select = split_expression_by_path(s.value, schema, lang=Painless)
|
||||
for si_key, si_value in split_select.items():
|
||||
if si_value:
|
||||
if s.query_path:
|
||||
Log.error("can not handle more than one depth per select")
|
||||
s.query_path = si_key
|
||||
formula.append(s)
|
||||
|
||||
for canonical_name, many in new_select.items():
|
||||
acc = Aggs()
|
||||
for _, many in new_select.items():
|
||||
for s in many:
|
||||
columns = frum.schema.values(s.value.var)
|
||||
canonical_name = s.name
|
||||
if s.aggregate in ("value_count", "count"):
|
||||
columns = frum.schema.values(s.value.var, exclude_type=(OBJECT, NESTED))
|
||||
else:
|
||||
columns = frum.schema.values(s.value.var)
|
||||
|
||||
if s.aggregate == "count":
|
||||
canonical_names = []
|
||||
for column in columns:
|
||||
cn = literal_field(column.es_column + "_count")
|
||||
es_name = column.es_column + "_count"
|
||||
if column.jx_type == EXISTS:
|
||||
canonical_names.append(cn + ".doc_count")
|
||||
es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
|
||||
if column.nested_path[0] == query_path:
|
||||
canonical_names.append("doc_count")
|
||||
acc.add(NestedAggs(column.nested_path[0]).add(
|
||||
ComplexAggs(s)
|
||||
))
|
||||
else:
|
||||
canonical_names.append(cn+ ".value")
|
||||
es_query.aggs[cn].value_count.field = column.es_column
|
||||
canonical_names.append("value")
|
||||
acc.add(NestedAggs(column.nested_path[0]).add(
|
||||
ExprAggs(es_name, {"value_count": {"field": column.es_column}}, s)
|
||||
))
|
||||
if len(canonical_names) == 1:
|
||||
s.pull = jx_expression_to_function(canonical_names[0])
|
||||
else:
|
||||
|
@ -205,49 +226,48 @@ def es_aggsop(es, frum, query):
|
|||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
# ES USES DIFFERENT METHOD FOR PERCENTILES
|
||||
key = literal_field(canonical_name + " percentile")
|
||||
|
||||
es_query.aggs[key].percentiles.field = columns[0].es_column
|
||||
es_query.aggs[key].percentiles.percents += [50]
|
||||
s.pull = jx_expression_to_function(key + ".values.50\\.0")
|
||||
key = canonical_name + " percentile"
|
||||
acc.add(ExprAggs(key, {"percentiles": {
|
||||
"field": first(columns).es_column,
|
||||
"percents": [50]
|
||||
}}, s))
|
||||
s.pull = jx_expression_to_function("values.50\\.0")
|
||||
elif s.aggregate == "percentile":
|
||||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
# ES USES DIFFERENT METHOD FOR PERCENTILES
|
||||
key = literal_field(canonical_name + " percentile")
|
||||
if isinstance(s.percentile, text_type) or s.percetile < 0 or 1 < s.percentile:
|
||||
key = canonical_name + " percentile"
|
||||
if is_text(s.percentile) or s.percentile < 0 or 1 < s.percentile:
|
||||
Log.error("Expecting percentile to be a float from 0.0 to 1.0")
|
||||
percent = Math.round(s.percentile * 100, decimal=6)
|
||||
percent = mo_math.round(s.percentile * 100, decimal=6)
|
||||
|
||||
es_query.aggs[key].percentiles.field = columns[0].es_column
|
||||
es_query.aggs[key].percentiles.percents += [percent]
|
||||
es_query.aggs[key].percentiles.tdigest.compression = 2
|
||||
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
|
||||
acc.add(ExprAggs(key, {"percentiles": {
|
||||
"field": first(columns).es_column,
|
||||
"percents": [percent],
|
||||
"tdigest": {"compression": 2}
|
||||
}}, s))
|
||||
s.pull = jx_expression_to_function(join_field(["values", text_type(percent)]))
|
||||
elif s.aggregate == "cardinality":
|
||||
canonical_names = []
|
||||
for column in columns:
|
||||
cn = literal_field(column.es_column + "_cardinality")
|
||||
canonical_names.append(cn)
|
||||
es_query.aggs[cn].cardinality.field = column.es_column
|
||||
if len(columns) == 1:
|
||||
s.pull = jx_expression_to_function(canonical_names[0] + ".value")
|
||||
else:
|
||||
s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
|
||||
path = column.es_column + "_cardinality"
|
||||
acc.add(ExprAggs(path, {"cardinality": {"field": column.es_column}}, s))
|
||||
s.pull = jx_expression_to_function("value")
|
||||
elif s.aggregate == "stats":
|
||||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
# REGULAR STATS
|
||||
stats_name = literal_field(canonical_name)
|
||||
es_query.aggs[stats_name].extended_stats.field = columns[0].es_column
|
||||
|
||||
complex = ComplexAggs(s).add(ExprAggs(canonical_name, {"extended_stats": {"field": first(columns).es_column}}, None))
|
||||
# GET MEDIAN TOO!
|
||||
median_name = literal_field(canonical_name + "_percentile")
|
||||
es_query.aggs[median_name].percentiles.field = columns[0].es_column
|
||||
es_query.aggs[median_name].percentiles.percents += [50]
|
||||
complex.add(ExprAggs(canonical_name + "_percentile", {"percentiles": {
|
||||
"field": first(columns).es_column,
|
||||
"percents": [50]
|
||||
}}, None))
|
||||
|
||||
acc.add(complex)
|
||||
s.pull = get_pull_stats(stats_name, median_name)
|
||||
elif s.aggregate == "union":
|
||||
pulls = []
|
||||
for column in columns:
|
||||
script = {"scripted_metric": {
|
||||
'init_script': 'params._agg.terms = new HashSet()',
|
||||
|
@ -255,41 +275,61 @@ def es_aggsop(es, frum, query):
|
|||
'combine_script': 'return params._agg.terms.toArray()',
|
||||
'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
|
||||
}}
|
||||
stats_name = column.es_column
|
||||
acc.add(NestedAggs(column.nested_path[0]).add(ExprAggs(stats_name, script, s)))
|
||||
s.pull = jx_expression_to_function("value")
|
||||
elif s.aggregate == "count_values":
|
||||
# RETURN MAP FROM VALUE TO THE NUMBER OF TIMES FOUND IN THE DOCUMENTS
|
||||
# NOT A NESTED DOC, RATHER A MULTIVALUE FIELD
|
||||
for column in columns:
|
||||
script = {"scripted_metric": {
|
||||
'params': {"_agg": {}},
|
||||
'init_script': 'params._agg.terms = new HashMap()',
|
||||
'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.put(v, Optional.ofNullable(params._agg.terms.get(v)).orElse(0)+1);',
|
||||
'combine_script': 'return params._agg.terms',
|
||||
'reduce_script': '''
|
||||
HashMap output = new HashMap();
|
||||
for (agg in params._aggs) {
|
||||
if (agg!=null){
|
||||
for (e in agg.entrySet()) {
|
||||
String key = String.valueOf(e.getKey());
|
||||
output.put(key, e.getValue() + Optional.ofNullable(output.get(key)).orElse(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
return output;
|
||||
'''
|
||||
}}
|
||||
stats_name = encode_property(column.es_column)
|
||||
if column.nested_path[0] == ".":
|
||||
es_query.aggs[stats_name] = script
|
||||
pulls.append(jx_expression_to_function(stats_name + ".value"))
|
||||
else:
|
||||
es_query.aggs[stats_name] = {
|
||||
"nested": {"path": column.nested_path[0]},
|
||||
"aggs": {"_nested": script}
|
||||
}
|
||||
pulls.append(jx_expression_to_function(stats_name + "._nested.value"))
|
||||
|
||||
if len(pulls) == 0:
|
||||
s.pull = NULL
|
||||
elif len(pulls) == 1:
|
||||
s.pull = pulls[0]
|
||||
else:
|
||||
s.pull = lambda row: UNION(p(row) for p in pulls)
|
||||
acc.add(NestedAggs(column.nested_path[0]).add(ExprAggs(stats_name, script, s)))
|
||||
s.pull = jx_expression_to_function("value")
|
||||
else:
|
||||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
elif len(columns) <1:
|
||||
# PULL VALUE OUT OF THE stats AGGREGATE
|
||||
s.pull = jx_expression_to_function({"null":{}})
|
||||
if not columns:
|
||||
s.pull = jx_expression_to_function(NULL)
|
||||
else:
|
||||
# PULL VALUE OUT OF THE stats AGGREGATE
|
||||
es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
|
||||
s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})
|
||||
for c in columns:
|
||||
acc.add(NestedAggs(c.nested_path[0]).add(
|
||||
ExprAggs(canonical_name, {"extended_stats": {"field": c.es_column}}, s)
|
||||
))
|
||||
s.pull = jx_expression_to_function(aggregates[s.aggregate])
|
||||
|
||||
for i, s in enumerate(formula):
|
||||
canonical_name = literal_field(s.name)
|
||||
s_path = [k for k, v in split_expression_by_path(s.value, schema=schema, lang=Painless).items() if v]
|
||||
if len(s_path) == 0:
|
||||
# FOR CONSTANTS
|
||||
nest = NestedAggs(query_path)
|
||||
acc.add(nest)
|
||||
elif len(s_path) == 1:
|
||||
nest = NestedAggs(first(s_path))
|
||||
acc.add(nest)
|
||||
else:
|
||||
Log.error("do not know how to handle")
|
||||
|
||||
if isinstance(s.value, TupleOp):
|
||||
canonical_name = s.name
|
||||
if is_op(s.value, TupleOp):
|
||||
if s.aggregate == "count":
|
||||
# TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
|
||||
s.pull = "doc_count"
|
||||
s.pull = jx_expression_to_function("doc_count")
|
||||
elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
|
||||
if s.aggregate in ('max', 'maximum'):
|
||||
dir = 1
|
||||
|
@ -298,136 +338,110 @@ def es_aggsop(es, frum, query):
|
|||
dir = -1
|
||||
op = 'min'
|
||||
|
||||
nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr
|
||||
selfy = s.value.partial_eval().to_es_script(schema).expr
|
||||
nully = Painless[TupleOp([NULL]*len(s.value.terms))].partial_eval().to_es_script(schema)
|
||||
selfy = text_type(Painless[s.value].partial_eval().to_es_script(schema))
|
||||
|
||||
script = {"scripted_metric": {
|
||||
'init_script': 'params._agg.best = ' + nully + ';',
|
||||
'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": dir, "op": op}) + ";",
|
||||
'combine_script': 'return params._agg.best',
|
||||
'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()',
|
||||
'reduce_script': 'return params._aggs.stream().'+op+'(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()',
|
||||
}}
|
||||
if schema.query_path[0] == ".":
|
||||
es_query.aggs[canonical_name] = script
|
||||
s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
|
||||
else:
|
||||
es_query.aggs[canonical_name] = {
|
||||
"nested": {"path": schema.query_path[0]},
|
||||
"aggs": {"_nested": script}
|
||||
}
|
||||
s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value")
|
||||
nest.add(NestedAggs(query_path).add(
|
||||
ExprAggs(canonical_name, script, s)
|
||||
))
|
||||
s.pull = jx_expression_to_function("value")
|
||||
else:
|
||||
Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
|
||||
Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
|
||||
elif s.aggregate == "count":
|
||||
es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
|
||||
s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
|
||||
nest.add(ExprAggs(canonical_name, {"value_count": {"script": text_type(Painless[s.value].partial_eval().to_es_script(schema))}}, s))
|
||||
s.pull = jx_expression_to_function("value")
|
||||
elif s.aggregate == "median":
|
||||
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
|
||||
key = literal_field(canonical_name + " percentile")
|
||||
|
||||
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
|
||||
es_query.aggs[key].percentiles.percents += [50]
|
||||
s.pull = jx_expression_to_function(key + ".values.50\\.0")
|
||||
nest.add(ExprAggs(key, {"percentiles": {
|
||||
"script": text_type(Painless[s.value].to_es_script(schema)),
|
||||
"percents": [50]
|
||||
}}, s))
|
||||
s.pull = jx_expression_to_function(join_field(["50.0"]))
|
||||
elif s.aggregate == "percentile":
|
||||
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
|
||||
key = literal_field(canonical_name + " percentile")
|
||||
percent = Math.round(s.percentile * 100, decimal=6)
|
||||
|
||||
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
|
||||
es_query.aggs[key].percentiles.percents += [percent]
|
||||
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
|
||||
percent = mo_math.round(s.percentile * 100, decimal=6)
|
||||
nest.add(ExprAggs(key, {"percentiles": {
|
||||
"script": text_type(Painless[s.value].to_es_script(schema)),
|
||||
"percents": [percent]
|
||||
}}, s))
|
||||
s.pull = jx_expression_to_function(join_field(["values", text_type(percent)]))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(key + ".value")
            nest.add(ExprAggs(key, {"cardinality": {"script": text_type(Painless[s.value].to_es_script(schema))}}, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
            stats_name = canonical_name
            nest.add(ComplexAggs(s).add(ExprAggs(stats_name, {"extended_stats": {"script": text_type(Painless[s.value].to_es_script(schema))}}, None)))

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = get_pull_stats(stats_name, median_name)
            median_name = canonical_name + " percentile"
            nest.add(ExprAggs(median_name, {"percentiles": {
                "script": text_type(Painless[s.value].to_es_script(schema)),
                "percents": [50]
            }}, s))
            s.pull = get_pull_stats(None, stats_name, median_name)
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(stats_name + ".buckets.key")
            nest.add(TermsAggs(canonical_name, {"script_field": text_type(Painless[s.value].to_es_script(schema))}, s))
            s.pull = jx_expression_to_function("key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(aggregates[s.aggregate])
            nest.add(ExprAggs(canonical_name, {"extended_stats": {"script": text_type(Painless[s.value].to_es_script(schema))}}, s))

    acc = NestedAggs(query_path).add(acc)
    split_decoders = get_decoders_by_path(query)
    split_wheres = split_expression_by_path(query.where, schema=frum.schema, lang=ES52)

    decoders = get_decoders_by_depth(query)
    start = 0
    decoders = [None] * (len(query.edges) + len(query.groupby))
    paths = list(reversed(sorted(split_wheres.keys() | split_decoders.keys())))
    for path in paths:
        literal_path = literal_field(path)
        decoder = split_decoders[literal_path]
        where = split_wheres[literal_path]

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
        for d in decoder:
            decoders[d.edge.dim] = d
            acc = d.append_query(path, acc)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )
        if where:
            acc = FilterAggs("_filter", AndOp(where), None).add(acc)
        acc = NestedAggs(path).add(acc)

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": schema.query_path[0]}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

        if decoders:
            for d in jx.reverse(decoders[0]):
                es_query = d.append_query(es_query, start)
                start += d.num_columns

        if split_where[0]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter = AndOp("and", split_where[0]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter}, es_query)}
            )
    # </TERRIBLE SECTION>
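For context, a hedged sketch (paths and field names hypothetical) of the aggregation shape the old branch above weaves together: each nesting level is wrapped in a "nested" agg, and the where-clause terms for that depth are attached as a "filter" agg around the inner aggs.

# Sketch only: the nested/filter weave produced for a one-level-deep query.
woven = {"aggs": {"_nested": {
    "nested": {"path": "a.~N~"},                  # hypothetical nested path
    "aggs": {"_filter": {
        "filter": {"term": {"a.~N~.b": 1}},       # where-terms that apply at this depth
        "aggs": {}                                # decoder/metric aggs go here
    }}
}}}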
    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})
    acc = NestedAggs('.').add(acc)
    acc = simplify(acc)
    es_query = wrap(acc.to_es(schema))

    es_query.size = 0

    with Timer("ES query time") as es_duration:
    with Timer("ES query time", silent=not DEBUG) as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        format_time = Timer("formatting", silent=not DEBUG)
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE
            # result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE
            aggs = unwrap(result.aggregations)

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
                output = formatter(aggs, acc, query, decoders, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
                output = groupby_formatter(aggs, acc, query, decoders, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)
                output = aggop_formatter(aggs, acc, query, decoders, select)

            output.meta.timing.formatting = format_time.duration
            output.meta.timing.es_search = es_duration.duration

@@ -445,89 +459,121 @@ EMPTY_LIST = []


def drill(agg):
    deeper = agg.get("_filter") or agg.get("_nested")
    while deeper:
        agg = deeper
        deeper = agg.get("_filter") or agg.get("_nested")
    return agg
    while True:
        deeper = agg.get("_filter")
        if deeper:
            agg = deeper
            continue
        return agg
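A quick check of what the rewritten drill() does: it now unwraps only chains of "_filter" wrappers (the "_nested" case is handled by the Aggs tree), stopping at the first level that is not a wrapper.

# Sketch only: drill() skips "_filter" wrappers and returns the inner agg.
agg = {"_filter": {"doc_count": 10, "_filter": {"doc_count": 7, "_match": {"buckets": []}}}}
assert drill(agg) == {"doc_count": 7, "_match": {"buckets": []}}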


def aggs_iterator(aggs, decoders, coord=True):
def _children(agg, children):
    for child in children:
        name = child.name
        v = agg[name]
        if name == "_match":
            for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
                yield i, b, child, b
        elif name.startswith("_match"):
            i = int(name[6:])
            yield i, v, child, v
        elif name.startswith("_missing"):
            if len(name) == 8:
                i = None
            else:
                i = int(name[8:])
            yield None, v, child, v
        else:
            yield None, v, child, None


def aggs_iterator(aggs, es_query, decoders, give_me_zeros=False):
    """
    DIG INTO ES'S RECURSIVE aggs DATA-STRUCTURE:
    RETURN AN ITERATOR OVER THE EFFECTIVE ROWS OF THE RESULTS

    :param aggs: ES AGGREGATE OBJECT
    :param decoders:
    :param coord: TURN ON LOCAL COORDINATE LOOKUP
    :param es_query: THE ABSTRACT ES QUERY WE WILL TRACK ALONGSIDE aggs
    :param decoders: TO CONVERT PARTS INTO COORDINATES
    """
    depth = max(d.start + d.num_columns for d in decoders)
    coord = [0] * len(decoders)
    parts = deque()
    stack = []

    def _aggs_iterator(agg, d):
        agg = drill(agg)
    gen = _children(aggs, es_query.children)
    while True:
        try:
            index, c_agg, c_query, part = gen.next()
        except StopIteration:
            try:
                gen = stack.pop()
            except IndexError:
                return
            parts.popleft()
            continue

        if d > 0:
            for k, v in agg.items():
                if k == "_match":
                    v = drill(v)
                    for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
                        b["_index"] = i
                        for a, parts in _aggs_iterator(b, d - 1):
                            yield a, parts + (b,)
                elif k == "_other":
                    for b in v.get("buckets", EMPTY_LIST):
                        for a, parts in _aggs_iterator(b, d - 1):
                            yield a, parts + (Null,)
                elif k == "_missing":
                    b = drill(v)
                    for a, parts in _aggs_iterator(b, d - 1):
                        yield a, parts + (b,)
                elif k.startswith("_join_"):
                    v["key"] = int(k[6:])
                    for a, parts in _aggs_iterator(v, d - 1):
                        yield a, parts + (v,)
        else:
            for k, v in agg.items():
                if k == "_match":
                    v = drill(v)
                    for i, b in enumerate(v.get("buckets", EMPTY_LIST)):
                        b["_index"] = i
                        yield b, (b,)
                elif k == "_other":
                    for b in v.get("buckets", EMPTY_LIST):
                        yield b, (Null,)
                elif k == "_missing":
                    b = drill(v,)
                    yield b, (v,)
                elif k.startswith("_join_"):
                    v["_index"] = int(k[6:])
                    yield v, (v,)
        if c_agg.get('doc_count') == 0 and not give_me_zeros:
            continue
        parts.appendleft(part)
        for d in c_query.decoders:
            coord[d.edge.dim] = d.get_index(tuple(p for p in parts if p is not None), c_query, index)

    if coord:
        for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
            coord = tuple(d.get_index(parts) for d in decoders)
            if any(c is None for c in coord):
        children = c_query.children
        selects = c_query.selects
        if selects or not children:
            parts.popleft()  # c_agg WAS ON TOP
            yield (
                tuple(p for p in parts if p is not None),
                tuple(coord),
                c_agg,
                selects
            )
            continue

        stack.append(gen)
        gen = _children(c_agg, children)
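For context: the rewrite above replaces the recursive generator with an explicit stack. `gen` yields the children of the current agg, `stack` remembers the parent generators, and `parts` (a deque) holds the bucket objects on the current path, newest first. A simplified, self-contained model of the same depth-first traversal (names hypothetical, not part of the commit):

# Sketch only: iterative DFS with an explicit stack, yielding each
# root-to-node path, in the same style as the loop above.
def walk(node):
    stack, gen, path = [], iter(node.get("children", [])), []
    while True:
        child = next(gen, None)
        if child is None:
            if not stack:
                return
            gen = stack.pop()       # resume the parent's generator
            path.pop()              # the finished child leaves the path
            continue
        path.append(child["name"])
        yield tuple(path)
        stack.append(gen)           # remember where we were
        gen = iter(child.get("children", []))

tree = {"children": [{"name": "a", "children": [{"name": "b"}]}, {"name": "c"}]}
assert list(walk(tree)) == [("a",), ("a", "b"), ("c",)]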


def count_dim(aggs, es_query, decoders):
    if not any(hasattr(d, "done_count") for d in decoders):
        return [d.edge for d in decoders]

    def _count_dim(parts, aggs, es_query):
        children = es_query.children
        if not children:
            return

        for child in children:
            name = child.name
            agg = aggs[name]
            if agg.get('doc_count') == 0:
                continue
                yield parts, coord, a
    else:
        for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
            yield parts, None, a
            elif name == "_match":
                for i, b in enumerate(agg.get("buckets", EMPTY_LIST)):
                    if not b.get('doc_count'):
                        continue
                    b["_index"] = i
                    new_parts = (b,) + parts
                    for d in child.decoders:
                        d.count(new_parts)
                    _count_dim(new_parts, b, child)
            elif name.startswith("_missing"):
                new_parts = (agg,) + parts
                for d in child.decoders:
                    d.count(new_parts)
                _count_dim(new_parts, agg, child)
            else:
                _count_dim(parts, agg, child)


def count_dim(aggs, decoders):
    if any(isinstance(d, (DefaultDecoder, DimFieldListDecoder, ObjectDecoder)) for d in decoders):
        # ENUMERATE THE DOMAINS, IF UNKNOWN AT QUERY TIME
        for row, coord, agg in aggs_iterator(aggs, decoders, coord=False):
            for d in decoders:
                d.count(row)
        for d in decoders:
            d.done_count()
    new_edges = wrap([d.edge for d in decoders])
    return new_edges
    _count_dim(tuple(), aggs, es_query)
    for d in decoders:
        done_count = getattr(d, "done_count", Null)
        done_count()
    return [d.edge for d in decoders]


format_dispatch = {}
from jx_elasticsearch.es52.format import format_cube

from jx_elasticsearch.es52.format import format_cube
_ = format_cube

@@ -7,26 +7,29 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from collections import Mapping
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from jx_base.dimensions import Dimension
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
from jx_base.expressions import TupleOp, TRUE
from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT
from jx_elasticsearch.es52.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE
from jx_elasticsearch.es52.util import es_missing
from jx_base.domains import DefaultDomain, PARTITION, SimpleSetDomain
from jx_base.expressions import ExistsOp, FirstOp, GtOp, GteOp, LeavesOp, LtOp, LteOp, MissingOp, TupleOp, Variable
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
from jx_base.language import is_op
from jx_elasticsearch.es52.es_query import Aggs, FilterAggs, FiltersAggs, NestedAggs, RangeAggs, TermsAggs
from jx_elasticsearch.es52.expressions import AndOp, InOp, Literal, NotOp
from jx_elasticsearch.es52.painless import LIST_TO_PIPE, Painless
from jx_elasticsearch.es52.util import pull_functions
from jx_python import jx
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
from mo_future import text_type, transpose
from mo_json.typed_encoder import untype_path, STRING, NUMBER, BOOLEAN
from mo_dots import Data, coalesce, concat_field, is_data, literal_field, relative_field, set_default, wrap
from mo_future import first, text_type, transpose
from mo_json import EXISTS, OBJECT, STRING
from mo_json.typed_encoder import EXISTS_TYPE, NESTED_TYPE, untype_path
from mo_logs import Log
from mo_logs.strings import quote, expand_template
from mo_math import MAX, MIN, Math
from pyLibrary.convert import string2boolean
from mo_logs.strings import expand_template, quote
import mo_math
from mo_math import MAX, MIN

DEBUG = False


class AggsDecoder(object):
@@ -37,91 +40,100 @@ class AggsDecoder(object):
        # if query.groupby:
        #     return object.__new__(DefaultDecoder, e)

        if isinstance(e.value, text_type):
        if is_text(e.value):
            Log.error("Expecting Variable or Expression, not plain string")

        if isinstance(e.value, LeavesOp):
            return object.__new__(ObjectDecoder, e)
        elif isinstance(e.value, TupleOp):
        if is_op(e.value, LeavesOp):
            return object.__new__(ObjectDecoder)
        elif is_op(e.value, TupleOp):
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            if not all(isinstance(t, Variable) for t in e.value.terms):
            if not all(is_op(t, Variable) for t in e.value.terms):
                Log.error("Can only handle variables in tuples")

            e.domain = Data(
                dimension={"fields": e.value.terms}
            )
            return object.__new__(DimFieldListDecoder, e)
            return object.__new__(DimFieldListDecoder)

        elif isinstance(e.value, Variable):
        elif is_op(e.value, Variable):
            schema = query.frum.schema
            cols = schema.leaves(e.value.var)
            if not cols:
                return object.__new__(DefaultDecoder, e)
                return object.__new__(DefaultDecoder)
            if len(cols) != 1:
                return object.__new__(ObjectDecoder, e)
            col = cols[0]
                return object.__new__(ObjectDecoder)
            col = first(cols)
            limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

            if col.partitions != None:
                if col.multi > 1 and len(col.partitions) < 6:
            if col.cardinality == None:
                DEBUG and Log.warning(
                    "metadata for column {{name|quote}} (id={{id}}) is not ready",
                    name=concat_field(col.es_index, col.es_column),
                    id=id(col)
                )
                e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
                return object.__new__(DefaultDecoder)
            elif col.partitions == None:
                e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
                return object.__new__(DefaultDecoder)
            else:
                DEBUG and Log.note("id={{id}} has parts!!!", id=id(col))
                if col.multi > 1 and len(col.partitions) < 10:
                    return object.__new__(MultivalueDecoder)

                partitions = col.partitions[:limit:]
                if e.domain.sort==-1:
                if e.domain.sort == -1:
                    partitions = list(reversed(sorted(partitions)))
                else:
                    partitions = sorted(partitions)
                e.domain = SimpleSetDomain(partitions=partitions, limit=limit)
            else:
                e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
                return object.__new__(DefaultDecoder, e)

        else:
            return object.__new__(DefaultDecoder, e)
            return object.__new__(DefaultDecoder)

        if e.value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder, e)
            return object.__new__(SetDecoder)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder, e)
            return object.__new__(SetDecoder)
        if e.value and e.domain.type == "time":
            return object.__new__(TimeDecoder, e)
            return object.__new__(TimeDecoder)
        if e.range:
            return object.__new__(GeneralRangeDecoder, e)
            return object.__new__(GeneralRangeDecoder)
        if e.value and e.domain.type == "duration":
            return object.__new__(DurationDecoder, e)
            return object.__new__(DurationDecoder)
        elif e.value and e.domain.type == "range":
            return object.__new__(RangeDecoder, e)
            return object.__new__(RangeDecoder)
        elif not e.value and e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            fields = e.domain.dimension.fields
            if isinstance(fields, Mapping):
            if is_data(fields):
                Log.error("No longer allowed: All objects are expressions")
            else:
                return object.__new__(DimFieldListDecoder, e)
                return object.__new__(DimFieldListDecoder)
        elif not e.value and all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder, e)
            return object.__new__(GeneralSetDecoder)
        else:
            Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)

    def __init__(self, edge, query, limit):
        self.start = None
        self.edge = edge
        self.name = literal_field(self.edge.name)
        self.query = query
        self.limit = limit
        self.schema = self.query.frum.schema

    def append_query(self, es_query, start):
    def append_query(self, query_path, es_query):
        Log.error("Not supported")

    def count(self, row):
        pass

    def done_count(self):
        pass
    # DO NOT IMPLEMENT IF domain HAS KNOWN PARTITIONS
    # def done_count(self):
    #     pass

    def get_value_from_row(self, row):
        raise NotImplementedError()
@@ -129,7 +141,7 @@ class AggsDecoder(object):
    def get_value(self, index):
        raise NotImplementedError()

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        raise NotImplementedError()

    @property

@@ -155,70 +167,74 @@ class SetDecoder(AggsDecoder):
        parts = jx.sort(domain.partitions, {"value": domain.key, "sort": s.sort})
        edge.domain = self.domain = SimpleSetDomain(key=domain.key, label=domain.label, partitions=parts)

    def append_query(self, es_query, start):
        self.start = start
    def append_query(self, query_path, es_query):
        domain = self.domain

        domain_key = domain.key
        include, text_include = transpose(*(
            (
                float(v) if isinstance(v, (int, float)) else v,
                text_type(float(v)) if isinstance(v, (int, float)) else v
            )
            for v in (p[domain_key] for p in domain.partitions)
        ))
        value = self.edge.value
        exists = AndOp("and", [
            value.exists(),
            InOp("in", [value, Literal("literal", include)])
        ]).partial_eval()
        value = Painless[self.edge.value]
        cnv = pull_functions[value.type]
        include = tuple(cnv(p[domain_key]) for p in domain.partitions)

        exists = Painless[AndOp([
            InOp([value, Literal(include)])
        ])].partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            es_field = self.query.frum.schema.leaves(value.var)[0].es_column  # ALREADY CHECKED THERE IS ONLY ONE
            terms = set_default({"terms": {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }}, es_query)
        else:
            terms = set_default({"terms": {
                "script": {
                    "lang": "painless",
                    "inline": value.to_es_script(self.schema).script(self.schema)
        if is_op(value, Variable):
            es_field = first(self.query.frum.schema.leaves(value.var)).es_column  # ALREADY CHECKED THERE IS ONLY ONE
            match = TermsAggs(
                "_match",
                {
                    "field": es_field,
                    "size": limit,
                    "order": {"_term": self.sorted} if self.sorted else None
                },
                "size": limit
            }}, es_query)

        if self.edge.allowNulls:
            missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
                self
            )
        else:
            missing = None
            match = TermsAggs(
                "_match",
                {
                    "script": text_type(value.to_es_script(self.schema)),
                    "size": limit
                },
                self
            )
        output = Aggs().add(FilterAggs("_filter", exists, None).add(match.add(es_query)))

        return wrap({"aggs": {
            "_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {
                    "_filter": terms
                }
            },
            "_missing": missing
        }})
        if self.edge.allowNulls:
            # FIND NULLS AT EACH NESTED LEVEL
            for p in self.schema.query_path:
                if p == query_path:
                    # MISSING AT THE QUERY DEPTH
                    output.add(
                        NestedAggs(p).add(FilterAggs("_missing0", NotOp(exists), self).add(es_query))
                    )
                else:
                    # PARENT HAS NO CHILDREN, SO MISSING
                    column = first(self.schema.values(query_path, (OBJECT, EXISTS)))
                    output.add(
                        NestedAggs(column.nested_path[0]).add(
                            FilterAggs(
                                "_missing1",
                                NotOp(ExistsOp(Variable(column.es_column.replace(NESTED_TYPE, EXISTS_TYPE)))),
                                self
                            ).add(es_query)
                        )
                    )
        return output

    def get_value(self, index):
        return self.domain.getKeyByIndex(index)

    def get_value_from_row(self, row):
        return self.pull(row[self.start].get('key'))
    def get_value_from_row(self, parts):
        key = parts[0].get('key')
        return self.pull(key)

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        try:
            part = row[self.start]
            return self.domain.getIndexByKey(part.get('key'))
            key = row[0].get('key')
            return self.domain.getIndexByKey(key)
        except Exception as e:
            Log.error("problem", cause=e)

@@ -227,52 +243,43 @@ class SetDecoder(AggsDecoder):
        return 1


def _range_composer(edge, domain, es_query, to_float, schema):
def _range_composer(self, edge, domain, es_query, to_float, schema):
    # USE RANGES
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    output = Aggs()
    if edge.allowNulls:
        missing_filter = set_default(
            {
                "filter": NotOp("not", AndOp("and", [
                    edge.value.exists(),
                    InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]),
                    InequalityOp("lt", [edge.value, Literal(None, to_float(_max))])
                ]).partial_eval()).to_esfilter(schema)
            },
            es_query
        )
    else:
        missing_filter = None
        output.add(FilterAggs(
            "_missing",
            NotOp(AndOp([
                edge.value.exists(),
                GteOp([edge.value, Literal(to_float(_min))]),
                LtOp([edge.value, Literal(to_float(_max))])
            ]).partial_eval()),
            self
        ).add(es_query))

    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    if is_op(edge.value, Variable):
        calc = {"field": first(schema.leaves(edge.value.var)).es_column}
    else:
        calc = {"script": edge.value.to_es_script(schema).script(schema)}
        calc = {"script": text_type(Painless[edge.value].to_es_script(schema))}
    calc['ranges'] = [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})
    return output.add(RangeAggs("_match", calc, self).add(es_query))
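For reference, a hedged sketch of the aggregation shape _range_composer now produces (column name and bounds hypothetical): a "range" agg with one bucket per partition, plus a "_missing" filter agg for values outside [_min, _max) when allowNulls is set.

# Sketch only: the rendered aggregation for a two-partition domain.
expected = {
    "_match": {"range": {
        "field": "timestamp",                                    # hypothetical column
        "ranges": [
            {"from": 0.0, "to": 86400.0},
            {"from": 86400.0, "to": 172800.0},
        ],
    }},
    "_missing": {"filter": {"bool": {"must_not": [
        {"range": {"timestamp": {"gte": 0.0, "lt": 172800.0}}}   # i.e. inside [_min, _max)
    ]}}},
}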


class TimeDecoder(AggsDecoder):
    def append_query(self, es_query, start):
        self.start = start
    def append_query(self, query_path, es_query):
        schema = self.query.frum.schema
        return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.unix, schema)
        return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x.unix, schema)

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        domain = self.edge.domain
        part = row[self.start]
        part = row[0]
        if part == None:
            return len(domain.partitions)

@@ -309,35 +316,31 @@ class GeneralRangeDecoder(AggsDecoder):
        else:
            Log.error("Unknown domain of type {{type}} for range edge", type=edge.domain.type)

    def append_query(self, es_query, start):
        self.start = start

    def append_query(self, query_path, es_query):
        edge = self.edge
        range = edge.range
        domain = edge.domain

        aggs = {}
        aggs = Aggs()
        for i, p in enumerate(domain.partitions):
            filter_ = AndOp("and", [
                InequalityOp("lte", [range.min, Literal("literal", self.to_float(p.min))]),
                InequalityOp("gt", [range.max, Literal("literal", self.to_float(p.min))])
            filter_ = AndOp([
                LteOp([range.min, Literal(self.to_float(p.min))]),
                GtOp([range.max, Literal(self.to_float(p.min))])
            ])
            aggs["_join_" + text_type(i)] = set_default(
                {"filter": filter_.to_esfilter(self.schema)},
                es_query
            )
            aggs.add(FilterAggs("_match" + text_type(i), filter_, self).add(es_query))

        return wrap({"aggs": aggs})
        return aggs

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        domain = self.edge.domain
        part = row[self.start]
        part = row[0]
        if part == None:
            return len(domain.partitions)
        return part["_index"]
        index = int(es_query.name[6:])
        return index

    @property
    def num_columns(self):

@@ -349,42 +352,30 @@ class GeneralSetDecoder(AggsDecoder):
    EXPECTING ALL PARTS IN partitions TO HAVE A where CLAUSE
    """

    def append_query(self, es_query, start):
        self.start = start

    def append_query(self, query_path, es_query):
        parts = self.edge.domain.partitions
        filters = []
        notty = []

        for p in parts:
            w = p.where
            filters.append(AndOp("and", [w] + notty).to_esfilter(self.schema))
            notty.append(NotOp("not", w))
            filters.append(AndOp([w] + notty))
            notty.append(NotOp(w))

        missing_filter = None
        output = Aggs().add(FiltersAggs("_match", filters, self).add(es_query))
        if self.edge.allowNulls:  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            missing_filter = set_default(
                {"filter": AndOp("and", notty).to_esfilter(self.schema)},
                es_query
            )
            output.add(FilterAggs("_missing", AndOp(notty), self).add(es_query))

        return wrap({"aggs": {
            "_match": set_default(
                {"filters": {"filters": filters}},
                es_query
            ),
            "_missing": missing_filter
        }})
        return output

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        domain = self.edge.domain
        part = row[self.start]
        # if part == None:
        #     return len(domain.partitions)
        return part.get("_index", len(domain.partitions))
        if index == None:
            return len(domain.partitions)
        else:
            return index

    @property
    def num_columns(self):

@@ -392,16 +383,15 @@ class GeneralSetDecoder(AggsDecoder):


class DurationDecoder(AggsDecoder):
    def append_query(self, es_query, start):
        self.start = start
        return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema)
    def append_query(self, query_path, es_query):
        return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema)

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        domain = self.edge.domain
        part = row[self.start]
        part = row[0]
        if part == None:
            return len(domain.partitions)

@@ -423,16 +413,15 @@ class DurationDecoder(AggsDecoder):


class RangeDecoder(AggsDecoder):
    def append_query(self, es_query, start):
        self.start = start
        return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x, self.schema)
    def append_query(self, query_path, es_query):
        return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x, self.schema)

    def get_value(self, index):
        return self.edge.domain.getKeyByIndex(index)

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        domain = self.edge.domain
        part = row[self.start]
        part = row[0]
        if part == None:
            return len(domain.partitions)

@@ -460,31 +449,40 @@ class MultivalueDecoder(SetDecoder):
        self.values = query.frum.schema[edge.value.var][0].partitions
        self.parts = []

    def append_query(self, es_query, start):
        self.start = start
    def append_query(self, query_path, es_query):
        es_field = first(self.query.frum.schema.leaves(self.var)).es_column

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query
        return Aggs().add(TermsAggs("_match", {
            "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
        }, self).add(es_query))

    def get_value_from_row(self, row):
        values = row[self.start]['key'].replace("||", "\b").split("|")
        values = row[0]['key'].replace("||", "\b").split("|")
        if len(values) == 2:
            return None
        return unwraplist([v.replace("\b", "|") for v in values[1:-1]])
        t = tuple(v.replace("\b", "|") for v in sorted(values[1:-1]))

    def get_index(self, row):
        if len(t) == 0:
            return None
        elif len(t) == 1:
            return t[0]
        else:
            return t
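For context: the painless script built from LIST_TO_PIPE renders a multi-valued field as one pipe-delimited string, escaping literal "|" by doubling it. The decode above reverses that. A standalone copy of the decode, for illustration only:

# Sketch only: decode the pipe-delimited terms key back into a tuple.
def decode_pipe_list(key):
    values = key.replace("||", "\b").split("|")              # hide escaped pipes first
    return tuple(v.replace("\b", "|") for v in sorted(values[1:-1]))

assert decode_pipe_list("|a|b|") == ("a", "b")
assert decode_pipe_list("|a||b|") == ("a|b",)                # "||" was an escaped "|"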
    def get_index(self, row, es_query=None, index=None):
        find = self.get_value_from_row(row)
        try:
            return self.parts.index(find)
        except Exception:
            self.parts.append(find)
            return len(self.parts)-1
        return self.domain.getIndexByKey(find)

    def count(self, row):
        value = self.get_value_from_row(row)
        self.parts.append(value)

    def done_count(self):
        self.edge.allowNulls = False
        self.edge.domain = self.domain = SimpleSetDomain(
            partitions=jx.sort(set(self.parts))
        )
        self.parts = None

    @property
    def num_columns(self):

@@ -494,7 +492,7 @@ class MultivalueDecoder(SetDecoder):
class ObjectDecoder(AggsDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        if isinstance(edge.value, LeavesOp):
        if is_op(edge.value, LeavesOp):
            prefix = edge.value.term.var
            flatter = lambda k: literal_field(relative_field(k, prefix))
        else:

@@ -502,30 +500,29 @@ class ObjectDecoder(AggsDecoder):
            flatter = lambda k: relative_field(k, prefix)

        self.put, self.fields = transpose(*[
            (flatter(untype_path(c.names["."])), c.es_column)
            (flatter(untype_path(c.name)), c.es_column)
            for c in query.frum.schema.leaves(prefix)
        ])

        self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}})
        self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

    def append_query(self, es_query, start):
        self.start = start
    def append_query(self, query_path, es_query):
        decoder = self
        for i, v in enumerate(self.fields):
            nest = wrap({"aggs": {
                "_match": set_default({"terms": {
            nest = Aggs().add(
                TermsAggs("_match", {
                    "field": v,
                    "size": self.domain.limit
                }}, es_query),
                "_missing": set_default(
                    {"filter": es_missing(v)},
                    es_query
                )
            }})
                }, decoder).add(es_query)
            ).add(
                FilterAggs("_missing", MissingOp(Variable(v)), decoder).add(es_query)
            )
            es_query = nest
            decoder = None
        return es_query

    def count(self, row):

@@ -542,7 +539,7 @@ class ObjectDecoder(AggsDecoder):
            partitions=[{"value": p, "dataIndex": i} for i, p in enumerate(self.parts)]
        )

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        value = self.get_value_from_row(row)
        if self.computed_domain:
            return self.domain.getIndexByKey(value)

@@ -555,16 +552,18 @@ class ObjectDecoder(AggsDecoder):
        self.parts.append(value)
        return i

    def get_value_from_row(self, row):
        part = row[self.start:self.start + self.num_columns:]
        if not part[0]['doc_count']:
    def get_value_from_row(self, parts):
        if not parts[0]['doc_count']:
            return None

        output = Data()
        for k, v in transpose(self.put, part):
        for k, v in transpose(self.put, parts):
            output[k] = v.get('key')
        return output

    def get_value(self, index):
        return self.parts[index]

    @property
    def num_columns(self):
        return len(self.fields)

@@ -576,81 +575,55 @@ class DefaultDecoder(SetDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        self.domain = edge.domain
        self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False
        self.script = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = Painless[self.edge.value].partial_eval().to_es_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()
        self.exists = NotOp(self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
        sort_candidates = [s for s in query.sort if s.value == edge.value]
        if sort_candidates:
            self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]}
        else:
            self.es_order = None

    def append_query(self, es_query, start):
        self.start = start

        if not isinstance(self.edge.value, Variable):
            if self.exists is TRUE:
                # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
                output = wrap({"aggs": {
                    "_match": set_default(
                        {"terms": {
                            "script": {"lang": "painless", "inline": self.script.expr},
                            "size": self.domain.limit,
                            "order": self.es_order
                        }},
                        es_query
                    )
                }})
            else:
                output = wrap({"aggs": {
                    "_match": {  # _match AND _filter REVERSED SO _match LINES UP WITH _missing
                        "filter": self.exists.to_esfilter(self.schema),
                        "aggs": {
                            "_filter": set_default(
                                {"terms": {
                                    "script": {"lang": "painless", "inline": self.script.expr},
                                    "size": self.domain.limit,
                                    "order": self.es_order
                                }},
                                es_query
                            )
                        }
                    },
                    "_missing": set_default(
                        {"filter": self.missing.to_esfilter(self.schema)},
                        es_query
                    )
                }})
            return output
    def append_query(self, query_path, es_query):
        if is_op(self.edge.value, FirstOp) and is_op(self.edge.value.term, Variable):
            self.edge.value = self.edge.value.term  # ES USES THE FIRST TERM FOR {"terms": } AGGREGATION
        if not is_op(self.edge.value, Variable):
            terms = TermsAggs(
                "_match",
                {
                    "script": {"lang": "painless", "inline": self.script.expr},
                    "size": self.domain.limit,
                    "order": self.es_order
                },
                self
            )
        else:
            output = wrap({"aggs": {
                "_match": set_default(
                    {"terms": {
                        "field": self.schema.leaves(self.edge.value.var)[0].es_column,
                        "size": self.domain.limit,
                        "order": self.es_order
                    }},
                    es_query
                ),
                "_missing": set_default(
                    {"filter": self.missing.to_esfilter(self.schema)},
                    es_query
                )
            }})
            return output
            terms = TermsAggs(
                "_match", {
                    "field": first(self.schema.leaves(self.edge.value.var)).es_column,
                    "size": self.domain.limit,
                    "order": self.es_order
                },
                self
            )
        output = Aggs()
        output.add(FilterAggs("_filter", self.exists, None).add(terms.add(es_query)))
        output.add(FilterAggs("_missing", self.missing, self).add(es_query))
        return output

    def count(self, row):
        part = row[self.start]
        part = row[0]
        if part['doc_count']:
            if part.get('key') != None:
                self.parts.append(self.pull(part.get('key')))
            key = part.get('key')
            if key != None:
                self.parts.append(self.pull(key))
            else:
                self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

@@ -661,16 +634,16 @@ class DefaultDecoder(SetDecoder):
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
    def get_index(self, row, es_query=None, index=None):
        if self.computed_domain:
            try:
                part = row[self.start]
                part = row[0]
                return self.domain.getIndexByKey(self.pull(part.get('key')))
            except Exception as e:
                Log.error("problem", cause=e)
        else:
            try:
                part = row[self.start]
                part = row[0]
                key = self.pull(part.get('key'))
                i = self.key2index.get(key)
                if i is None:

@@ -693,37 +666,30 @@ class DimFieldListDecoder(SetDecoder):
        edge.allowNulls = False
        self.fields = edge.domain.dimension.fields
        self.domain = self.edge.domain
        self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()

    def append_query(self, es_query, start):
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start
    def append_query(self, query_path, es_query):
        decoder = self
        for i, v in enumerate(self.fields):
            exists = v.exists().partial_eval()
            nest = wrap({"aggs": {"_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {"_filter": set_default({"terms": {
                    "field": self.schema.leaves(v.var)[0].es_column,
                    "size": self.domain.limit
                }}, es_query)}
            }}})
            nest.aggs._missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
            nest = Aggs()
            nest.add(TermsAggs("_match", {
                "field": first(self.schema.leaves(v.var)).es_column,
                "size": self.domain.limit
            }, decoder).add(es_query))
            nest.add(FilterAggs("_missing", NotOp(exists), decoder).add(es_query))
            es_query = nest
            decoder = None

        if self.domain.where:
            filter_ = self.domain.where.partial_eval().to_esfilter(self.schema)
            es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}}
            es_query = FilterAggs("_filter", self.domain.where, None).add(es_query)

        return es_query

    def count(self, row):
        part = row[self.start:self.start + len(self.fields):]
        if part[0]['doc_count']:
            value = tuple(p.get("key") for p in part)
    def count(self, parts):
        if parts[0]['doc_count']:
            value = tuple(p.get("key") for p, f in zip(parts, self.fields))
            self.parts.append(value)

    def done_count(self):

@@ -737,20 +703,14 @@ class DimFieldListDecoder(SetDecoder):
            partitions=[{"value": tuple(v[k] for k in columns), "dataIndex": i} for i, v in enumerate(sorted_parts)]
        )

    def get_index(self, row):
        part = row[self.start:self.start + len(self.fields):]
        if part[0]['doc_count']==0:
            return None
        find = tuple(p.get("key") for p in part)
        output = self.domain.getIndexByKey(find)
        return output
    def get_index(self, row, es_query=None, index=None):
        if row[0]['doc_count']:
            find = tuple(p.get("key") for p, f in zip(row, self.fields))
            output = self.domain.getIndexByKey(find)
            return output

    @property
    def num_columns(self):
        return len(self.fields)


pull_functions = {
    STRING: lambda x: x,
    NUMBER: lambda x: float(x) if x !=None else None,
    BOOLEAN: string2boolean
}

@@ -7,19 +7,20 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from jx_base.expressions import NULL
from mo_future import is_text, is_binary
from jx_base.expressions import LeavesOp, NULL, Variable
from jx_base.query import DEFAULT_LIMIT
from jx_base.language import is_op
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es52.expressions import split_expression_by_depth, AndOp, Variable, LeavesOp
from jx_elasticsearch.es52.setop import format_dispatch, get_pull_function, get_pull
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template
from jx_python.expressions import compile_expression, jx_expression_to_function
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
from mo_json.typed_encoder import NESTED
from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_depth
from jx_elasticsearch.es52.setop import format_dispatch, get_pull, get_pull_function
from jx_elasticsearch.es52.util import es_query_template, jx_sort_to_es_sort
from jx_python.expressions import jx_expression_to_function
from mo_dots import Data, FlatList, coalesce, concat_field, is_list as is_list_, listwrap, literal_field, relative_field, set_default, split_field, startswith_field, unwrap, wrap
from mo_future import zip_longest
from mo_json import NESTED
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_threads import Thread

@@ -60,14 +61,15 @@ def es_deepop(es, query):

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for i, f in enumerate(es_filters):
        script = AndOp("and", wheres[i]).partial_eval().to_esfilter(schema)
    for f, w in zip_longest(es_filters, wheres):
        script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        # INCLUDE DOCS WITH NO NESTED DOCS
        more_filter = {
            "bool": {
                "filter": [AndOp("and", wheres[0]).partial_eval().to_esfilter(schema)],
                "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,

@@ -85,48 +87,49 @@ def es_deepop(es, query):

    # es_query.sort = jx_sort_to_es_sort(query.sort)
    map_to_es_columns = schema.map_to_es()
    # {c.names["."]: c.es_column for c in schema.leaves(".")}
    # {c.name: c.es_column for c in schema.leaves(".")}
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = isinstance(query.select, list)
    is_list = is_list_(query.select)
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    i = 0
    for s in listwrap(query.select):
        if isinstance(s.value, LeavesOp) and isinstance(s.value.term, Variable):
    put_index = 0
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(s.value.term.var)
            leaves = schema.leaves(select.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                if c.jx_type == NESTED:
                    continue
                es_query.stored_fields += [c.es_column]
                c_name = untype_path(c.names[query_path])
                c_name = untype_path(relative_field(c.name, query_path))
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(s.name, c_name),
                    "name": concat_field(select.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(s.name, literal_field(c_name)), "index": i, "child": "."},
                    "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
                    "pull": get_pull_function(c)
                })
                i += 1
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif isinstance(s.value, Variable):
            net_columns = schema.leaves(s.value.var)
        elif is_op(select.value, Variable):
            net_columns = schema.leaves(select.value.var)
            if not net_columns:
                new_select.append({
                    "name": s.name,
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": s.name, "index": i, "child": "."},
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            else:

@@ -139,26 +142,28 @@ def es_deepop(es, query):

                # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child
                for np in n.nested_path:
                    c_name = untype_path(n.names[np])
                    if startswith_field(c_name, s.value.var):
                        child = relative_field(c_name, s.value.var)
                    c_name = untype_path(relative_field(n.name, np))
                    if startswith_field(c_name, select.value.var):
                        child = relative_field(c_name, select.value.var)
                        break
                else:
                    child = relative_field(untype_path(n.names[n.nested_path[0]]), s.value.var)
                    continue
                    # REMOVED BECAUSE SELECTING INNER PROPERTIES IS NOT ALLOWED
                    # child = relative_field(untype_path(relative_field(n.name, n.nested_path[0])), s.value.var)

                new_select.append({
                    "name": s.name,
                    "name": select.name,
                    "pull": pull,
                    "nested_path": n.nested_path[0],
                    "put": {
                        "name": s.name,
                        "index": i,
                        "name": select.name,
                        "index": put_index,
                        "child": child
                    }
                })
                i += 1
                put_index += 1
        else:
            expr = s.value
            expr = select.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":

@@ -166,18 +171,18 @@ def es_deepop(es, query):
            # else:
            #     Log.error("deep field not expected")

            pull_name = EXPRESSION_PREFIX + s.name
            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())
            post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))

            new_select.append({
                "name": s.name if is_list else ".",
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": s.name, "index": i, "child": "."}
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            i += 1
            put_index += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []

@@ -208,7 +213,7 @@ def es_deepop(es, query):
        Thread.join(need_more)
    for t in more[0].hits.hits:
        yield t
    #</COMPLICATED>
    # </COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

@@ -0,0 +1,339 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
from __future__ import absolute_import, division, unicode_literals

from jx_elasticsearch.es52.expressions import ES52
from mo_dots import is_data, is_list, startswith_field
from mo_future import text_type
from mo_json import value2json
from mo_logs import Log

_new = object.__new__


class Aggs(object):

    def __init__(self, name=None):
        self.name = name
        self.children = []
        self.decoders = []
        self.selects = []

    def to_es(self, schema, query_path="."):
        if self.children:
            return {"aggs": {
                name: t.to_es(schema, query_path)
                for i, t in enumerate(self.children)
                for name in [t.name if t.name else "_" + text_type(i)]
            }}
        else:
            return {}

    def add(self, child):
        self.children.append(child)
        return self

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, Aggs) and self.name == other.name

    def merge(self, other):
        if self != other:
            return False
        self.children.extend(other.children)
        self.decoders.extend(other.decoders)
        return True

    def __str__(self):
        return value2json(self.to_es)

    def copy(self):
        output = _new(self.__class__)
        output.name = self.name
        output.children = self.children[:]
        output.decoders = self.decoders[:]
        output.selects = self.selects[:]
        return output
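A small usage sketch (field names hypothetical, not from the commit): Aggs nodes form a tree, add() returns self so trees compose fluently, and to_es() renders the tree as the nested "aggs" dicts ES expects.

# Sketch only: compose a terms agg with a child metric agg.
tree = Aggs().add(
    TermsAggs("_match", {"field": "user"}, None).add(
        ExprAggs("avg_age", {"avg": {"field": "age"}}, None)
    )
)
# tree.to_es(schema) would render roughly:
# {"aggs": {"_match": {
#     "terms": {"field": "user"},
#     "aggs": {"avg_age": {"avg": {"field": "age"}}}
# }}}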


class ExprAggs(Aggs):

    def __init__(self, name, expr, select):
        Aggs.__init__(self, name)
        self.expr = expr
        self.selects = [select]

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, ExprAggs) and self.name == other.name and self.expr == other.expr

    def merge(self, other):
        if self != other:
            return False
        self.expr += other.expr
        self.children.extend(other.children)
        self.decoders.extend(other.decoders)
        self.selects.extend(other.selects)
        return True

    def to_es(self, schema, query_path="."):
        self.expr['aggs'] = Aggs.to_es(self, schema, query_path).get('aggs')
        return self.expr

    def copy(self):
        output = Aggs.copy(self)
        output.expr = self.expr
        return output


class FilterAggs(Aggs):
    def __init__(self, name, filter, decoder):
        Aggs.__init__(self, name)
        self.filter = filter
        if is_data(filter):
            Log.error("programming error")
        self.decoders = [decoder] if decoder else []

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, FilterAggs) and self.name == other.name and self.filter == other.filter

    def merge(self, other):
        if self != other:
            return False
        self.children.extend(other.children)
        self.decoders.extend(other.decoders)
        return True

    def to_es(self, schema, query_path="."):
        output = Aggs.to_es(self, schema, query_path)
        output['filter'] = ES52[self.filter].partial_eval().to_esfilter(schema)
        return output

    def copy(self):
        output = Aggs.copy(self)
        output.filter = self.filter
        return output


class ComplexAggs(FilterAggs):
    """
    FOR COMPLICATED AGGREGATIONS
    """

    def __init__(self, select):
        Aggs.__init__(self, "_filter")
        self.expr = {"filter": {"match_all": {}}}
        self.selects = [select]

    def to_es(self, schema, query_path="."):
        self.expr['aggs'] = Aggs.to_es(self, schema, query_path).get('aggs')
        return self.expr

    def copy(self):
        output = Aggs.copy(self)
        output.expr = self.expr
        return output


class FiltersAggs(Aggs):
    def __init__(self, name, filters, decoder):
        Aggs.__init__(self, name)
        self.filters = filters
        self.decoders = [decoder] if decoder else []
        if not is_list(filters):
            Log.error("expecting a list")

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, FiltersAggs) and self.name == other.name and self.filters == other.filters

    def merge(self, other):
        if self != other:
            return False
        self.children.extend(other.children)
        self.decoders.extend(other.decoders)
        return True

    def to_es(self, schema, query_path="."):
        output = Aggs.to_es(self, schema, query_path)
        output['filters'] = {"filters": [f.partial_eval().to_esfilter(schema) for f in self.filters]}
        return output

    def copy(self):
        output = Aggs.copy(self)
        output.filters = self.filters
        return output


class NestedAggs(Aggs):
    def __init__(self, path):
        Aggs.__init__(self, "_nested")
        self.path = path

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, NestedAggs) and self.path == other.path

    def to_es(self, schema, query_path="."):
        output = Aggs.to_es(self, schema, self.path)
        if query_path == self.path:
            Log.error("this should have been cancelled out")
        elif startswith_field(self.path, query_path):
            output['nested'] = {"path": self.path}
        else:
            output["reverse_nested"] = {"path": None if self.path == "." else self.path}
        return output

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, NestedAggs) and self.path == other.path

    def copy(self):
        output = Aggs.copy(self)
        output.path = self.path
        return output


class TermsAggs(Aggs):
    def __init__(self, name, terms, decoder):
        Aggs.__init__(self, name)
        self.terms = terms
        self.decoders = [decoder] if decoder else []

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, TermsAggs) and self.name == other.name and self.terms == other.terms

    def to_es(self, schema, query_path="."):
        output = Aggs.to_es(self, schema, query_path)
        output['terms'] = self.terms
        return output

    def copy(self):
        output = Aggs.copy(self)
        output.terms = self.terms
        return output


class RangeAggs(Aggs):
    def __init__(self, name, expr, decoder):
        Aggs.__init__(self, name)
        self.expr = expr
        self.decoders = [decoder] if decoder else []

    def __eq__(self, other):
        if self is other:
            return True
        return isinstance(other, RangeAggs) and self.name == other.name and self.expr == other.expr

    def to_es(self, schema, query_path="."):
        output = Aggs.to_es(self, schema, query_path)
        output['range'] = self.expr
        return output

    def copy(self):
        output = Aggs.copy(self)
        output.expr = self.expr
        return output


def simplify(aggs):
    # CONVERT FROM TREE TO UNION OF SEQUENCES
    def depth_first(aggr):
        if aggr.__class__ == Aggs:
            # BASE CLASS Aggs IS ONLY A PLACEHOLDER
            if not aggr.children:
                yield tuple()
                return
            for c in aggr.children:
                for path in depth_first(c):
                    yield path
        elif not aggr.children:
            yield (aggr,)
        else:
            for c in aggr.children:
                for path in depth_first(c):
                    yield (aggr,) + path

    # CANCEL OUT REDUNDANT NESTED AGGS
    combined = []
    for path in depth_first(aggs):
        current_nested = NestedAggs(".")
        prev = None
        remove = []
        for step in path:
            if isinstance(step, NestedAggs):
                if prev is not None:
                    remove.append(prev)
                    prev = None
                if current_nested is not None:
                    if current_nested.path == step.path:
                        remove.append(step)
                        continue
                    else:
                        pass
                prev = step
            else:
                current_nested = prev if prev else current_nested
                prev = None

        combined.append(tuple(p for p in path if not any(p is r for r in remove)))

    # COMMON FACTOR, CONVERT BACK TO TREE
    def merge(aggregations):
        output = []
        while True:
            common_children = []
            first_found = None
            common = None
            for i, terms in enumerate(aggregations):
                if not terms:
                    continue
                term, rest = terms[0], terms[1:]
                if first_found is None:
                    first_found = term
                    common_children.append(rest)
                    common = first_found.copy()
                    aggregations[i] = None
                elif term == first_found:
                    common_children.append(rest)
                    common.selects.extend([t for t in term.selects if not any(t is s for s in common.selects)])
                    common.decoders.extend([t for t in term.decoders if not any(t is d for d in common.decoders)])
                    aggregations[i] = None

            if first_found is None:
                return output
            else:
                common.children = merge(common_children)
                output.append(common)

    merged = [trim_root(o) for o in merge(combined)]

    output = Aggs()
    output.children = merged
    return output
|
||||
|
||||
|
||||
def trim_root(agg):
|
||||
if isinstance(agg, NestedAggs) and agg.path == '.':
|
||||
if len(agg.children) == 1:
|
||||
return agg.children[0]
|
||||
else:
|
||||
output = Aggs()
|
||||
output.children = agg.children
|
||||
return output
|
||||
else:
|
||||
return agg
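
The simplify() pass above flattens the aggregation tree into a union of root-to-leaf sequences, cancels redundant nested/reverse-nested steps, and then factors common prefixes back into a tree so ES only computes each shared aggregation once. A minimal sketch of the prefix-factoring idea on plain tuples (the names and structure here are illustrative, not part of this module):

def factor_common_prefixes(paths):
    # GROUP PATHS BY THEIR FIRST STEP, THEN RECURSE ON THE REMAINDERS
    output = []
    remaining = [list(p) for p in paths if p]
    while remaining:
        head = remaining[0][0]
        with_head = [p[1:] for p in remaining if p[0] == head]
        remaining = [p for p in remaining if p[0] != head]
        output.append((head, factor_common_prefixes([p for p in with_head if p])))
    return output

# TWO QUERY PATHS SHARING THE SAME "_nested" PREFIX COLLAPSE INTO ONE SUBTREE
paths = [("_nested", "terms_a"), ("_nested", "terms_b")]
assert factor_common_prefixes(paths) == [("_nested", [("terms_a", []), ("terms_b", [])])]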

(Diff for the next file was not shown by the viewer because of its large size.)

@ -7,110 +7,154 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from jx_base.expressions import TupleOp
from jx_elasticsearch.es52.aggs import count_dim, aggs_iterator, format_dispatch, drill
from jx_base.query import canonical_aggregates
from jx_base.language import is_op
from jx_elasticsearch.es52.aggs import aggs_iterator, count_dim, format_dispatch
from jx_python.containers.cube import Cube
from mo_collections.matrix import Matrix
from mo_dots import Data, set_default, wrap, split_field, coalesce
from mo_dots import Data, coalesce, is_list, set_default, split_field, wrap
from mo_future import sort_using_key
from mo_logs import Log
from mo_logs.strings import quote
from pyLibrary import convert

FunctionType = type(lambda: 1)


def format_cube(decoders, aggs, start, query, select):
    # decoders = sorted(decoders, key=lambda d: -d.edge.dim)  # REVERSE DECODER ORDER, BECAUSE ES QUERY WAS BUILT IN REVERSE ORDER
    new_edges = count_dim(aggs, decoders)
def format_cube(aggs, es_query, query, decoders, all_selects):
    new_edges = count_dim(aggs, es_query, decoders)

    dims = []
    for e in new_edges:
        if isinstance(e.value, TupleOp):
        if is_op(e.value, TupleOp):
            e.allowNulls = False

        extra = 0 if e.allowNulls is False else 1
        dims.append(len(e.domain.partitions) + extra)

    dims = tuple(dims)
    matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select]
    for row, coord, agg in aggs_iterator(aggs, decoders):
        for s, m in matricies:
            try:
                v = s.pull(agg)
                m[coord] = v
            except Exception as e:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=e)
    if any(s.default != canonical_aggregates[s.aggregate].default for s in all_selects):
        # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION
        is_default = Matrix(dims=dims, zeros=True)
        matricies = {s.name: Matrix(dims=dims) for s in all_selects}
        for row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                m = matricies[select.name]
                v = select.pull(agg)
                if v == None:
                    continue
                is_default[coord] = False
                union(m, coord, v, select.aggregate)

        # FILL THE DEFAULT VALUES
        for c, v in is_default:
            if v:
                for s in all_selects:
                    matricies[s.name][c] = s.default
    else:
        matricies = {s.name: Matrix(dims=dims, zeros=s.default) for s in all_selects}
        for row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                m = matricies[select.name]
                v = select.pull(agg)
                union(m, coord, v, select.aggregate)

    cube = Cube(
        query.select,
        sort_using_key(new_edges, key=lambda e: e.dim),  # ENSURE EDGES ARE IN SAME ORDER AS QUERY
        {s.name: m for s, m in matricies}
        matricies
    )
    cube.frum = query
    return cube


def format_cube_from_aggop(decoders, aggs, start, query, select):
    agg = drill(aggs)
    matricies = [(s, Matrix(dims=[], zeros=s.default)) for s in select]
    for s, m in matricies:
        m[tuple()] = s.pull(agg)
    cube = Cube(query.select, [], {s.name: m for s, m in matricies})
    cube.frum = query
    return cube
def _value_drill(agg):
    while True:
        deeper = agg.get("_nested")
        if deeper:
            agg = deeper
            continue
        deeper = agg.get("_filter")
        if deeper:
            agg = deeper
            continue
        return agg


def format_table(decoders, aggs, start, query, select):
    new_edges = count_dim(aggs, decoders)
    header = new_edges.name + select.name
def format_table(aggs, es_query, query, decoders, all_selects):
    new_edges = wrap(count_dim(aggs, es_query, decoders))
    dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
    rank = len(dims)
    header = tuple(new_edges.name + all_selects.name)
    name2index = {s.name: i + rank for i, s in enumerate(all_selects)}

    def data():
        dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
        is_sent = Matrix(dims=dims)
        give_me_zeros = query.sort and not query.groupby
        if give_me_zeros:
            # WE REQUIRE THE ZEROS FOR SORTING
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            for row, coord, agg in aggs_iterator(aggs, decoders):
                missing_coord = all_coord.next()
                while coord != missing_coord:
                    record = [d.get_value(missing_coord[i]) for i, d in enumerate(decoders)]
                    for s in select:
                        if s.aggregate == "count":
                            record.append(0)
                        else:
                            record.append(None)
                    yield record
                    missing_coord = all_coord.next()
            ordered_coord = all_coord.next()[::-1]
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != ordered_coord:
                    # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
                    if output is not None:
                        for s in all_selects:
                            i = name2index[s.name]
                            if output[i] is None:
                                output[i] = s.default
                    # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
                    ordered_coord = all_coord.next()[::-1]

                output = [d.get_value(c) for c, d in zip(coord, decoders)]
                for s in select:
                    output.append(s.pull(agg))
                while coord != ordered_coord:
                    # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
                    record = [d.get_value(ordered_coord[i]) for i, d in enumerate(decoders)] + [s.default for s in all_selects]
                    yield record
                    ordered_coord = all_coord.next()[::-1]
                # coord == missing_coord
                output = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for s in all_selects]
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)
                yield output
        else:
            for row, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
            last_coord = None  # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != last_coord:
                    if output:
                        # SET DEFAULTS
                        for i, s in enumerate(all_selects):
                            v = output[rank + i]
                            if v == None:
                                output[rank + i] = s.default
                        yield output
                    output = is_sent[coord]
                    if output == None:
                        output = is_sent[coord] = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for _ in all_selects]
                    last_coord = coord
                # THIS IS A TRICK! WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)

                output = [d.get_value(c) for c, d in zip(coord, decoders)]
                for s in select:
                    output.append(s.pull(agg))
            if output:
                # SET DEFAULTS ON LAST ROW
                for i, s in enumerate(all_selects):
                    v = output[rank + i]
                    if v == None:
                        output[rank + i] = s.default
                yield output

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        record = [d.get_value(c[i]) for i, d in enumerate(decoders)]
                        for s in select:
                            if s.aggregate == "count":
                                record.append(0)
                            else:
                                record.append(None)
                for coord, output in is_sent:
                    if output == None:
                        record = [d.get_value(c) for c, d in zip(coord, decoders)] + [s.default for s in all_selects]
                        yield record

    return Data(

@ -119,42 +163,8 @@ def format_table(decoders, aggs, start, query, select):
        data=list(data())
    )


def format_table_from_groupby(decoders, aggs, start, query, select):
    header = [d.edge.name.replace("\\.", ".") for d in decoders] + select.name

    def data():
        for row, coord, agg in aggs_iterator(aggs, decoders):
            if agg.get('doc_count', 0) == 0:
                continue
            output = [d.get_value_from_row(row) for d in decoders]
            for s in select:
                output.append(s.pull(agg))
            yield output

    return Data(
        meta={"format": "table"},
        header=header,
        data=list(data())
    )


def format_table_from_aggop(decoders, aggs, start, query, select):
    header = select.name
    agg = drill(aggs)
    row = []
    for s in select:
        row.append(s.pull(agg))

    return Data(
        meta={"format": "table"},
        header=header,
        data=[row]
    )


def format_tab(decoders, aggs, start, query, select):
    table = format_table(decoders, aggs, start, query, select)
def format_tab(aggs, es_query, query, decoders, select):
    table = format_table(aggs, es_query, query, decoders, select)

    def data():
        yield "\t".join(map(quote, table.header))

@ -164,8 +174,8 @@ def format_tab(decoders, aggs, start, query, select):
    return data()


def format_csv(decoders, aggs, start, query, select):
    table = format_table(decoders, aggs, start, query, select)
def format_csv(aggs, es_query, query, decoders, select):
    table = format_table(aggs, es_query, query, decoders, select)

    def data():
        yield ", ".join(map(quote, table.header))

@ -175,18 +185,42 @@ def format_csv(decoders, aggs, start, query, select):
    return data()


def format_list_from_groupby(decoders, aggs, start, query, select):
    def data():
        for row, coord, agg in aggs_iterator(aggs, decoders):
            if agg.get('doc_count', 0) == 0:
                continue
            output = Data()
            for g, d in zip(query.groupby, decoders):
                output[coalesce(g.put.name, g.name)] = d.get_value_from_row(row)

            for s in select:
                output[s.name] = s.pull(agg)
            yield output
def format_list_from_groupby(aggs, es_query, query, decoders, all_selects):
    new_edges = wrap(count_dim(aggs, es_query, decoders))

    def data():
        groupby = query.groupby
        dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
        is_sent = Matrix(dims=dims)
        give_me_zeros = query.sort and not query.groupby

        finishes = []
        # IRREGULAR DEFAULTS MESS WITH union(), SET THEM AT END, IF ANY
        for s in all_selects:
            if s.default != canonical_aggregates[s.aggregate].default:
                s.finish = s.default
                s.default = None
                finishes.append(s)

        for row, coord, agg, _selects in aggs_iterator(aggs, es_query, decoders, give_me_zeros=give_me_zeros):
            output = is_sent[coord]
            if output == None:
                output = is_sent[coord] = Data()
                for g, d, c in zip(groupby, decoders, coord):
                    output[g.put.name] = d.get_value(c)
                for s in all_selects:
                    output[s.name] = s.default
                yield output
            # THIS IS A TRICK! WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for s in _selects:
                union(output, s.name, s.pull(agg), s.aggregate)

        if finishes:
            # SET ANY DEFAULTS
            for c, o in is_sent:
                for s in finishes:
                    if o[s.name] == None:
                        o[s.name] = s.finish

    for g in query.groupby:
        g.put.name = coalesce(g.put.name, g.name)

@ -198,94 +232,36 @@ def format_list_from_groupby(decoders, aggs, start, query, select):
    return output


def format_list(decoders, aggs, start, query, select):
    new_edges = count_dim(aggs, decoders)
def format_list(aggs, es_query, query, decoders, select):
    table = format_table(aggs, es_query, query, decoders, select)
    header = table.header

    def data():
        dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)

        is_sent = Matrix(dims=dims, zeros=0)
        if query.sort and not query.groupby:
            # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            for _, coord, agg in aggs_iterator(aggs, decoders):
                missing_coord = all_coord.next()
                while coord != missing_coord:
                    # INSERT THE MISSING COORDINATE INTO THE GENERATION
                    output = Data()
                    for i, d in enumerate(decoders):
                        output[query.edges[i].name] = d.get_value(missing_coord[i])

                    for s in select:
                        if s.aggregate == "count":
                            output[s.name] = 0
                    yield output
                    missing_coord = all_coord.next()

                output = Data()
                for e, c, d in zip(query.edges, coord, decoders):
                    output[e.name] = d.get_value(c)

                for s in select:
                    output[s.name] = s.pull(agg)
                yield output
        else:
            for row, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1

                output = Data()
                for e, c, d in zip(query.edges, coord, decoders):
                    output[e.name] = d.get_value(c)

                for s in select:
                    output[s.name] = s.pull(agg)
                yield output

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        output = Data()
                        for i, d in enumerate(decoders):
                            output[query.edges[i].name] = d.get_value(c[i])

                        for s in select:
                            if s.aggregate == "count":
                                output[s.name] = 0
                        yield output
    if query.edges or query.groupby:
        data = []
        for row in table.data:
            d = Data()
            for h, r in zip(header, row):
                d[h] = r
            data.append(d)
        format = "list"
    elif is_list(query.select):
        data = Data()
        for h, r in zip(header, table.data[0]):
            data[h] = r
        format = "value"
    else:
        data = table.data[0][0]
        format = "value"

    output = Data(
        meta={"format": "list"},
        data=list(data())
        meta={"format": format},
        data=data
    )
    return output


def format_list_from_aggop(decoders, aggs, start, query, select):
    agg = drill(aggs)

    if isinstance(query.select, list):
        item = Data()
        for s in select:
            item[s.name] = s.pull(agg)
    else:
        item = select[0].pull(agg)

    if query.edges or query.groupby:
        return wrap({
            "meta": {"format": "list"},
            "data": [item]
        })
    else:
        return wrap({
            "meta": {"format": "value"},
            "data": item
        })


def format_line(decoders, aggs, start, query, select):
    list = format_list(decoders, aggs, start, query, select)
def format_line(aggs, es_query, query, decoders, select):
    list = format_list(aggs, es_query, query, decoders, select)

    def data():
        for d in list.data:

@ -295,10 +271,10 @@ def format_line(decoders, aggs, start, query, select):


set_default(format_dispatch, {
    None: (format_cube, format_table_from_groupby, format_cube_from_aggop, "application/json"),
    "cube": (format_cube, format_cube, format_cube_from_aggop, "application/json"),
    "table": (format_table, format_table_from_groupby, format_table_from_aggop, "application/json"),
    "list": (format_list, format_list_from_groupby, format_list_from_aggop, "application/json"),
    None: (format_cube, format_table, format_cube, "application/json"),
    "cube": (format_cube, format_cube, format_cube, "application/json"),
    "table": (format_table, format_table, format_table, "application/json"),
    "list": (format_list, format_list_from_groupby, format_list, "application/json"),
    # "csv": (format_csv, format_csv_from_groupby, "text/csv"),
    # "tab": (format_tab, format_tab_from_groupby, "text/tab-separated-values"),
    # "line": (format_line, format_line_from_groupby, "application/json")

@ -314,3 +290,27 @@ def _get(v, k, d):
    except Exception:
        v = [vv.get(p) for vv in v]
    return v


def union(matrix, coord, value, agg):
    # matrix[coord] = existing + value WITH ADDITIONAL CHECKS
    existing = matrix[coord]
    if existing == None:
        matrix[coord] = value
    elif value == None:
        pass
    elif agg not in ['sum', 'count']:
        if agg == "cardinality" and (existing == 0 or value == 0):
            matrix[coord] = existing + value
            return
        elif agg == "stats" and (not existing or not value):
            matrix[coord] = existing + value
            return
        elif agg == "union":
            matrix[coord] = list(set(existing) | set(value))
            return
        Log.warning("{{agg}} not ready", agg=agg)
    else:
        matrix[coord] = existing + value
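
The union() helper above merges partial aggregate values that arrive for the same output cell from different branches of the aggregation tree; plain addition is only safe for additive aggregates. A small standalone illustration of the same merge rules (hypothetical wrapper, not part of this module):

# sum AND count ARE ADDITIVE; "union" USES SET UNION; OTHER AGGREGATES NEED CARE
cells = {}

def merge_cell(key, value, agg):
    existing = cells.get(key)
    if existing is None:
        cells[key] = value
    elif agg in ("sum", "count"):
        cells[key] = existing + value
    elif agg == "union":
        cells[key] = set(existing) | set(value)

merge_cell(("a",), 3, "sum")
merge_cell(("a",), 4, "sum")
assert cells[("a",)] == 7
merge_cell(("b",), {1, 2}, "union")
merge_cell(("b",), {2, 3}, "union")
assert cells[("b",)] == {1, 2, 3}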

(Diff for the next file was not shown by the viewer because of its large size.)

@ -7,26 +7,23 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from collections import Mapping
from __future__ import absolute_import, division, unicode_literals

from jx_base.domains import ALGEBRAIC
from jx_base.expressions import IDENTITY
from jx_base.expressions import IDENTITY, LeavesOp, Variable
from jx_base.query import DEFAULT_LIMIT
from jx_base.language import is_op
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es52.expressions import Variable, LeavesOp
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_path
from jx_elasticsearch.es52.painless import Painless
from jx_elasticsearch.es52.util import MATCH_ALL, es_and, es_or, jx_sort_to_es_sort
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
from mo_dots.lists import FlatList
from mo_future import transpose
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, unnest_path, untyped
from mo_dots import Data, FlatList, coalesce, concat_field, is_data, is_list, join_field, listwrap, literal_field, relative_field, set_default, split_field, unwrap, unwraplist, wrap
from mo_future import first, text_type, transpose
from mo_json import NESTED
from mo_json.typed_encoder import decode_property, unnest_path, untype_path, untyped
from mo_logs import Log
from mo_math import AND, MAX
from mo_times.timer import Timer

@ -54,31 +51,30 @@ def is_setop(es, query):

def es_setop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    es_query, filters = es_query_template(schema.query_path[0])
    nested_filter = None
    set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.stored_fields = FlatList()
    split_select = {".": ESSelect('.')}

    selects = wrap([s.copy() for s in listwrap(query.select)])
    def get_select(path):
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select

    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()
    schema = query.frum.schema
    # columns = schema.columns
    # nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")

    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
                full_name = concat_field(select.name, relative_field(untype_path(c.name), term.var))
                if c.jx_type == NESTED:
                    es_query.stored_fields = ["_source"]
                    get_select('.').use_source = True
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),

@ -86,98 +82,88 @@ def es_setop(es, query):
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                elif c.nested_path[0] != ".":
                    pass  # THE NESTED PARENT WILL CAPTURE THIS
                else:
                    es_query.stored_fields += [c.es_column]
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."}
                    })
                    put_index += 1
        elif isinstance(select.value, Variable):
        elif is_op(select.value, Variable):
            s_column = select.value.var
            # LEAVES OF OBJECT
            leaves = schema.leaves(s_column)
            nested_selects = {}

            if s_column == ".":
                # PULL ALL SOURCE
                get_select('.').use_source = True
                new_select.append({
                    "name": select.name,
                    "value": select.value,
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": get_pull_source(".")
                })
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            # nested_selects = {}
            if leaves:
                if s_column == '.':
                    # PULL ALL SOURCE
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": select.name,
                        "value": select.value,
                        "put": {"name": select.name, "index": put_index, "child": "."},
                        "pull": get_pull_source(".")
                    })
                elif any(c.jx_type == NESTED for c in leaves):
            if any(c.jx_type == NESTED for c in leaves):
                # PULL WHOLE NESTED ARRAYS
                    es_query.stored_fields = ["_source"]
                get_select('.').use_source = True
                for c in leaves:
                    if len(c.nested_path) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                        jx_name = untype_path(c.names["."])
                        pre_child = join_field(decode_property(n) for n in split_field(c.name))
                        new_select.append({
                            "name": select.name,
                            "value": Variable(c.es_column),
                            "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
                            "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                            "pull": get_pull_source(c.es_column)
                        })
            else:
                # PULL ONLY WHAT'S NEEDED
                for c in leaves:
                    if len(c.nested_path) == 1:
                        jx_name = untype_path(c.names["."])
                        if c.jx_type == NESTED:
                            es_query.stored_fields = ["_source"]
                    c_nested_path = c.nested_path[0]
                    if c_nested_path == ".":
                        if c.es_column == "_id":
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
                                "put": {"name": select.name, "index": put_index, "child": "."},
                                "pull": lambda row: row._id
                            })
                        elif c.jx_type == NESTED:
                            get_select('.').use_source = True
                            pre_child = join_field(decode_property(n) for n in split_field(c.name))
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                                "pull": get_pull_source(c.es_column)
                            })
                        else:
                            es_query.stored_fields += [c.es_column]
                            get_select(c_nested_path).fields.append(c.es_column)
                            pre_child = join_field(decode_property(n) for n in split_field(c.name))
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}
                                "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))}
                            })
                    else:
                        if not nested_filter:
                            where = filters[0].copy()
                            nested_filter = [where]
                            for k in filters[0].keys():
                                filters[0][k] = None
                            set_default(
                                filters[0],
                                es_and([where, es_or(nested_filter)])
                            )
                        es_select = get_select(c_nested_path)
                        es_select.fields.append(c.es_column)

                        nested_path = c.nested_path[0]
                        if nested_path not in nested_selects:
                            where = nested_selects[nested_path] = Data()
                            nested_filter += [where]
                            where.nested.path = nested_path
                            where.nested.query.match_all = {}
                            where.nested.inner_hits._source = False
                            where.nested.inner_hits.stored_fields += [c.es_column]

                            child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
                            pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
                        child = relative_field(untype_path(relative_field(c.name, schema.query_path[0])), s_column)
                        pull = accumulate_nested_doc(c_nested_path, Variable(relative_field(s_column, unnest_path(c_nested_path))))
                        new_select.append({
                            "name": select.name,
                            "value": select.value,
                            "put": {
                                "name": select.name,
                                "index": put_index,
                                "child": child
                            },
                            "pull": pull
                        })
                        else:
                            nested_selects[nested_path].nested.inner_hits.stored_fields += [c.es_column]
            else:
                new_select.append({
                    "name": select.name,

@ -186,21 +172,22 @@ def es_setop(es, query):
                })
            put_index += 1
        else:
            painless = select.value.partial_eval().to_es_script(schema)
            es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
            new_select.append({
                "name": select.name,
                "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1
            split_scripts = split_expression_by_path(select.value, schema, lang=Painless)
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {"script": text_type(Painless[first(script)].partial_eval().to_es_script(schema))}
                new_select.append({
                    "name": select.name,
                    "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
                put_index += 1

    for n in new_select:
        if n.pull:
            continue
        elif isinstance(n.value, Variable):
            if es_query.stored_fields[0] == "_source":
                es_query.stored_fields = ["_source"]
        elif is_op(n.value, Variable):
            if get_select('.').use_source:
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")

@ -209,15 +196,22 @@ def es_setop(es, query):
            else:
                Log.error("Do not know what to do")

    with Timer("call to ES") as call_timer:
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    with Timer("call to ES", silent=True) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    # Log.note("{{output}}", output=T)

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter"):
        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
            output.meta.timing.es = call_timer.duration
            output.meta.content_type = mime_type

@ -252,14 +246,18 @@ def accumulate_nested_doc(nested_path, expr=IDENTITY):

def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list):
    if is_list(query.select):
        for row in T:
            r = Data()
            for s in select:
                v = s.pull(row)
                r[s.put.name][s.put.child] = unwraplist(v)
                v = unwraplist(s.pull(row))
                if v is not None:
                    try:
                        r[s.put.name][s.put.child] = v
                    except Exception as e:
                        Log.error("what's happening here?")
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
    elif is_op(query.select.value, LeavesOp):
        for row in T:
            r = Data()
            for s in select:

@ -310,7 +308,7 @@ def format_table(T, select, query=None):

    header = [None] * num_columns

    if isinstance(query.select, Mapping) and not isinstance(query.select.value, LeavesOp):
    if is_data(query.select) and not is_op(query.select.value, LeavesOp):
        for s in select:
            header[s.put.index] = s.name
    else:

@ -360,9 +358,8 @@ def get_pull(column):
    if column.nested_path[0] == ".":
        return concat_field("fields", literal_field(column.es_column))
    else:
        depth = len(split_field(column.nested_path[0]))
        rel_name = split_field(column.es_column)[depth:]
        return join_field(["_inner"] + rel_name)
        rel_name = relative_field(column.es_column, column.nested_path[0])
        return concat_field("_inner", rel_name)


def get_pull_function(column):

@ -377,14 +374,80 @@ def get_pull_source(es_column):

def get_pull_stats(stats_name, median_name):
    return jx_expression_to_function({"select": [
        {"name": "count", "value": stats_name + ".count"},
        {"name": "sum", "value": stats_name + ".sum"},
        {"name": "min", "value": stats_name + ".min"},
        {"name": "max", "value": stats_name + ".max"},
        {"name": "avg", "value": stats_name + ".avg"},
        {"name": "sos", "value": stats_name + ".sum_of_squares"},
        {"name": "std", "value": stats_name + ".std_deviation"},
        {"name": "var", "value": stats_name + ".variance"},
        {"name": "median", "value": median_name + ".values.50\\.0"}
        {"name": "count", "value": join_field([stats_name, "count"])},
        {"name": "sum", "value": join_field([stats_name, "sum"])},
        {"name": "min", "value": join_field([stats_name, "min"])},
        {"name": "max", "value": join_field([stats_name, "max"])},
        {"name": "avg", "value": join_field([stats_name, "avg"])},
        {"name": "sos", "value": join_field([stats_name, "sum_of_squares"])},
        {"name": "std", "value": join_field([stats_name, "std_deviation"])},
        {"name": "var", "value": join_field([stats_name, "variance"])},
        {"name": "median", "value": join_field([median_name, "values", "50.0"])}
    ]})


class ESSelect(object):
    """
    ACCUMULATE THE FIELDS WE ARE INTERESTED IN
    """

    def __init__(self, path):
        self.path = path
        self.use_source = False
        self.fields = []
        self.scripts = {}

    def to_es(self):
        return {
            "_source": self.use_source,
            "stored_fields": self.fields if not self.use_source else None,
            "script_fields": self.scripts if self.scripts else None
        }


def es_query_proto(path, selects, wheres, schema):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
    :param wheres: MAP FROM path TO LIST OF WHERE CONDITIONS
    :return: (es_query, filters_map) TUPLE
    """
    output = None
    last_where = MATCH_ALL
    for p in reversed(sorted(wheres.keys() | set(selects.keys()))):
        where = wheres.get(p)
        select = selects.get(p)

        if where:
            where = AndOp(where).partial_eval().to_esfilter(schema)
            if output:
                where = es_or([es_and([output, where]), where])
        else:
            if output:
                if last_where is MATCH_ALL:
                    where = es_or([output, MATCH_ALL])
                else:
                    where = output
            else:
                where = MATCH_ALL

        if p == ".":
            output = set_default(
                {
                    "from": 0,
                    "size": 0,
                    "sort": [],
                    "query": where
                },
                select.to_es()
            )
        else:
            output = {"nested": {
                "path": p,
                "inner_hits": set_default({"size": 100000}, select.to_es()) if select else None,
                "query": where
            }}

        last_where = where
    return output
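
es_query_proto() above assembles the request inside-out: the deepest nested path is wrapped first, and each shallower level wraps what has been accumulated so far. Assuming a single nested path "a.b" whose select asks for one stored field, the result would have roughly this shape (hand-written illustration with made-up field names, not generated output):

# SKETCH OF THE EXPECTED OUTPUT SHAPE
expected_shape = {
    "from": 0,
    "size": 0,
    "sort": [],
    "query": {"nested": {
        "path": "a.b",
        "inner_hits": {"size": 100000, "_source": False, "stored_fields": ["a.b.c"], "script_fields": None},
        "query": {"match_all": {}}
    }}
}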
@ -7,25 +7,25 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from jx_elasticsearch.es52.expressions import Variable
from jx_base.expressions import Variable
from jx_base.language import is_op
from mo_dots import wrap
from mo_future import text_type
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
from mo_future import is_text
from mo_json import BOOLEAN, IS_NULL, NUMBER, OBJECT, STRING
from mo_logs import Log
from pyLibrary.convert import value2boolean


def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
    :return:
    :return: (es_query, es_filters) TUPLE
    """
    if not isinstance(path, text_type):
    if not is_text(path):
        Log.error("expecting path to be a string")

    if path != ".":

@ -62,7 +62,7 @@ def jx_sort_to_es_sort(sort, schema):

    output = []
    for s in sort:
        if isinstance(s.value, Variable):
        if is_op(s.value, Variable):
            cols = schema.leaves(s.value.var)
            if s.sort == -1:
                types = OBJECT, STRING, NUMBER, BOOLEAN

@ -71,7 +71,7 @@ def jx_sort_to_es_sort(sort, schema):

    for type in types:
        for c in cols:
            if c.jx_type == type:
            if c.jx_type is type:
                if s.sort == -1:
                    output.append({c.es_column: "desc"})
                else:

@ -91,6 +91,7 @@ aggregates = {
    "sum": "sum",
    "add": "sum",
    "count": "value_count",
    "count_values": "count_values",
    "maximum": "max",
    "minimum": "min",
    "max": "max",

@ -114,7 +115,6 @@ aggregates = {

NON_STATISTICAL_AGGS = {"none", "one"}


def es_and(terms):
    return wrap({"bool": {"filter": terms}})

@ -128,8 +128,24 @@ def es_not(term):

def es_script(term):
    return wrap({"script": {"lang": "painless", "inline": term}})
    return wrap({"script": {"lang": "painless", "source": term}})


def es_missing(term):
    return {"bool": {"must_not": {"exists": {"field": term}}}}


def es_exists(term):
    return {"exists": {"field": term}}


MATCH_ALL = wrap({"match_all": {}})
MATCH_NONE = es_not({"match_all": {}})


pull_functions = {
    IS_NULL: lambda x: None,
    STRING: lambda x: x,
    NUMBER: lambda x: float(x) if x != None else None,
    BOOLEAN: value2boolean
}
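
The helpers above are thin constructors for ES 5.x bool queries; they compose by nesting plain dicts. A short sketch using only the functions defined in this file (es_or and es_not are defined in a hunk not shown here; the result is structurally equivalent to the commented dict):

# DOCUMENTS WHERE "x" EXISTS AND "y" IS MISSING
f = es_and([es_exists("x"), es_missing("y")])
# f == {"bool": {"filter": [{"exists": {"field": "x"}},
#                           {"bool": {"must_not": {"exists": {"field": "y"}}}}]}}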
@ -7,12 +7,12 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from datetime import date, datetime
from decimal import Decimal
import itertools
from itertools import product

import jx_base
from jx_base import TableDesc

@ -20,19 +20,20 @@ from jx_base.namespace import Namespace
from jx_base.query import QueryOp
from jx_python import jx
from jx_python.containers.list_usingPythonList import ListContainer
from jx_python.meta import ColumnList, Column
from mo_collections.relation import Relation_usingList
from mo_dots import Data, relative_field, SELF_PATH, ROOT_PATH, coalesce, set_default, Null, split_field, join_field, wrap, concat_field, startswith_field, literal_field
from mo_json.typed_encoder import EXISTS_TYPE, untype_path, unnest_path, OBJECT, EXISTS, STRUCT, BOOLEAN
from jx_python.meta import Column, ColumnList
from mo_dots import Data, FlatList, Null, NullType, ROOT_PATH, coalesce, concat_field, is_list, literal_field, relative_field, set_default, split_field, startswith_field, tail_field, wrap
from mo_files import URL
from mo_future import PY2, none_type, text_type
from mo_json import BOOLEAN, EXISTS, INTEGER, OBJECT, STRING, STRUCT
from mo_json.typed_encoder import BOOLEAN_TYPE, EXISTS_TYPE, NUMBER_TYPE, STRING_TYPE, unnest_path, untype_path
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import Except
from mo_logs.strings import quote
from mo_math import MAX
from mo_threads import Queue, THREAD_STOP, Thread, Till
from mo_times import HOUR, MINUTE, Timer, Date
from mo_times import Date, HOUR, MINUTE, Timer, WEEK
from pyLibrary.env import elasticsearch
from pyLibrary.env.elasticsearch import es_type_to_json_type, _get_best_type_from_mapping
from pyLibrary.env.elasticsearch import _get_best_type_from_mapping, es_type_to_json_type

MAX_COLUMN_METADATA_AGE = 12 * HOUR
ENABLE_META_SCAN = True

@ -72,17 +73,17 @@ class ElasticsearchMetadata(Namespace):
        self.index_does_not_exist = set()
        self.todo = Queue("refresh metadata", max=100000, unique=True)

        self.index_to_alias = Relation_usingList()
        self.index_to_alias = {}

        self.es_metadata = Null
        self.metadata_last_updated = Date.now() - OLD_METADATA

        self.meta = Data()
        self.meta.columns = ColumnList()
        self.meta.columns = ColumnList(URL(self.es_cluster.settings.host).host)

        self.alias_to_query_paths = {
            "meta.columns": [['.']],
            "meta.tables": [['.']]
            "meta.columns": [ROOT_PATH],
            "meta.tables": [ROOT_PATH]
        }
        self.alias_last_updated = {
            "meta.columns": Date.now(),

@ -91,10 +92,7 @@ class ElasticsearchMetadata(Namespace):
        table_columns = metadata_tables()
        self.meta.tables = ListContainer(
            "meta.tables",
            [
                # TableDesc("meta.columns", None, ".", Date.now()),
                # TableDesc("meta.tables", None, ".", Date.now())
            ],
            [],
            jx_base.Schema(".", table_columns)
        )
        self.meta.columns.extend(table_columns)

@ -102,9 +100,12 @@ class ElasticsearchMetadata(Namespace):
        if ENABLE_META_SCAN:
            self.worker = Thread.run("refresh metadata", self.monitor)
        else:
            self.worker = Thread.run("refresh metadata", self.not_monitor)
            self.worker = Thread.run("not refresh metadata", self.not_monitor)
        return

    @property
    def namespace(self):
        return self.meta.columns.namespace

@ -123,14 +124,13 @@ class ElasticsearchMetadata(Namespace):

        alias = table_desc.name
        canonical_index = self.es_cluster.get_best_matching_index(alias).index
        update_required = not (table_desc.timestamp < es_last_updated)
        metadata = self.es_cluster.get_metadata(force=update_required)
        es_metadata_update_required = not (table_desc.timestamp < es_last_updated)
        metadata = self.es_cluster.get_metadata(force=es_metadata_update_required)

        indexes = self.index_to_alias.get_domain(alias)
        props = [
            (self.es_cluster.get_index(index=i, type=t, debug=DEBUG), t, m.properties)
            for i, d in metadata.indices.items()
            if i in indexes
            if alias in d.aliases
            for t, m in [_get_best_type_from_mapping(d.mappings)]
        ]

@ -148,16 +148,19 @@ class ElasticsearchMetadata(Namespace):

        data_type, mapping = _get_best_type_from_mapping(meta.mappings)
        mapping.properties["_id"] = {"type": "string", "index": "not_analyzed"}
        self._parse_properties(alias, mapping, meta)
        columns = self._parse_properties(alias, mapping)
        table_desc.timestamp = es_last_updated
        return columns

    def _parse_properties(self, alias, mapping, meta):
        abs_columns = elasticsearch.parse_properties(alias, None, mapping.properties)
        if any(c.cardinality == 0 and c.names['.'] != '_id' for c in abs_columns):
    def _parse_properties(self, alias, mapping):
        abs_columns = elasticsearch.parse_properties(alias, ".", ROOT_PATH, mapping.properties)
        if DEBUG and any(c.cardinality == 0 and c.name != '_id' for c in abs_columns):
            Log.warning(
                "Some columns are not stored {{names}}",
                "Some columns are not stored in {{url}} {{index|quote}} table:\n{{names}}",
                url=self.es_cluster.url,
                index=alias,
                names=[
                    ".".join((c.es_index, c.names['.']))
                    ".".join((c.es_index, c.name))
                    for c in abs_columns
                    if c.cardinality == 0
                ]

@ -178,20 +181,41 @@ class ElasticsearchMetadata(Namespace):
                    b.insert(i, aa)
                    break
        for q in query_paths:
            q.append(SELF_PATH)
            q.append(".")
        query_paths.append(ROOT_PATH)
        self.alias_to_query_paths[alias] = query_paths
        for i in self.index_to_alias.get_domain(alias):
            self.alias_to_query_paths[i] = query_paths

        # ADD RELATIVE NAMES
        # ENSURE ALL TABLES HAVE THE QUERY PATHS SET
        self.alias_to_query_paths[alias] = query_paths
        for i, a in self.index_to_alias.items():
            if a == alias:
                self.alias_to_query_paths[i] = query_paths

        # ENSURE COLUMN HAS CORRECT jx_type
        # PICK DEEPEST NESTED PROPERTY AS REPRESENTATIVE
        output = []
        best = {}
        for abs_column in abs_columns:
            abs_column.last_updated = None
            abs_column.jx_type = jx_type(abs_column)
            for query_path in query_paths:
                abs_column.names[query_path[0]] = relative_field(abs_column.names["."], query_path[0])
            self.todo.add(self.meta.columns.add(abs_column))
            if abs_column.jx_type not in STRUCT:
                clean_name = unnest_path(abs_column.name)
                other = best.get(clean_name)
                if other:
                    if len(other.nested_path) < len(abs_column.nested_path):
                        output.remove(other)
                        self.meta.columns.update({"clear": ".", "where": {"eq": {"es_column": other.es_column, "es_index": other.es_index}}})
                    else:
                        continue
                best[clean_name] = abs_column
            output.append(abs_column)

        # REGISTER ALL COLUMNS
        canonicals = []
        for abs_column in output:
            canonical = self.meta.columns.add(abs_column)
            canonicals.append(canonical)

        self.todo.extend(canonicals)
        return canonicals

    def query(self, _query):
        return self.meta.columns.query(QueryOp(set_default(

@ -210,12 +234,19 @@ class ElasticsearchMetadata(Namespace):
        if name in self.alias_last_updated:
            return name
        else:
            return self.index_to_alias[name]
            return self.index_to_alias.get(name)

    def get_columns(self, table_name, column_name=None, force=False):
    def get_columns(self, table_name, column_name=None, after=None, timeout=None):
        """
        RETURN METADATA COLUMNS

        :param table_name: TABLE WE WANT COLUMNS FOR
        :param column_name: OPTIONAL NAME, IF INTERESTED IN ONLY ONE COLUMN
        :param after: FORCE LOAD, WAITING FOR last_updated TO BE AFTER THIS TIME
        :param timeout: Signal; True when should give up
        :return:
        """
        DEBUG and after and Log.note("getting columns for after {{time}}", time=after)
        table_path = split_field(table_name)
        root_table_name = table_path[0]

@ -227,39 +258,49 @@ class ElasticsearchMetadata(Namespace):
            Log.error("{{table|quote}} does not exist", table=table_name)

        try:
            last_update = MAX([
                self.es_cluster.index_last_updated[i]
                for i in self.index_to_alias.get_domain(alias)
            ])

            table = self.get_table(alias)[0]
            # LAST TIME WE GOT INFO FOR THIS TABLE
            if not table:
                table = TableDesc(
                    name=alias,
                    url=None,
                    query_path=['.'],
                    query_path=["."],
                    timestamp=Date.MIN
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._reload_columns(table)
            elif force or table.timestamp < last_update:
                self._reload_columns(table)
                columns = self._reload_columns(table)
                DEBUG and Log.note("columns from reload")
            elif after or table.timestamp < self.es_cluster.metatdata_last_updated:
                columns = self._reload_columns(table)
                DEBUG and Log.note("columns from reload")
            else:
                columns = self.meta.columns.find(alias, column_name)
                DEBUG and Log.note("columns from find()")

            columns = self.meta.columns.find(alias, column_name)
            columns = jx.sort(columns, "names.\\.")
            # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
            while len(self.todo) and not all(columns.get("last_updated")):
            DEBUG and Log.note("columns are {{ids}}", ids=[id(c) for c in columns])

            columns = jx.sort(columns, "name")

            if after is None:
                return columns  # DO NOT WAIT FOR COMPLETE COLUMNS

            # WAIT FOR THE COLUMNS TO UPDATE
            while True:
                pending = [c for c in columns if after >= c.last_updated or (c.cardinality == None and c.jx_type not in STRUCT)]
                if not pending:
                    break
                if timeout:
                    Log.error("trying to get columns timed out")
                if DEBUG:
                    if len(columns) > 10:
                        Log.note("waiting for {{num}} columns to update", num=len([c for c in columns if not c.last_updated]))
                    if len(pending) > 10:
                        Log.note("waiting for {{num}} columns to update by {{timestamp}}", num=len(pending), timestamp=after)
                    else:
                        Log.note("waiting for columns to update {{columns|json}}", columns=[c.es_index+"."+c.es_column for c in columns if not c.last_updated])
                        Log.note("waiting for columns to update by {{timestamp}}; {{columns|json}}", timestamp=after, columns=[c.es_index + "." + c.es_column + " id=" + text_type(id(c)) for c in pending])
                Till(seconds=1).wait()
            return columns
        except Exception as e:
            Log.error("Not expected", cause=e)
            Log.error("Failure to get columns for {{table}}", table=table_name, cause=e)

        return []
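
get_columns() now takes an `after` timestamp in place of the old force flag: when given, the caller blocks until every column's last_updated passes that mark. The polling pattern reduces to something like this (simplified sketch; `columns` stands in for the real column objects and a plain deadline replaces the Signal-based timeout):

import time

def wait_until_fresh(columns, after, timeout_seconds=30):
    # POLL UNTIL EVERY COLUMN WAS UPDATED AFTER THE GIVEN TIMESTAMP
    deadline = time.time() + timeout_seconds
    while True:
        pending = [c for c in columns if c.last_updated is None or c.last_updated <= after]
        if not pending:
            return columns
        if time.time() > deadline:
            raise Exception("timed out waiting for column metadata")
        time.sleep(1)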
@ -267,6 +308,7 @@ class ElasticsearchMetadata(Namespace):
        """
        QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
        """
        now = Date.now()
        if column.es_index in self.index_does_not_exist:
            return

@ -281,7 +323,7 @@ class ElasticsearchMetadata(Namespace):
                    "count": len(self.meta.columns),
                    "cardinality": len(partitions),
                    "multi": 1,
                    "last_updated": Date.now()
                    "last_updated": now
                },
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })

@ -294,7 +336,7 @@ class ElasticsearchMetadata(Namespace):
                    "count": len(self.meta.tables),
                    "cardinality": len(partitions),
                    "multi": 1,
                    "last_updated": Date.now()
                    "last_updated": now
                },
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })

@ -330,21 +372,42 @@ class ElasticsearchMetadata(Namespace):
            })
            count = result.hits.total
            cardinality = 2
            multi = 1

            DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
            self.meta.columns.update({
                "set": {
                    "count": count,
                    "cardinality": cardinality,
                    "partitions": [False, True],
                    "multi": 1,
                    "last_updated": now
                },
                "clear": ["partitions"],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
            return
        else:
            result = self.es_cluster.post("/" + es_index + "/_search", data={
            es_query = {
                "aggs": {
                    "count": _counting_query(column),
                    "multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}
                    "_filter": {
                        "aggs": {"multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}},
                        "filter": {"bool": {"should": [
                            {"range": {"etl.timestamp.~n~": {"gte": (Date.today() - WEEK)}}},
                            {"bool": {"must_not": {"exists": {"field": "etl.timestamp.~n~"}}}}
                        ]}}
                    }
                },
                "size": 0
            })
            }

            result = self.es_cluster.post("/" + es_index + "/_search", data=es_query)
            agg_results = result.aggregations
            count = result.hits.total
            cardinality = coalesce(agg_results.count.value, agg_results.count._nested.value, agg_results.count.doc_count)
            multi = int(coalesce(agg_results.multi.value, 1))
            multi = int(coalesce(agg_results._filter.multi.value, 1))
            if cardinality == None:
                Log.error("logic error")

            query = Data(size=0)
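
The cardinality probe above now nests the `multi` max-script aggregation under a `_filter` agg, so the multiplicity estimate only considers documents with a recent etl timestamp (or none at all). Stripped of the column-specific parts, the request body is shaped like this (illustrative field name and epoch value; the `count` agg stands in for whatever _counting_query() returns):

es_query = {
    "aggs": {
        "count": {"cardinality": {"field": "example_column"}},
        "_filter": {
            "filter": {"bool": {"should": [
                {"range": {"etl.timestamp.~n~": {"gte": 1514764800}}},
                {"bool": {"must_not": {"exists": {"field": "etl.timestamp.~n~"}}}}
            ]}},
            "aggs": {"multi": {"max": {"script": "doc['example_column'].values.size()"}}}
        }
    },
    "size": 0
}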
@ -354,7 +417,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
"count": cardinality,
|
||||
"cardinality": cardinality,
|
||||
"multi": 1,
|
||||
"last_updated": Date.now()
|
||||
"last_updated": now
|
||||
},
|
||||
"clear": ["partitions"],
|
||||
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
|
||||
|
@ -367,7 +430,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
"count": count,
|
||||
"cardinality": cardinality,
|
||||
"multi": multi,
|
||||
"last_updated": Date.now()
|
||||
"last_updated": now
|
||||
},
|
||||
"clear": ["partitions"],
|
||||
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
|
||||
|
@ -380,7 +443,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
"count": count,
|
||||
"cardinality": cardinality,
|
||||
"multi": multi,
|
||||
"last_updated": Date.now()
|
||||
"last_updated": now
|
||||
},
|
||||
"clear": ["partitions"],
|
||||
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
|
||||
|
@ -391,7 +454,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
"nested": {"path": column.nested_path[0]},
|
||||
"aggs": {"_nested": {"terms": {"field": column.es_column}}}
|
||||
}
|
||||
elif cardinality == 0:
|
||||
elif cardinality == 0: # WHEN DOES THIS HAPPEN?
|
||||
query.aggs["_"] = {"terms": {"field": column.es_column}}
|
||||
else:
|
||||
query.aggs["_"] = {"terms": {"field": column.es_column, "size": cardinality}}
|
||||
|
@ -404,13 +467,14 @@ class ElasticsearchMetadata(Namespace):
|
|||
else:
|
||||
parts = jx.sort(aggs.buckets.key)
|
||||
|
||||
DEBUG and Log.note("update metadata for {{column.es_index}}.{{column.es_column}} (id={{id}}) at {{time}}", id=id(column), column=column, time=now)
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"count": count,
|
||||
"cardinality": cardinality,
|
||||
"multi": multi,
|
||||
"partitions": parts,
|
||||
"last_updated": Date.now()
|
||||
"last_updated": now
|
||||
},
|
||||
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
|
||||
})
|
||||
|
@ -421,17 +485,24 @@ class ElasticsearchMetadata(Namespace):
TEST_TABLE = "testdata"
is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
if is_missing_index and is_test_table:
if is_missing_index:
# WE EXPECT TEST TABLES TO DISAPPEAR
Log.warning("Missing index {{col.es_index}}", col=column, cause=e)
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index}}
})
self.index_does_not_exist.add(column.es_index)
elif "No field found for" in e:
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
Log.warning("Could not get column {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)
else:
self.meta.columns.update({
"set": {
"last_updated": Date.now()
"last_updated": now
},
"clear": [
"count",
@ -451,7 +522,7 @@ class ElasticsearchMetadata(Namespace):
old_columns = [
c
for c in self.meta.columns
if (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.jx_type not in STRUCT
if ((c.last_updated < Date.now() - MAX_COLUMN_METADATA_AGE) or c.cardinality == None) and c.jx_type not in STRUCT
]
if old_columns:
DEBUG and Log.note(

@ -460,10 +531,6 @@ class ElasticsearchMetadata(Namespace):
dates=[Date(t).format() for t in wrap(old_columns).last_updated]
)
self.todo.extend(old_columns)
# TEST CONSISTENCY
for c, d in product(list(self.todo.queue), list(self.todo.queue)):
if c.es_column == d.es_column and c.es_index == d.es_index and c != d:
Log.error("")
else:
DEBUG and Log.note("no more metadata to update")
@ -474,15 +541,19 @@ class ElasticsearchMetadata(Namespace):

with Timer("update {{table}}.{{column}}", param={"table": column.es_index, "column": column.es_column}, silent=not DEBUG):
if column.es_index in self.index_does_not_exist:
DEBUG and Log.note("{{column.es_column}} does not exist", column=column)
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index}}
})
continue
if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
column.last_updated = Date.now()
continue
elif column.last_updated >= Date.now()-TOO_OLD:
elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
# DO NOT UPDATE FRESH COLUMN METADATA
DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
continue
try:
self._update_cardinality(column)
@ -502,32 +573,33 @@ class ElasticsearchMetadata(Namespace):
Log.alert("metadata scan has been disabled")
please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
while not please_stop:
c = self.todo.pop()
if c == THREAD_STOP:
column = self.todo.pop()
if column == THREAD_STOP:
break

if c.last_updated >= Date.now()-TOO_OLD:
if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
column.last_updated = Date.now()
continue
elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
# DO NOT UPDATE FRESH COLUMN METADATA
DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
continue

with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": c}, silent=not DEBUG, too_long=0.05):
self.meta.columns.update({
"set": {
"last_updated": Date.now()
},
"clear": [
"count",
"cardinality",
"multi",
"partitions",
],
"where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
})
with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
if untype_path(column.name) in ["build.type", "run.type"]:
try:
self._update_cardinality(column)
except Exception as e:
Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
else:
column.last_updated = Date.now()


def get_table(self, name):
if name == "meta.columns":
return self.meta.columns

# return self.meta.columns
with self.meta.tables.locker:
return wrap([t for t in self.meta.tables.data if t.name == name])

@ -537,8 +609,9 @@ class ElasticsearchMetadata(Namespace):
def get_schema(self, name):
if name == "meta.columns":
return self.meta.columns.schema
query_path = split_field(name)
root, rest = query_path[0], join_field(query_path[1:])
if name == "meta.tables":
return self.meta.tables
root, rest = tail_field(name)
return self.get_snowflake(root).get_schema(rest)

@ -564,6 +637,13 @@ class Snowflake(object):
return output
Log.error("Can not find index {{index|quote}}", index=self.name)

@property
def sorted_query_paths(self):
"""
RETURN A LIST OF ALL SCHEMAS IN DEPTH-FIRST TOPOLOGICAL ORDER
"""
return list(reversed(sorted(p[0] for p in self.namespace.alias_to_query_paths.get(self.name))))

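A note on the property just added: reverse-sorting the dotted path strings is what makes the depth-first claim hold, since a child path such as "a.b" sorts after its parent "a". A minimal, self-contained sketch (illustrative helper name, not the library's API):

    def sorted_query_paths_sketch(query_paths):
        # deeper (longer, dotted) paths sort after their ancestors,
        # so reversing a plain sort yields deepest-first order
        return list(reversed(sorted(query_paths)))

    assert sorted_query_paths_sketch([".", "a", "a.b"]) == ["a.b", "a", "."]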
@property
def columns(self):
"""

@ -578,15 +658,38 @@ class Schema(jx_base.Schema):
"""

def __init__(self, query_path, snowflake):
if not isinstance(snowflake.query_paths[0], list):
if not is_list(snowflake.query_paths[0]):
Log.error("Snowflake query paths should be a list of string tuples (well, technically, a list of lists of strings)")
self.snowflake = snowflake
try:
self.query_path = [
path = [
p
for p in snowflake.query_paths
if untype_path(p[0]) == query_path
][0]
self.snowflake = snowflake
]
if path:
# WE DO NOT NEED TO LOOK INTO MULTI-VALUED FIELDS AS A TABLE
self.multi = None
self.query_path = path[0]
else:
# LOOK INTO A SPECIFIC MULTI VALUED COLUMN
try:
self.multi = [
c
for c in self.snowflake.columns
if untype_path(c.name) == query_path and c.multi > 1
][0]
self.query_path = [self.multi.name] + self.multi.nested_path
except Exception as e:
# PROBLEM WITH METADATA UPDATE
self.multi = None
self.query_path = [query_path] + ["."]

Log.warning("Problem getting query path {{path|quote}} in snowflake {{sf|quote}}", path=query_path, sf=snowflake.name, cause=e)

if not is_list(self.query_path) or self.query_path[len(self.query_path) - 1] != ".":
Log.error("error")

except Exception as e:
Log.error("logic error", cause=e)

@ -595,43 +698,102 @@ class Schema(jx_base.Schema):
:param column_name:
:return: ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
"""
column_name = unnest_path(column_name)
clean_name = unnest_path(column_name)

if clean_name != column_name:
clean_name = column_name
cleaner = lambda x: x
else:
cleaner = unnest_path

columns = self.columns
deep_path = self.query_path[0]
for path in self.query_path:
# TODO: '.' IMPLIES ALL FIELDS FROM ABSOLUTE PERSPECTIVE, ALL OTHERS ARE A RELATIVE PERSPECTIVE
# TODO: HOW TO REFER TO FIELDS THAT MAY BE SHADOWED BY A RELATIVE NAME?
for path in reversed(self.query_path) if clean_name == '.' else self.query_path:
output = [
c
for c in columns
if (
(c.names['.'] != "_id" or column_name == "_id") and
c.jx_type not in OBJECTS and
startswith_field(unnest_path(c.names[path]), column_name)
(c.name != "_id" or clean_name == "_id") and
(
(c.jx_type == EXISTS and column_name.endswith("." + EXISTS_TYPE)) or
c.jx_type not in OBJECTS or
(clean_name == '.' and c.cardinality == 0)
) and
startswith_field(cleaner(relative_field(c.name, path)), clean_name)
)
]
if output:
return set(output)
return set()

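A toy illustration of the leaf-matching rule used above, with simplified stand-ins for mo_dots' startswith_field and relative_field (the stand-ins are assumptions for demonstration, not the real implementations):

    def startswith_field(field, prefix):
        # "a.b.c" starts with "a" and "a.b", but not with "a.bx"
        return prefix == "." or field == prefix or field.startswith(prefix + ".")

    def relative_field(field, path):
        # express field relative to path; "." means "self"
        if path == ".":
            return field
        if field == path:
            return "."
        if field.startswith(path + "."):
            return field[len(path) + 1:]
        return field

    columns = ["_id", "a", "a.b", "a.b.c", "x"]
    leaves = [
        c
        for c in columns
        if c != "_id" and startswith_field(relative_field(c, "a"), "b")
    ]
    assert leaves == ["a.b", "a.b.c"]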
def new_leaves(self, column_name):
"""
:param column_name:
:return: ALL COLUMNS THAT START WITH column_name, INCLUDING DEEP COLUMNS
"""
column_name = unnest_path(column_name)
columns = self.columns
all_paths = self.snowflake.sorted_query_paths

output = {}
for c in columns:
if c.name == "_id" and column_name != "_id":
continue
if c.jx_type in OBJECTS:
continue
if c.cardinality == 0:
continue
for path in all_paths:
if not startswith_field(unnest_path(relative_field(c.name, path)), column_name):
continue
existing = output.get(path)
if not existing:
output[path] = [c]
continue
if len(path) > len(c.nested_path[0]):
continue
if any("." + t + "." in c.es_column for t in (STRING_TYPE, NUMBER_TYPE, BOOLEAN_TYPE)):
# ELASTICSEARCH field TYPES ARE NOT ALLOWED
continue
# ONLY THE DEEPEST COLUMN WILL BE CHOSEN
output[path].append(c)
return set(output.values())

def both_leaves(self, column_name):
old = self.old_leaves(column_name)
new = self.new_leaves(column_name)

if old != new:
Log.error(
"not the same: {{old}}, {{new}}",
old=[c.name for c in old],
new=[c.name for c in new]
)

return new

def values(self, column_name, exclude_type=STRUCT):
"""
RETURN ALL COLUMNS THAT column_name REFERS TO
"""
column_name = unnest_path(column_name)
columns = self.columns
output = []
for path in self.query_path:
full_path = untype_path(concat_field(path, column_name))
for c in columns:
if c.jx_type in exclude_type:
continue
# if c.cardinality == 0:
# continue
if untype_path(c.name) == full_path:
output.append(c)
if output:
return output
return []

def values(self, column_name):
"""
RETURN ALL COLUMNS THAT column_name REFERS TO
"""
column_name = unnest_path(column_name)
columns = self.columns
deep_path = self.query_path[0]
for path in self.query_path:
output = [
c
for c in columns
if (
c.jx_type not in STRUCT and
untype_path(c.names[path]) == column_name
)
]
if output:
return output
return output

def __getitem__(self, column_name):
return self.values(column_name)

@ -641,7 +803,7 @@ class Schema(jx_base.Schema):

@property
def columns(self):
return self.snowflake.namespace.get_columns(literal_field(self.snowflake.name))
return self.snowflake.columns

def map_to_es(self):
"""

@ -653,9 +815,9 @@ class Schema(jx_base.Schema):
output,
{
k: c.es_column
for c in self.snowflake.columns
for c in self.columns
if c.jx_type not in STRUCT
for rel_name in [c.names[path]]
for rel_name in [relative_field(c.name, path)]
for k in [rel_name, untype_path(rel_name), unnest_path(rel_name)]
}
)
@ -695,10 +857,12 @@ def metadata_tables():
return wrap(
[
Column(
names={".": c},
name=c,
es_index="meta.tables",
es_column=c,
es_type="string",
jx_type=STRING,
last_updated=Date.now(),
nested_path=ROOT_PATH
)
for c in [

@ -708,10 +872,12 @@ def metadata_tables():
]
]+[
Column(
names={".": c},
name=c,
es_index="meta.tables",
es_column=c,
es_type="integer",
jx_type=INTEGER,
last_updated=Date.now(),
nested_path=ROOT_PATH
)
for c in [
@ -730,4 +896,152 @@ def jx_type(column):
return es_type_to_json_type[column.es_type]


python_type_to_es_type = {
none_type: "undefined",
NullType: "undefined",
bool: "boolean",
str: "string",
text_type: "string",
int: "integer",
float: "double",
Data: "object",
dict: "object",
set: "nested",
list: "nested",
FlatList: "nested",
Date: "double",
Decimal: "double",
datetime: "double",
date: "double"
}

if PY2:
python_type_to_es_type[long] = "integer"

_merge_es_type = {
"undefined": {
"undefined": "undefined",
"boolean": "boolean",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": "object",
"nested": "nested"
},
"boolean": {
"undefined": "boolean",
"boolean": "boolean",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"integer": {
"undefined": "integer",
"boolean": "integer",
"integer": "integer",
"long": "long",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"long": {
"undefined": "long",
"boolean": "long",
"integer": "long",
"long": "long",
"float": "double",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"float": {
"undefined": "float",
"boolean": "float",
"integer": "float",
"long": "double",
"float": "float",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"double": {
"undefined": "double",
"boolean": "double",
"integer": "double",
"long": "double",
"float": "double",
"double": "double",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"number": {
"undefined": "number",
"boolean": "number",
"integer": "number",
"long": "number",
"float": "number",
"double": "number",
"number": "number",
"string": "string",
"object": None,
"nested": None
},
"string": {
"undefined": "string",
"boolean": "string",
"integer": "string",
"long": "string",
"float": "string",
"double": "string",
"number": "string",
"string": "string",
"object": None,
"nested": None
},
"object": {
"undefined": "object",
"boolean": None,
"integer": None,
"long": None,
"float": None,
"double": None,
"number": None,
"string": None,
"object": "object",
"nested": "nested"
},
"nested": {
"undefined": "nested",
"boolean": None,
"integer": None,
"long": None,
"float": None,
"double": None,
"number": None,
"string": None,
"object": "nested",
"nested": "nested"
}
}


OBJECTS = (OBJECT, EXISTS)
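The nested dict above is a small type lattice: _merge_es_type[a][b] is the widened type able to hold both operands, and None marks an incompatible pair. For example, reading straight from the table as defined:

    assert _merge_es_type["integer"]["double"] == "double"    # numeric types widen
    assert _merge_es_type["undefined"]["string"] == "string"  # "undefined" acts as identity
    assert _merge_es_type["boolean"]["object"] is None        # scalars do not merge with objects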
@ -7,21 +7,18 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from collections import Mapping

import mo_dots as dot
from mo_dots import Null, Data, FlatList, wrap, wrap_leaves, listwrap
from mo_logs import Log
from mo_math import MAX, OR
from mo_collections.matrix import Matrix
from mo_future import is_text, is_binary
from jx_base.container import Container
from jx_base.query import _normalize_edge
from jx_python.cubes.aggs import cube_aggs
from jx_python.lists.aggs import is_aggs
from jx_base.query import _normalize_edge
from mo_collections.matrix import Matrix
from mo_dots import Data, FlatList, Null, is_data, is_list, listwrap, wrap, wrap_leaves
import mo_dots as dot
from mo_logs import Log
from mo_math import MAX, OR


class Cube(Container):

@ -36,7 +33,7 @@ class Cube(Container):
ALLOWED, USING THE select AND edges TO DESCRIBE THE data
"""

self.is_value = False if isinstance(select, list) else True
self.is_value = False if is_list(select) else True
self.select = select
self.meta = Data(format="cube")  # PUT EXTRA MARKUP HERE
self.is_none = False
@ -45,37 +42,37 @@ class Cube(Container):
is_none = True

# ENSURE frum IS PROPER FORM
if isinstance(select, list):
if is_list(select):
if edges and OR(not isinstance(v, Matrix) for v in data.values()):
Log.error("Expecting data to be a dict with Matrix values")

if not edges:
if not data:
if isinstance(select, list):
if is_list(select):
Log.error("not expecting a list of records")

data = {select.name: Matrix.ZERO}
self.edges = FlatList.EMPTY
elif isinstance(data, Mapping):
elif is_data(data):
# EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
length = MAX([len(v) for v in data.values()])
if length >= 1:
self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
else:
self.edges = FlatList.EMPTY
elif isinstance(data, list):
if isinstance(select, list):
elif is_list(data):
if is_list(select):
Log.error("not expecting a list of records")

data = {select.name: Matrix.wrap(data)}
self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(data), "interval": 1}}])
elif isinstance(data, Matrix):
if isinstance(select, list):
if is_list(select):
Log.error("not expecting a list of records")

data = {select.name: data}
else:
if isinstance(select, list):
if is_list(select):
Log.error("not expecting a list of records")

data = {select.name: Matrix(value=data)}

@ -148,7 +145,7 @@ class Cube(Container):
return Null
if self.edges:
Log.error("can not get value of with dimension")
if isinstance(self.select, list):
if is_list(self.select):
Log.error("can not get value of multi-valued cubes")
return self.data[self.select.name].cube

@ -205,7 +202,7 @@ class Cube(Container):
# EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
# AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
# PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
if isinstance(item, Mapping):
if is_data(item):
coordinates = [None] * len(self.edges)

# MAP DICT TO NUMERIC INDICES

@ -232,7 +229,7 @@ class Cube(Container):
data={k: Matrix(values=c.__getitem__(coordinates)) for k, c in self.data.items()}
)
return output
elif isinstance(item, text_type):
elif is_text(item):
# RETURN A VALUE CUBE
if self.is_value:
if item != self.select.name:

@ -320,7 +317,7 @@ class Cube(Container):
getKey = [e.domain.getKey for e in self.edges]
lookup = [[getKey[i](p) for p in e.domain.partitions+([None] if e.allowNulls else [])] for i, e in enumerate(self.edges)]

if isinstance(self.select, list):
if is_list(self.select):
selects = listwrap(self.select)
index, v = transpose(*self.data[selects[0].name].groupby(selector))

@ -375,7 +372,7 @@ class Cube(Container):
output = wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})
return output

if isinstance(self.select, list):
if is_list(self.select):
selects = listwrap(self.select)
index, v = transpose(*self.data[selects[0].name].groupby(selector))
@ -6,22 +6,20 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from copy import copy
from datetime import datetime

from mo_future import text_type
from mo_dots import wrap, Data, FlatList, literal_field
from mo_json.typed_encoder import TYPE_PREFIX
from mo_logs import Log
from pyLibrary import convert
from jx_base.query import QueryOp
from jx_python import jx
from jx_python.containers import Container
from jx_python.expressions import Variable, Literal
from jx_base.query import QueryOp
from jx_python.expressions import Literal, Variable
from mo_dots import Data, FlatList, is_list, literal_field, wrap
from mo_future import text_type
from mo_json.typed_encoder import TYPE_PREFIX
from mo_logs import Log

INDEX = "__index__"
PARENT = "__parent__"

@ -108,7 +106,7 @@ class DocStore(Container):
if query.sort:
short_list = self._sort(query.sort)

if isinstance(query.select, list):
if is_list(query.select):
accessors = map(jx.get, query.select.value)

if query.window:

@ -218,7 +216,7 @@ class DocStore(Container):
return filters[where.name](self, where)

def _eq(self, op):
if isinstance(op.lhs, Variable) and isinstance(op.rhs, Literal):
if is_op(op.lhs, Variable) and is_literal(op.rhs):
return copy(self._index[op.lhs][op.rhs])

def _and(self, op):
@ -7,29 +7,25 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from copy import copy
import itertools
from collections import Mapping

import jx_base
from jx_base import Container
from jx_base.expressions import jx_expression, Expression, Variable, TRUE
from jx_python.expression_compiler import compile_expression
from jx_base.expressions import TRUE, Variable
from jx_base.language import is_expression, is_op
from jx_python.expressions import jx_expression_to_function
from jx_python.lists.aggs import is_aggs, list_aggs
from jx_python.meta import get_schema_from_list
from mo_collections import UniqueIndex
from mo_dots import Data, wrap, listwrap, unwraplist, unwrap, Null
from mo_future import sort_using_key
from mo_dots import Data, Null, is_data, is_list, listwrap, unwrap, unwraplist, wrap
from mo_future import first, sort_using_key
from mo_logs import Log
from mo_threads import Lock
from pyLibrary import convert

_get = object.__getattribute__


class ListContainer(Container, jx_base.Namespace, jx_base.Table):
"""
@ -96,14 +92,14 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
if q.format == "list":
return Data(data=output.data, meta={"format": "list"})
elif q.format == "table":
head = [c.names['.'] for c in output.schema.columns]
head = [c.name for c in output.schema.columns]
data = [
[r if h == '.' else r[h] for h in head]
[r if h == "." else r[h] for h in head]
for r in output.data
]
return Data(header=head, data=data, meta={"format": "table"})
elif q.format == "cube":
head = [c.names['.'] for c in output.schema.columns]
head = [c.name for c in output.schema.columns]
rows = [
[r[h] for h in head]
for r in output.data

@ -144,10 +140,10 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
return self.where(where)

def where(self, where):
if isinstance(where, Mapping):
temp = compile_expression(jx_expression(where).to_python())
elif isinstance(where, Expression):
temp = compile_expression(where.to_python())
if is_data(where):
temp = jx_expression_to_function(where)
elif is_expression(where):
temp = jx_expression_to_function(where)
else:
temp = where

@ -161,7 +157,7 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
:param select: the variable to extract from list
:return: a simple list of the extraction
"""
if isinstance(select, list):
if is_list(select):
return [(d[s] for s in select) for d in self.data]
else:
return [d[select] for d in self.data]

@ -169,20 +165,20 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
def select(self, select):
selects = listwrap(select)

if len(selects) == 1 and isinstance(selects[0].value, Variable) and selects[0].value.var == ".":
if len(selects) == 1 and is_op(selects[0].value, Variable) and selects[0].value.var == ".":
new_schema = self.schema
if selects[0].name == ".":
return self
else:
new_schema = None

if isinstance(select, list):
if is_list(select):
if all(
isinstance(s.value, Variable) and s.name == s.value.var
is_op(s.value, Variable) and s.name == s.value.var
for s in select
):
names = set(s.value.var for s in select)
new_schema = Schema(".", [c for c in self.schema.columns if c.names['.'] in names])
new_schema = Schema(".", [c for c in self.schema.columns if c.name in names])

push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]
def selector(d):

@ -195,6 +191,10 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
else:
select_value = jx_expression_to_function(select.value)
new_data = map(select_value, self.data)
if is_op(select.value, Variable):
column = copy(first(c for c in self.schema.columns if c.name == select.value.var))
column.name = '.'
new_schema = Schema("from " + self.name, [column])

return ListContainer("from "+self.name, data=new_data, schema=new_schema)

@ -242,10 +242,16 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
self.data.extend(documents)

def __data__(self):
return wrap({
"meta": {"format": "list"},
"data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
})
if first(self.schema.columns).name=='.':
return wrap({
"meta": {"format": "list"},
"data": self.data
})
else:
return wrap({
"meta": {"format": "list"},
"data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
})

def get_columns(self, table_name=None):
return self.schema.values()

@ -264,8 +270,6 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
def __len__(self):
return len(self.data)

# class Namespace(jx_base.Namespace):

def get_snowflake(self, name):
if self.name != name:
Log.error("This container only has table by name of {{name}}", name=name)

@ -291,8 +295,6 @@ def _exec(code):
Log.error("Could not execute {{code|quote}}", code=code, cause=e)


from jx_base.schema import Schema
from jx_python import jx

@ -300,6 +302,5 @@ from jx_python import jx
DUAL = ListContainer(
name="dual",
data=[{}],
schema=Schema(table_name="dual", columns=UniqueIndex(keys=("names.\\.",)))
schema=Schema(table_name="dual", columns=UniqueIndex(keys=("name",)))
)

@ -7,19 +7,17 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import itertools

from jx_base.domains import DefaultDomain, SimpleSetDomain
from jx_python import windows
from mo_dots import listwrap
from mo_logs import Log

from jx_base.domains import SimpleSetDomain, DefaultDomain
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import listwrap
from mo_logs import Log


def cube_aggs(frum, query):
@ -7,16 +7,15 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import re

from pyLibrary import convert
from mo_dots import Data, coalesce, is_data, listwrap, wrap_leaves
from mo_logs import Log
from mo_dots import coalesce, Data, listwrap, wrap_leaves
from mo_times.dates import Date
from pyLibrary import convert

true = True
false = False

@ -42,6 +41,7 @@ def compile_expression(source):
_ = EMPTY_DICT
_ = re
_ = wrap_leaves
_ = is_data

fake_locals = {}
try:
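The bare assignments above (_ = re, _ = wrap_leaves, _ = is_data) presumably keep those imports referenced, since the names are otherwise used only inside the generated source that compile_expression later executes. A minimal sketch of that pattern, with hypothetical names:

    # names used only by eval'ed source must still be reachable via globals
    def coalesce_sketch(*values):
        return next((v for v in values if v is not None), None)

    source = "lambda row, rownum, rows: coalesce_sketch(row.get('a'), 0)"
    fn = eval(source, {"coalesce_sketch": coalesce_sketch})
    assert fn({"a": None}, 0, []) == 0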
(Diff for one file is not shown because of its large size.)
@ -8,15 +8,15 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from collections import Mapping
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import functools
from mo_math import MIN

from mo_dots import Data, FlatList, coalesce, is_data, is_list, split_field, wrap
from mo_future import is_text
from mo_logs import Log
from mo_dots import split_field, coalesce, Data, FlatList, wrap
from mo_math import MIN


class PartFlatList(list):

@ -52,10 +52,10 @@ class PartFlatList(list):
yield r

def select(self, fields):
if isinstance(fields, Mapping):
if is_data(fields):
fields=fields.value

if isinstance(fields, text_type):
if is_text(fields):
# RETURN LIST OF VALUES
if len(split_field(fields)) == 1:
if self.path[0] == fields:

@ -71,7 +71,7 @@ class PartFlatList(list):
_select1((wrap(d[depth]) for d in self.data), short_key, 0, output)
return output

if isinstance(fields, list):
if is_list(fields):
output = FlatList()

meta = []

@ -131,7 +131,7 @@ def _select1(data, field, depth, output):
if d == None:
output.append(None)
break
elif isinstance(d, list):
elif is_list(d):
_select1(d, field, i + 1, output)
break
else:

@ -8,22 +8,19 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

import math
import sys

from mo_dots import listwrap, Null, Data
from mo_future import text_type, binary_type
from mo_logs import Log

from jx_base.container import Container
from jx_base.expressions import jx_expression, Expression
from jx_base.expressions import jx_expression
from jx_base.language import is_expression
from jx_python.expressions import jx_expression_to_function
from mo_collections.multiset import Multiset
from mo_dots.lists import FlatList
from mo_dots import Data, FlatList, Null, listwrap
from mo_future import binary_type, text_type
from mo_logs import Log
from mo_logs.exceptions import Except


@ -57,7 +54,7 @@ def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous
if not data:
return Null

if any(isinstance(k, Expression) for k in keys):
if any(is_expression(k) for k in keys):
Log.error("can not handle expressions")
else:
accessor = jx_expression_to_function(jx_expression({"tuple": keys}))  # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__

@ -146,7 +143,7 @@ def groupby_min_max_size(data, min_size=0, max_size=None, ):
if max_size == None:
max_size = sys.maxint

if isinstance(data, (bytearray, text_type, binary_type, list)):
if data.__class__ in (bytearray, text_type, binary_type, list, FlatList):
def _iter():
num = int(math.ceil(len(data)/max_size))
for i in range(num):
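The _iter generator above slices list-like data into ceil(len/max_size) contiguous pieces. A self-contained sketch of the same slicing arithmetic (the yielded shape of the real generator may differ):

    import math

    def chunk(data, max_size):
        # ceil(len/max_size) chunks, each a slice of at most max_size items
        num = int(math.ceil(len(data) / float(max_size)))
        for i in range(num):
            yield i, data[i * max_size:(i + 1) * max_size]

    assert list(chunk([1, 2, 3, 4, 5], 2)) == [(0, [1, 2]), (1, [3, 4]), (2, [5])]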
@ -8,30 +8,14 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals


_range = range

from mo_times import Date
from collections import Mapping
from jx_base import query
from jx_python import expressions as _expressions
from jx_python import flat_list, group_by
from mo_dots import listwrap, wrap, unwrap, FlatList, NullType
from mo_dots import set_default, Null, Data, split_field, coalesce, join_field
from mo_future import text_type, boolean_type, none_type, long, generator_types, sort_using_cmp, PY2
from mo_logs import Log
from mo_math import Math
from mo_math import UNION, MIN
from pyLibrary import convert

import mo_dots
from jx_base.container import Container
from jx_base.expressions import TRUE, FALSE, NullOp
from jx_base.expressions import FALSE, TRUE
from jx_base.query import QueryOp, _normalize_selects
from jx_base.language import is_op, value_compare
from jx_python import expressions as _expressions, flat_list, group_by
from jx_python.containers.cube import Cube
from jx_python.cubes.aggs import cube_aggs
from jx_python.expression_compiler import compile_expression

@ -39,7 +23,14 @@ from jx_python.expressions import jx_expression_to_function
from jx_python.flat_list import PartFlatList
from mo_collections.index import Index
from mo_collections.unique_index import UniqueIndex
import mo_dots
from mo_dots import Data, FlatList, Null, coalesce, is_container, is_data, is_list, is_many, join_field, listwrap, set_default, split_field, unwrap, wrap
from mo_dots.objects import DataObject
from mo_future import is_text, sort_using_cmp
from mo_logs import Log
import mo_math
from mo_math import MIN, UNION
from pyLibrary import convert

# A COLLECTION OF DATABASE OPERATORS (RELATIONAL ALGEBRA OPERATORS)
# JSON QUERY EXPRESSION DOCUMENTATION: https://github.com/klahnakoski/jx/tree/master/docs

@ -47,6 +38,7 @@ from mo_dots.objects import DataObject
# TODO: USE http://docs.sqlalchemy.org/en/latest/core/tutorial.html AS DOCUMENTATION FRAMEWORK

builtin_tuple = tuple
_range = range
_Column = None
_merge_type = None
_ = _expressions

@ -65,29 +57,32 @@ def run(query, container=Null):
BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
"""
if container == None:
container = wrap(query)['from']
container = wrap(query)["from"]
query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
else:
query_op = QueryOp.wrap(query, container, container.namespace)

if container == None:
from jx_python.containers.list_usingPythonList import DUAL

return DUAL.query(query_op)
elif isinstance(container, Container):
return container.query(query_op)
elif isinstance(container, (list, set) + generator_types):
elif is_many(container):
container = wrap(list(container))
elif isinstance(container, Cube):
if is_aggs(query_op):
return cube_aggs(container, query_op)
elif isinstance(container, QueryOp):
elif is_op(container, QueryOp):
container = run(container)
elif isinstance(container, Mapping):
elif is_data(container):
query = container
container = query['from']
container = query["from"]
container = run(QueryOp.wrap(query, container, container.namespace), container)
else:
Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)
Log.error(
"Do not know how to handle {{type}}", type=container.__class__.__name__
)

if is_aggs(query_op):
container = list_aggs(container, query_op)

@ -115,10 +110,7 @@ def run(query, container=Null):
container = convert.list2table(container)
container.meta.format = "table"
else:
container = wrap({
"meta": {"format": "list"},
"data": container
})
container = wrap({"meta": {"format": "list"}, "data": container})

return container

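For orientation, a call such as the following would flow through the dispatch above (a plain list lands in the is_many branch, is wrapped, then formatted). This is an untested, hypothetical invocation; the query vocabulary is documented at the jx docs link above:

    result = run({
        "from": [{"a": 1}, {"a": 2}, {"a": 3}],
        "where": {"gt": {"a": 1}},
        "format": "list"
    })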
@ -127,14 +119,19 @@ groupby = group_by.groupby


def index(data, keys=None):
# return dict that uses keys to index data
# return dict that uses keys to index data
o = Index(keys)

if isinstance(data, Cube):
if data.edges[0].name==keys[0]:
#QUICK PATH
if data.edges[0].name == keys[0]:
# QUICK PATH
names = list(data.data.keys())
for d in (set_default(mo_dots.zip(names, r), {keys[0]: p}) for r, p in zip(zip(*data.data.values()), data.edges[0].domain.partitions.value)):
for d in (
set_default(mo_dots.zip(names, r), {keys[0]: p})
for r, p in zip(
zip(*data.data.values()), data.edges[0].domain.partitions.value
)
):
o.add(d)
return o
else:

@ -157,19 +154,20 @@ def unique_index(data, keys=None, fail_on_dup=True):
o.add(d)
except Exception as e:
o.add(d)
Log.error("index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}",
index= keys,
key= select([d], keys)[0],
value1= o[d],
value2= d,
cause=e
Log.error(
"index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}",
index=keys,
key=select([d], keys)[0],
value1=o[d],
value2=d,
cause=e,
)
return o


def map2set(data, relation):
"""
EXPECTING A isinstance(relation, Mapping) THAT MAPS VALUES TO lists
EXPECTING A is_data(relation) THAT MAPS VALUES TO lists
THE LISTS ARE EXPECTED TO POINT TO MEMBERS OF A SET
A set() IS RETURNED
"""

@ -178,7 +176,7 @@ def map2set(data, relation):
if isinstance(relation, Data):
Log.error("Does not accept a Data")

if isinstance(relation, Mapping):
if is_data(relation):
try:
# relation[d] is expected to be a list
# return set(cod for d in data for cod in relation[d])

@ -215,20 +213,20 @@ def tuple(data, field_name):
if isinstance(data, FlatList):
Log.error("not supported yet")

if isinstance(field_name, Mapping) and "value" in field_name:
if is_data(field_name) and "value" in field_name:
# SIMPLIFY {"value":value} AS STRING
field_name = field_name["value"]

# SIMPLE PYTHON ITERABLE ASSUMED
if isinstance(field_name, text_type):
if is_text(field_name):
if len(split_field(field_name)) == 1:
return [(d[field_name], ) for d in data]
return [(d[field_name],) for d in data]
else:
path = split_field(field_name)
output = []
flat_list._tuple1(data, path, 0, output)
return output
elif isinstance(field_name, list):
elif is_list(field_name):
paths = [_select_a_field(f) for f in field_name]
output = FlatList()
_tuple((), unwrap(data), paths, 0, output)

@ -265,16 +263,16 @@ def _tuple_deep(v, field, depth, record):
field = {"name":name, "value":["attribute", "path"]}
r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH
"""
if hasattr(field.value, '__call__'):
return 0, None, record + (field.value(v), )
if hasattr(field.value, "__call__"):
return 0, None, record + (field.value(v),)

for i, f in enumerate(field.value[depth:len(field.value) - 1:]):
for i, f in enumerate(field.value[depth : len(field.value) - 1 :]):
v = v.get(f)
if isinstance(v, list):
if is_list(v):
return depth + i + 1, v, record

f = field.value.last()
return 0, None, record + (v.get(f), )
return 0, None, record + (v.get(f),)


def select(data, field_name):

@ -288,12 +286,14 @@ def select(data, field_name):
return data.select(field_name)

if isinstance(data, UniqueIndex):
data = data._data.values()  # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING
data = (
data._data.values()
)  # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING

if isinstance(data, Mapping):
if is_data(data):
return select_one(data, field_name)

if isinstance(field_name, Mapping):
if is_data(field_name):
field_name = wrap(field_name)
if field_name.value in ["*", "."]:
return data

@ -303,7 +303,7 @@ def select(data, field_name):
field_name = field_name.value

# SIMPLE PYTHON ITERABLE ASSUMED
if isinstance(field_name, text_type):
if is_text(field_name):
path = split_field(field_name)
if len(path) == 1:
return FlatList([d[field_name] for d in data])

@ -311,7 +311,7 @@ def select(data, field_name):
output = FlatList()
flat_list._select1(data, path, 0, output)
return output
elif isinstance(field_name, list):
elif is_list(field_name):
keys = [_select_a_field(wrap(f)) for f in field_name]
return _select(Data(), unwrap(data), keys, 0)
else:

@ -320,9 +320,9 @@ def select(data, field_name):


def _select_a_field(field):
if isinstance(field, text_type):
if is_text(field):
return wrap({"name": field, "value": split_field(field)})
elif isinstance(wrap(field).value, text_type):
elif is_text(wrap(field).value):
field = wrap(field)
return wrap({"name": field.name, "value": split_field(field.value)})
else:

@ -334,8 +334,8 @@ def _select(template, data, fields, depth):
deep_path = []
deep_fields = UniqueIndex(["name"])
for d in data:
if isinstance(d, Data):
Log.error("programmer error, _select can not handle Data")
if d.__class__ is Data:
Log.error("programmer error, _select can not handle Data, only dict")

record = template.copy()
children = None

@ -364,18 +364,18 @@ def _select_deep(v, field, depth, record):
field = {"name":name, "value":["attribute", "path"]}
r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH
"""
if hasattr(field.value, '__call__'):
if hasattr(field.value, "__call__"):
try:
record[field.name] = field.value(wrap(v))
except Exception as e:
record[field.name] = None
return 0, None

for i, f in enumerate(field.value[depth:len(field.value) - 1:]):
for i, f in enumerate(field.value[depth : len(field.value) - 1 :]):
v = v.get(f)
if v is None:
return 0, None
if isinstance(v, list):
if is_list(v):
return depth + i + 1, v

f = field.value.last()

@ -385,7 +385,9 @@ def _select_deep(v, field, depth, record):
else:
record[field.name] = v.get(f)
except Exception as e:
Log.error("{{value}} does not have {{field}} property", value= v, field=f, cause=e)
Log.error(
"{{value}} does not have {{field}} property", value=v, field=f, cause=e
)
return 0, None


@ -396,26 +398,31 @@ def _select_deep_meta(field, depth):
RETURN FUNCTION THAT PERFORMS THE MAPPING
"""
name = field.name
if hasattr(field.value, '__call__'):
if hasattr(field.value, "__call__"):
try:

def assign(source, destination):
destination[name] = field.value(wrap(source))
return 0, None

return assign
except Exception as e:

def assign(source, destination):
destination[name] = None
return 0, None

return assign

prefix = field.value[depth:len(field.value) - 1:]
prefix = field.value[depth : len(field.value) - 1 :]
if prefix:

def assign(source, destination):
for i, f in enumerate(prefix):
source = source.get(f)
if source is None:
return 0, None
if isinstance(source, list):
if is_list(source):
return depth + i + 1, source

f = field.value.last()

@ -425,23 +432,38 @@ def _select_deep_meta(field, depth):
else:
destination[name] = source.get(f)
except Exception as e:
Log.error("{{value}} does not have {{field}} property", value= source, field=f, cause=e)
Log.error(
"{{value}} does not have {{field}} property",
value=source,
field=f,
cause=e,
)
return 0, None

return assign
else:
f = field.value[0]
if not f:  # NO NAME FIELD INDICATES SELECT VALUE

def assign(source, destination):
destination[name] = source
return 0, None

return assign
else:

def assign(source, destination):
try:
destination[name] = source.get(f)
except Exception as e:
Log.error("{{value}} does not have {{field}} property", value= source, field=f, cause=e)
Log.error(
"{{value}} does not have {{field}} property",
value=source,
field=f,
cause=e,
)
return 0, None

return assign


@ -450,7 +472,12 @@ def get_columns(data, leaves=False):
if not leaves:
return wrap([{"name": n} for n in UNION(set(d.keys()) for d in data)])
else:
return wrap([{"name": leaf} for leaf in set(leaf for row in data for leaf, _ in row.leaves())])
return wrap(
[
{"name": leaf}
for leaf in set(leaf for row in data for leaf, _ in row.leaves())
]
)


_ = """
@ -490,23 +517,23 @@ def _deeper_iterator(columns, nested_path, path, data):
c = columns.get(leaf)
if not c:
c = columns[leaf] = _Column(name=leaf, type=type_to_name[v.__class__], table=None, es_column=leaf)
c.type = _merge_type[c.type][type_to_name[v.__class__]]
if c.type == "nested" and not nested_path[0].startswith(leaf + "."):
c.jx_type = _merge_type[c.jx_type][type_to_name[v.__class__]]
if c.jx_type == "nested" and not nested_path[0].startswith(leaf + "."):
if leaf.startswith(nested_path[0] + ".") or leaf == nested_path[0] or not nested_path[0]:
nested_path[0] = leaf
else:
Log.error("nested path conflict: {{leaf}} vs {{nested}}", leaf=leaf, nested=nested_path[0])

if isinstance(v, list) and v:
if is_list(v) and v:
if deep_leaf:
Log.error("nested path conflict: {{leaf}} vs {{nested}}", leaf=leaf, nested=deep_leaf)
deep_leaf = leaf
deep_v = v
elif isinstance(v, Mapping):
elif is_data(v):
for o in _deeper_iterator(columns, nested_path, leaf, [v]):
set_default(output, o)
else:
if c.type not in ["object", "nested"]:
if c.jx_type not in ["object", "nested"]:
output[leaf] = v

if deep_leaf:

@ -517,6 +544,7 @@ def _deeper_iterator(columns, nested_path, path, data):
yield output
"""


def sort(data, fieldnames=None, already_normalized=False):
"""
PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction}

@ -545,101 +573,23 @@ def sort(data, fieldnames=None, already_normalized=False):
Log.error("problem with compare", e)
return 0

if isinstance(data, list):
if is_list(data):
output = FlatList([unwrap(d) for d in sort_using_cmp(data, cmp=comparer)])
elif hasattr(data, "__iter__"):
output = FlatList([unwrap(d) for d in sort_using_cmp(list(data), cmp=comparer)])
output = FlatList(
[unwrap(d) for d in sort_using_cmp(list(data), cmp=comparer)]
)
else:
Log.error("Do not know how to handle")
output = None

return output
except Exception as e:
Log.error("Problem sorting\n{{data}}", data=data, cause=e)
Log.error("Problem sorting\n{{data}}", data=data, cause=e)


def count(values):
return sum((1 if v!=None else 0) for v in values)


def value_compare(left, right, ordering=1):
"""
SORT VALUES, NULL IS THE LEAST VALUE
:param left: LHS
:param right: RHS
:param ordering: (-1, 0, 0) TO AFFECT SORT ORDER
:return: The return value is negative if x < y, zero if x == y and strictly positive if x > y.
"""

try:
if isinstance(left, list) or isinstance(right, list):
if left == None:
return ordering
elif right == None:
return - ordering

left = listwrap(left)
right = listwrap(right)
for a, b in zip(left, right):
c = value_compare(a, b) * ordering
if c != 0:
return c

if len(left) < len(right):
return - ordering
elif len(left) > len(right):
return ordering
else:
return 0

ltype = type(left)
rtype = type(right)
ltype_num = TYPE_ORDER.get(ltype, 10)
rtype_num = TYPE_ORDER.get(rtype, 10)
type_diff = ltype_num - rtype_num
if type_diff != 0:
return ordering if type_diff > 0 else -ordering

if ltype_num == 9:
return 0
elif ltype is builtin_tuple:
for a, b in zip(left, right):
c = value_compare(a, b)
if c != 0:
return c * ordering
return 0
elif ltype in (dict, Data):
for k in sorted(set(left.keys()) | set(right.keys())):
c = value_compare(left.get(k), right.get(k)) * ordering
if c != 0:
return c
return 0
elif left > right:
return ordering
elif left < right:
return -ordering
else:
return 0
except Exception as e:
Log.error("Can not compare values {{left}} to {{right}}", left=left, right=right, cause=e)

TYPE_ORDER = {
boolean_type: 0,
int: 1,
float: 1,
Date: 1,
text_type: 2,
list: 3,
builtin_tuple: 3,
dict: 4,
Data: 4,
none_type: 9,
NullType: 9,
NullOp: 9
}

if PY2:
TYPE_ORDER[long] = 1
return sum((1 if v != None else 0) for v in values)


def pairwise(values):

@ -654,6 +604,7 @@ def pairwise(values):
yield (a, b)
a = b


pairs = pairwise

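pairwise (aliased as pairs above) yields each adjacent pair of its input; equivalent behavior in a few lines:

    def pairwise_sketch(values):
        it = iter(values)
        a = next(it, None)
        for b in it:
            yield (a, b)
            a = b

    assert list(pairwise_sketch([1, 2, 3])) == [(1, 2), (2, 3)]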
@ -667,18 +618,22 @@ def filter(data, where):
|
|||
if isinstance(data, Container):
|
||||
return data.filter(where)
|
||||
|
||||
if isinstance(data, (list, set)):
|
||||
if is_container(data):
|
||||
temp = jx_expression_to_function(where)
|
||||
dd = wrap(data)
|
||||
return wrap([unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])
|
||||
else:
|
||||
Log.error("Do not know how to handle type {{type}}", type=data.__class__.__name__)
|
||||
Log.error(
|
||||
"Do not know how to handle type {{type}}", type=data.__class__.__name__
|
||||
)
|
||||
|
||||
try:
|
||||
return drill_filter(where, data)
|
||||
except Exception as _:
|
||||
# WOW! THIS IS INEFFICIENT!
|
||||
return wrap([unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])])
|
||||
return wrap(
|
||||
[unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])]
|
||||
)
|
||||
|
||||
|
||||
def drill_filter(esfilter, data):
|
||||
|
@ -690,7 +645,9 @@ def drill_filter(esfilter, data):
|
|||
esfilter = unwrap(esfilter)
|
||||
primary_nested = [] # track if nested, changes if not
|
||||
primary_column = [] # only one path allowed
|
||||
primary_branch = [] # CONTAINS LISTS OF RECORDS TO ITERATE: constantly changing as we dfs the tree
|
||||
primary_branch = (
|
||||
[]
|
||||
) # CONTAINS LISTS OF RECORDS TO ITERATE: constantly changing as we dfs the tree
|
||||
|
||||
def parse_field(fieldname, data, depth):
|
||||
"""
|
||||
|
@ -703,21 +660,21 @@ def drill_filter(esfilter, data):
|
|||
d = d[c]
|
||||
except Exception as e:
|
||||
Log.error("{{name}} does not exist", name=fieldname)
|
||||
if isinstance(d, list) and len(col) > 1:
|
||||
if len(primary_column) <= depth+i:
|
||||
if is_list(d) and len(col) > 1:
|
||||
if len(primary_column) <= depth + i:
|
||||
primary_nested.append(True)
|
||||
primary_column.append(c)
|
||||
primary_branch.append(d)
|
||||
elif primary_nested[depth] and primary_column[depth+i] != c:
|
||||
elif primary_nested[depth] and primary_column[depth + i] != c:
|
||||
Log.error("only one branch of tree allowed")
|
||||
else:
|
||||
primary_nested[depth+i] = True
|
||||
primary_column[depth+i] = c
|
||||
primary_branch[depth+i] = d
|
||||
primary_nested[depth + i] = True
|
||||
primary_column[depth + i] = c
|
||||
primary_branch[depth + i] = d
|
||||
|
||||
return c, join_field(col[i+1:])
|
||||
return c, join_field(col[i + 1 :])
|
||||
else:
|
||||
if len(primary_column) <= depth+i:
|
||||
if len(primary_column) <= depth + i:
|
||||
primary_nested.append(False)
|
||||
primary_column.append(c)
|
||||
primary_branch.append([d])
|
||||
|
@ -737,7 +694,7 @@ def drill_filter(esfilter, data):
|
|||
if filter["and"]:
|
||||
result = True
|
||||
output = FlatList()
|
||||
for a in filter[u"and"]:
|
||||
for a in filter["and"]:
|
||||
f = pe_filter(a, data, depth)
|
||||
if f is False:
|
||||
result = False
|
||||
|
@ -749,7 +706,7 @@ def drill_filter(esfilter, data):
|
|||
return result
|
||||
elif filter["or"]:
|
||||
output = FlatList()
|
||||
for o in filter[u"or"]:
|
||||
for o in filter["or"]:
|
||||
f = pe_filter(o, data, depth)
|
||||
if f is True:
|
||||
return True
|
||||
|
@ -843,7 +800,7 @@ def drill_filter(esfilter, data):
|
|||
else:
|
||||
return result
|
||||
elif filter.missing:
|
||||
if isinstance(filter.missing, text_type):
|
||||
if is_text(filter.missing):
|
||||
field = filter["missing"]
|
||||
else:
|
||||
field = filter["missing"]["field"]
|
||||
|
@ -863,7 +820,7 @@ def drill_filter(esfilter, data):
|
|||
first, rest = parse_field(col, data, depth)
|
||||
d = data[first]
|
||||
if not rest:
|
||||
if d==None or not d.startswith(val):
|
||||
if d == None or not d.startswith(val):
|
||||
result = False
|
||||
else:
|
||||
output[rest] = val
|
||||
|
@ -873,7 +830,7 @@ def drill_filter(esfilter, data):
|
|||
return result
|
||||
|
||||
elif filter.exists:
|
||||
if isinstance(filter["exists"], text_type):
|
||||
if is_text(filter["exists"]):
|
||||
field = filter["exists"]
|
||||
else:
|
||||
field = filter["exists"]["field"]
|
||||
|
@ -887,7 +844,7 @@ def drill_filter(esfilter, data):
|
|||
else:
|
||||
return {"exists": rest}
|
||||
else:
|
||||
Log.error(u"Can not interpret esfilter: {{esfilter}}", {u"esfilter": filter})
|
||||
Log.error("Can not interpret esfilter: {{esfilter}}", {"esfilter": filter})
|
||||
|
||||
output = []  # A LIST OF OBJECTS MAKING IT THROUGH THE FILTER
|
||||
|
||||
|
@ -912,7 +869,7 @@ def drill_filter(esfilter, data):
|
|||
|
||||
# OUTPUT
|
||||
for i, d in enumerate(data):
|
||||
if isinstance(d, Mapping):
|
||||
if is_data(d):
|
||||
main([], esfilter, wrap(d), 0)
|
||||
else:
|
||||
Log.error("filter is expecting a dict, not {{type}}", type=d.__class__)
|
||||
|
@ -927,6 +884,7 @@ def drill_filter(esfilter, data):
|
|||
# OUTPUT IS A LIST OF ROWS,
|
||||
# WHERE EACH ROW IS A LIST OF VALUES SEEN DURING A WALK DOWN A PATH IN THE HIERARCHY
|
||||
uniform_output = FlatList()
|
||||
|
||||
def recurse(row, depth):
|
||||
if depth == max:
|
||||
uniform_output.append(row)
|
||||
|
@ -957,21 +915,24 @@ def wrap_function(func):
|
|||
"""
|
||||
RETURN A THREE-PARAMETER WINDOW FUNCTION TO MATCH
|
||||
"""
|
||||
if isinstance(func, text_type):
|
||||
if is_text(func):
|
||||
return compile_expression(func)
|
||||
|
||||
numarg = func.__code__.co_argcount
|
||||
if numarg == 0:
|
||||
|
||||
def temp(row, rownum, rows):
|
||||
return func()
|
||||
|
||||
return temp
|
||||
elif numarg == 1:
|
||||
|
||||
def temp(row, rownum, rows):
|
||||
return func(row)
|
||||
|
||||
return temp
|
||||
elif numarg == 2:
|
||||
|
||||
def temp(row, rownum, rows):
|
||||
return func(row, rownum)
|
||||
|
||||
|
@ -985,13 +946,17 @@ def window(data, param):
|
|||
MAYBE WE CAN DO THIS WITH NUMPY (no, the edges of windows are not graceful with numpy)
|
||||
data - list of records
|
||||
"""
|
||||
name = param.name # column to assign window function result
|
||||
edges = param.edges # columns to group by
|
||||
where = param.where # DO NOT CONSIDER THESE VALUES
|
||||
sortColumns = param.sort # columns to sort by
|
||||
calc_value = jx_expression_to_function(param.value) # function that takes a record and returns a value (for aggregation)
|
||||
name = param.name # column to assign window function result
|
||||
edges = param.edges  # columns to group by
|
||||
where = param.where # DO NOT CONSIDER THESE VALUES
|
||||
sortColumns = param.sort # columns to sort by
|
||||
calc_value = jx_expression_to_function(
|
||||
param.value
|
||||
) # function that takes a record and returns a value (for aggregation)
|
||||
aggregate = param.aggregate # WindowFunction to apply
|
||||
_range = param.range # of form {"min":-10, "max":0} to specify the size and relative position of window
|
||||
_range = (
|
||||
param.range
|
||||
) # of form {"min":-10, "max":0} to specify the size and relative position of window
|
||||
|
||||
data = filter(data, where)
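A hedged sketch of a window() parameter object matching the fields read above; every column name here is invented:

    param = wrap({
        "name": "running_total",          # column to hold the window result
        "edges": ["user"],                # group rows by user
        "where": {"exists": "amount"},    # DO NOT CONSIDER rows without an amount
        "sort": ["timestamp"],            # order rows within each group
        "value": "amount",                # value handed to the aggregate
        "aggregate": "sum",               # WindowFunction to apply
        "range": {"min": -10, "max": 0},  # trailing window of ten rows
    })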
|
||||
|
||||
|
@ -1014,7 +979,7 @@ def window(data, param):
|
|||
if not aggregate or aggregate == "none":
|
||||
for _, values in groupby(data, edge_values):
|
||||
if not values:
|
||||
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
|
||||
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
|
||||
|
||||
if sortColumns:
|
||||
sequence = sort(values, sortColumns, already_normalized=True)
|
||||
|
@ -1027,7 +992,7 @@ def window(data, param):
|
|||
|
||||
for keys, values in groupby(data, edge_values):
|
||||
if not values:
|
||||
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
|
||||
continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE
|
||||
|
||||
sequence = sort(values, sortColumns)
|
||||
|
||||
|
@ -1052,11 +1017,6 @@ def window(data, param):
|
|||
r["__temp__"] = None # CLEANUP
|
||||
def intervals(_min, _max=None, size=1):
|
||||
"""
|
||||
RETURN (min, max) PAIRS OF GIVEN SIZE, WHICH COVER THE _min, _max RANGE
|
||||
|
@ -1066,8 +1026,8 @@ def intervals(_min, _max=None, size=1):
|
|||
if _max == None:
|
||||
_max = _min
|
||||
_min = 0
|
||||
_max = int(Math.ceiling(_max))
|
||||
_min = int(Math.floor(_min))
|
||||
_max = int(mo_math.ceiling(_max))
|
||||
_min = int(mo_math.floor(_min))
|
||||
|
||||
output = ((x, min(x + size, _max)) for x in _range(_min, _max, size))
|
||||
return output
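Two examples of intervals(): the final pair is clipped to _max, and a single argument is treated as the upper bound:

    list(intervals(0, 10, 3))  # -> [(0, 3), (3, 6), (6, 9), (9, 10)]
    list(intervals(5))         # -> [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]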
|
||||
|
@ -1076,10 +1036,10 @@ def intervals(_min, _max=None, size=1):
|
|||
def prefixes(vals):
|
||||
"""
|
||||
:param vals: iterable
|
||||
:return: vals[:1], vals[:1], ... , vals[:n]
|
||||
:return: vals[:1], vals[:2], ... , vals[:n]
|
||||
"""
|
||||
for i in range(len(vals)):
|
||||
yield vals[:i + 1]
|
||||
yield vals[: i + 1]
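A quick check of the corrected docstring above:

    list(prefixes([1, 2, 3]))  # -> [[1], [1, 2], [1, 2, 3]]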
|
||||
|
||||
|
||||
def accumulate(vals):
|
||||
|
@ -1092,6 +1052,7 @@ def accumulate(vals):
|
|||
yield sum, v
|
||||
sum += v
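A hedged example of accumulate(), assuming the running total starts at zero (the initializer sits outside this hunk):

    list(accumulate([5, 3, 2]))  # -> [(0, 5), (5, 3), (8, 2)]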
|
||||
|
||||
|
||||
def reverse(vals):
|
||||
# TODO: Test how to do this fastest
|
||||
if not hasattr(vals, "len"):
|
||||
|
@ -1105,11 +1066,10 @@ def reverse(vals):
|
|||
|
||||
return wrap(output)
|
||||
|
||||
|
||||
def countdown(vals):
|
||||
remaining = len(vals) - 1
|
||||
return [(remaining - i, v) for i, v in enumerate(vals)]
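countdown() pairs each value with the number of values remaining after it:

    countdown(["a", "b", "c"])  # -> [(2, "a"), (1, "b"), (0, "c")]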
|
||||
from jx_python.lists.aggs import is_aggs, list_aggs
|
||||
|
|
|
@ -7,12 +7,10 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
import dataset
|
||||
|
||||
from jx_python.containers.Table_usingDataset import Table_usingDataset
|
||||
|
||||
|
||||
|
|
|
@ -7,23 +7,18 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
import itertools
|
||||
|
||||
from jx_base.query import _normalize_domain
|
||||
|
||||
from jx_base.domains import DefaultDomain, SimpleSetDomain
|
||||
from jx_python import windows
|
||||
from mo_dots import listwrap, wrap, coalesce
|
||||
from mo_logs import Log
|
||||
from mo_math import UNION
|
||||
|
||||
from jx_base.domains import SimpleSetDomain, DefaultDomain
|
||||
from jx_python.expression_compiler import compile_expression
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import coalesce, listwrap, wrap
|
||||
from mo_logs import Log
|
||||
from mo_math import UNION
|
||||
from mo_times.dates import Date
|
||||
|
||||
_ = Date
|
||||
|
@ -49,7 +44,7 @@ def list_aggs(frum, query):
|
|||
else:
|
||||
pass
|
||||
|
||||
s_accessors = [(ss.name, compile_expression(ss.value.to_python())) for ss in select]
|
||||
s_accessors = [(ss.name, jx_expression_to_function(ss.value)) for ss in select]
|
||||
|
||||
result = {
|
||||
s.name: Matrix(
|
||||
|
|
|
@ -7,28 +7,33 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
from datetime import date
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from contextlib import contextmanager
|
||||
import sqlite3
|
||||
|
||||
import jx_base
|
||||
from jx_base import Column, Table
|
||||
from jx_base.schema import Schema
|
||||
from jx_python import jx
|
||||
from mo_collections import UniqueIndex
|
||||
from mo_dots import Data, concat_field, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap, coalesce
|
||||
from mo_future import none_type, text_type, long, PY2
|
||||
from mo_json.typed_encoder import untype_path, unnest_path, python_type_to_json_type, STRUCT
|
||||
from mo_logs import Log
|
||||
from mo_threads import Lock
|
||||
from mo_dots import Data, FlatList, Null, NullType, ROOT_PATH, concat_field, is_container, is_data, is_list, join_field, listwrap, split_field, unwraplist, wrap
|
||||
from mo_files import File
|
||||
from mo_future import items, none_type, reduce, text_type, binary_type
|
||||
from mo_json import (INTEGER, NUMBER, STRING, STRUCT, json2value, python_type_to_json_type, value2json)
|
||||
from mo_json.typed_encoder import unnest_path, untype_path
|
||||
from mo_logs import Except, Log
|
||||
from mo_threads import Lock, Queue, Thread, Till
|
||||
from mo_times.dates import Date
|
||||
from pyLibrary.sql import (SQL_AND, SQL_FROM, SQL_ORDERBY, SQL_SELECT, SQL_WHERE, sql_iso, sql_list)
|
||||
from pyLibrary.sql.sqlite import json_type_to_sqlite_type, quote_column, quote_value
|
||||
|
||||
DEBUG = False
|
||||
singlton = None
|
||||
db_table_name = quote_column("meta.columns")
|
||||
|
||||
INSERT, UPDATE, DELETE, EXECUTE = "insert", "update", "delete", "execute"
|
||||
|
||||
|
||||
class ColumnList(Table, jx_base.Container):
|
||||
|
@ -36,14 +41,235 @@ class ColumnList(Table, jx_base.Container):
|
|||
OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, name):
|
||||
Table.__init__(self, "meta.columns")
|
||||
self.db_file = File("metadata." + name + ".sqlite")
|
||||
self.data = {} # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS)
|
||||
self.locker = Lock()
|
||||
self._schema = None
|
||||
self.extend(METADATA_COLUMNS)
|
||||
self.db = sqlite3.connect(
|
||||
database=self.db_file.abspath, check_same_thread=False, isolation_level=None
|
||||
)
|
||||
self.last_load = Null
|
||||
self.todo = Queue(
|
||||
"update columns to db"
|
||||
) # HOLD (action, column) PAIR, WHERE action in ['insert', 'update']
|
||||
self._db_load()
|
||||
Thread.run("update " + name, self._db_worker)
|
||||
|
||||
def find(self, es_index, abs_column_name):
|
||||
@contextmanager
|
||||
def _db_transaction(self):
|
||||
self.db.execute(str("BEGIN"))
|
||||
try:
|
||||
yield
|
||||
self.db.execute(str("COMMIT"))
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
self.db.execute(str("ROLLBACK"))
|
||||
Log.error("Transaction failed", cause=e)
|
||||
|
||||
def _query(self, query):
|
||||
result = Data()
|
||||
curr = self.db.execute(query)
|
||||
result.meta.format = "table"
|
||||
result.header = [d[0] for d in curr.description] if curr.description else None
|
||||
result.data = curr.fetchall()
|
||||
return result
|
||||
|
||||
def _db_create(self):
|
||||
with self._db_transaction():
|
||||
self.db.execute(
|
||||
"CREATE TABLE "
|
||||
+ db_table_name
|
||||
+ sql_iso(
|
||||
sql_list(
|
||||
[
|
||||
quote_column(c.name)
|
||||
+ " "
|
||||
+ json_type_to_sqlite_type[c.jx_type]
|
||||
for c in METADATA_COLUMNS
|
||||
]
|
||||
+ [
|
||||
"PRIMARY KEY"
|
||||
+ sql_iso(
|
||||
sql_list(map(quote_column, ["es_index", "es_column"]))
|
||||
)
|
||||
]
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
for c in METADATA_COLUMNS:
|
||||
self._add(c)
|
||||
self._db_insert_column(c)
|
||||
|
||||
def _db_load(self):
|
||||
self.last_load = Date.now()
|
||||
|
||||
result = self._query(
|
||||
SQL_SELECT
|
||||
+ "name"
|
||||
+ SQL_FROM
|
||||
+ "sqlite_master"
|
||||
+ SQL_WHERE
|
||||
+ SQL_AND.join(["name=" + db_table_name, "type=" + quote_value("table")])
|
||||
)
|
||||
if not result.data:
|
||||
self._db_create()
|
||||
return
|
||||
|
||||
result = self._query(
|
||||
SQL_SELECT
|
||||
+ all_columns
|
||||
+ SQL_FROM
|
||||
+ db_table_name
|
||||
+ SQL_ORDERBY
|
||||
+ sql_list(map(quote_column, ["es_index", "name", "es_column"]))
|
||||
)
|
||||
|
||||
with self.locker:
|
||||
for r in result.data:
|
||||
c = row_to_column(result.header, r)
|
||||
self._add(c)
|
||||
|
||||
def _db_worker(self, please_stop):
|
||||
while not please_stop:
|
||||
try:
|
||||
with self._db_transaction():
|
||||
result = self._query(
|
||||
SQL_SELECT
|
||||
+ all_columns
|
||||
+ SQL_FROM
|
||||
+ db_table_name
|
||||
+ SQL_WHERE
|
||||
+ "last_updated > "
|
||||
+ quote_value(self.last_load)
|
||||
+ SQL_ORDERBY
|
||||
+ sql_list(map(quote_column, ["es_index", "name", "es_column"]))
|
||||
)
|
||||
|
||||
with self.locker:
|
||||
for r in result.data:
|
||||
c = row_to_column(result.header, r)
|
||||
self._add(c)
|
||||
if c.last_updated > self.last_load:
|
||||
self.last_load = c.last_updated
|
||||
|
||||
updates = self.todo.pop_all()
|
||||
DEBUG and updates and Log.note(
|
||||
"{{num}} columns to push to db", num=len(updates)
|
||||
)
|
||||
for action, column in updates:
|
||||
while not please_stop:
|
||||
try:
|
||||
with self._db_transaction():
|
||||
DEBUG and Log.note(
|
||||
"{{action}} db for {{table}}.{{column}}",
|
||||
action=action,
|
||||
table=column.es_index,
|
||||
column=column.es_column,
|
||||
)
|
||||
if action is EXECUTE:
|
||||
self.db.execute(column)
|
||||
elif action is UPDATE:
|
||||
self.db.execute(
|
||||
"UPDATE"
|
||||
+ db_table_name
|
||||
+ "SET"
|
||||
+ sql_list(
|
||||
[
|
||||
"count=" + quote_value(column.count),
|
||||
"cardinality="
|
||||
+ quote_value(column.cardinality),
|
||||
"multi=" + quote_value(column.multi),
|
||||
"partitions="
|
||||
+ quote_value(
|
||||
value2json(column.partitions)
|
||||
),
|
||||
"last_updated="
|
||||
+ quote_value(column.last_updated),
|
||||
]
|
||||
)
|
||||
+ SQL_WHERE
|
||||
+ SQL_AND.join(
|
||||
[
|
||||
"es_index = "
|
||||
+ quote_value(column.es_index),
|
||||
"es_column = "
|
||||
+ quote_value(column.es_column),
|
||||
"last_updated < "
|
||||
+ quote_value(column.last_updated),
|
||||
]
|
||||
)
|
||||
)
|
||||
elif action is DELETE:
|
||||
self.db.execute(
|
||||
"DELETE FROM"
|
||||
+ db_table_name
|
||||
+ SQL_WHERE
|
||||
+ SQL_AND.join(
|
||||
[
|
||||
"es_index = "
|
||||
+ quote_value(column.es_index),
|
||||
"es_column = "
|
||||
+ quote_value(column.es_column),
|
||||
]
|
||||
)
|
||||
)
|
||||
else:
|
||||
self._db_insert_column(column)
|
||||
break
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
if "database is locked" in e:
|
||||
Log.note("metadata database is locked")
|
||||
Till(seconds=1).wait()
|
||||
break
|
||||
else:
|
||||
Log.warning("problem updataing database", cause=e)
|
||||
|
||||
except Exception as e:
|
||||
Log.warning("problem updating database", cause=e)
|
||||
|
||||
(Till(seconds=10) | please_stop).wait()
|
||||
|
||||
def _db_insert_column(self, column):
|
||||
try:
|
||||
self.db.execute(
|
||||
"INSERT INTO"
|
||||
+ db_table_name
|
||||
+ sql_iso(all_columns)
|
||||
+ "VALUES"
|
||||
+ sql_iso(
|
||||
sql_list(
|
||||
[
|
||||
quote_value(column[c.name])
|
||||
if c.name not in ("nested_path", "partitions")
|
||||
else quote_value(value2json(column[c.name]))
|
||||
for c in METADATA_COLUMNS
|
||||
]
|
||||
)
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
if "UNIQUE constraint failed" in e or " are not unique" in e:
|
||||
# THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
|
||||
self.todo.add((UPDATE, column), force=True)
|
||||
else:
|
||||
Log.error("do not know how to handle", cause=e)
|
||||
|
||||
def __copy__(self):
|
||||
output = object.__new__(ColumnList)
|
||||
Table.__init__(output, "meta.columns")
|
||||
output.data = {
|
||||
t: {c: list(cs) for c, cs in dd.items()} for t, dd in self.data.items()
|
||||
}
|
||||
output.locker = Lock()
|
||||
output._schema = None
|
||||
return output
|
||||
|
||||
def find(self, es_index, abs_column_name=None):
|
||||
with self.locker:
|
||||
if es_index.startswith("meta."):
|
||||
self._update_meta()
|
||||
|
@ -62,19 +288,37 @@ class ColumnList(Table, jx_base.Container):
|
|||
def add(self, column):
|
||||
self.dirty = True
|
||||
with self.locker:
|
||||
return self._add(column)
|
||||
canonical = self._add(column)
|
||||
if canonical == None:
|
||||
return column # ALREADY ADDED
|
||||
self.todo.add((INSERT if canonical is column else UPDATE, canonical))
|
||||
return canonical
|
||||
|
||||
def remove_table(self, table_name):
|
||||
del self.data[table_name]
|
||||
|
||||
def _add(self, column):
|
||||
"""
|
||||
:param column: ANY COLUMN OBJECT
|
||||
:return: None IF column IS canonical ALREADY (NET-ZERO EFFECT)
|
||||
"""
|
||||
columns_for_table = self.data.setdefault(column.es_index, {})
|
||||
existing_columns = columns_for_table.setdefault(column.names["."], [])
|
||||
existing_columns = columns_for_table.setdefault(column.name, [])
|
||||
|
||||
for canonical in existing_columns:
|
||||
if canonical is column:
|
||||
return canonical
|
||||
return None
|
||||
if canonical.es_type == column.es_type:
|
||||
set_default(column.names, canonical.names)
|
||||
for key in Column.__slots__:
|
||||
canonical[key] = column[key]
|
||||
if column.last_updated > canonical.last_updated:
|
||||
for key in Column.__slots__:
|
||||
old_value = canonical[key]
|
||||
new_value = column[key]
|
||||
if new_value == None:
|
||||
pass  # DO NOT BOTHER CLEARING OLD VALUES (LIKE cardinality AND partitions)
|
||||
elif new_value == old_value:
|
||||
pass # NO NEED TO UPDATE WHEN NO CHANGE MADE (COMMON CASE)
|
||||
else:
|
||||
canonical[key] = new_value
|
||||
return canonical
|
||||
existing_columns.append(column)
|
||||
return column
|
||||
|
@ -90,18 +334,18 @@ class ColumnList(Table, jx_base.Container):
|
|||
objects = 0
|
||||
multi = 1
|
||||
for column in self._all_columns():
|
||||
value = column[mc.names["."]]
|
||||
value = column[mc.name]
|
||||
if value == None:
|
||||
pass
|
||||
else:
|
||||
count += 1
|
||||
if isinstance(value, list):
|
||||
if is_list(value):
|
||||
multi = max(multi, len(value))
|
||||
try:
|
||||
values |= set(value)
|
||||
except Exception:
|
||||
objects += len(value)
|
||||
elif isinstance(value, Mapping):
|
||||
elif is_data(value):
|
||||
objects += 1
|
||||
else:
|
||||
values.add(value)
|
||||
|
@ -126,25 +370,42 @@ class ColumnList(Table, jx_base.Container):
|
|||
return iter(self._all_columns())
|
||||
|
||||
def __len__(self):
|
||||
return self.data['meta.columns']['es_index'].count
|
||||
return self.data["meta.columns"]["es_index"].count
|
||||
|
||||
def update(self, command):
|
||||
self.dirty = True
|
||||
try:
|
||||
command = wrap(command)
|
||||
DEBUG and Log.note(
|
||||
"Update {{timestamp}}: {{command|json}}",
|
||||
command=command,
|
||||
timestamp=Date(command["set"].last_updated),
|
||||
)
|
||||
eq = command.where.eq
|
||||
if eq.es_index:
|
||||
all_columns = self.data.get(eq.es_index, {}).values()
|
||||
if len(eq) == 1:
|
||||
if unwraplist(command.clear) == ".":
|
||||
with self.locker:
|
||||
del self.data[eq.es_index]
|
||||
self.todo.add(
|
||||
(
|
||||
EXECUTE,
|
||||
"DELETE FROM "
|
||||
+ db_table_name
|
||||
+ SQL_WHERE
|
||||
+ " es_index="
|
||||
+ quote_value(eq.es_index),
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
# FASTEST
|
||||
all_columns = self.data.get(eq.es_index, {}).values()
|
||||
with self.locker:
|
||||
columns = [
|
||||
c
|
||||
for cs in all_columns
|
||||
for c in cs
|
||||
]
|
||||
columns = [c for cs in all_columns for c in cs]
|
||||
elif eq.es_column and len(eq) == 2:
|
||||
# FASTER
|
||||
all_columns = self.data.get(eq.es_index, {}).values()
|
||||
with self.locker:
|
||||
columns = [
|
||||
c
|
||||
|
@ -155,12 +416,15 @@ class ColumnList(Table, jx_base.Container):
|
|||
|
||||
else:
|
||||
# SLOWER
|
||||
all_columns = self.data.get(eq.es_index, {}).values()
|
||||
with self.locker:
|
||||
columns = [
|
||||
c
|
||||
for cs in all_columns
|
||||
for c in cs
|
||||
if all(c[k] == v for k, v in eq.items()) # THIS LINE IS VERY SLOW
|
||||
if all(
|
||||
c[k] == v for k, v in eq.items()
|
||||
) # THIS LINE IS VERY SLOW
|
||||
]
|
||||
else:
|
||||
columns = list(self)
|
||||
|
@ -168,20 +432,30 @@ class ColumnList(Table, jx_base.Container):
|
|||
|
||||
with self.locker:
|
||||
for col in columns:
|
||||
DEBUG and Log.note(
|
||||
"update column {{table}}.{{column}}",
|
||||
table=col.es_index,
|
||||
column=col.es_column,
|
||||
)
|
||||
for k in command["clear"]:
|
||||
if k == ".":
|
||||
self.todo.add((DELETE, col))
|
||||
lst = self.data[col.es_index]
|
||||
cols = lst[col.names['.']]
|
||||
cols = lst[col.name]
|
||||
cols.remove(col)
|
||||
if len(cols) == 0:
|
||||
del lst[col.names['.']]
|
||||
del lst[col.name]
|
||||
if len(lst) == 0:
|
||||
del self.data[col.es_index]
|
||||
break
|
||||
else:
|
||||
col[k] = None
|
||||
else:
|
||||
# DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
|
||||
for k, v in command.set.items():
|
||||
col[k] = v
|
||||
self.todo.add((UPDATE, col))
|
||||
|
||||
for k, v in command.set.items():
|
||||
col[k] = v
|
||||
except Exception as e:
|
||||
Log.error("should not happen", cause=e)
|
||||
|
||||
|
@ -191,10 +465,13 @@ class ColumnList(Table, jx_base.Container):
|
|||
with self.locker:
|
||||
self._update_meta()
|
||||
if not self._schema:
|
||||
self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs])
|
||||
self._schema = Schema(
|
||||
".", [c for cs in self.data["meta.columns"].values() for c in cs]
|
||||
)
|
||||
snapshot = self._all_columns()
|
||||
|
||||
from jx_python.containers.list_usingPythonList import ListContainer
|
||||
|
||||
query.frum = ListContainer("meta.columns", snapshot, self._schema)
|
||||
return jx.run(query)
|
||||
|
||||
|
@ -208,7 +485,9 @@ class ColumnList(Table, jx_base.Container):
|
|||
if not self._schema:
|
||||
with self.locker:
|
||||
self._update_meta()
|
||||
self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs])
|
||||
self._schema = Schema(
|
||||
".", [c for cs in self.data["meta.columns"].values() for c in cs]
|
||||
)
|
||||
return self._schema
|
||||
|
||||
@property
|
||||
|
@ -229,8 +508,8 @@ class ColumnList(Table, jx_base.Container):
|
|||
self._update_meta()
|
||||
output = [
|
||||
{
|
||||
"table": concat_field(c.es_index, untype_path(table)),
|
||||
"name": untype_path(name),
|
||||
"table": c.es_index,
|
||||
"name": untype_path(c.name),
|
||||
"cardinality": c.cardinality,
|
||||
"es_column": c.es_column,
|
||||
"es_index": c.es_index,
|
||||
|
@ -238,23 +517,20 @@ class ColumnList(Table, jx_base.Container):
|
|||
"count": c.count,
|
||||
"nested_path": [unnest_path(n) for n in c.nested_path],
|
||||
"es_type": c.es_type,
|
||||
"type": c.jx_type
|
||||
"type": c.jx_type,
|
||||
}
|
||||
for tname, css in self.data.items()
|
||||
for cname, cs in css.items()
|
||||
for c in cs
|
||||
if c.jx_type not in STRUCT # and c.es_column != "_id"
|
||||
for table, name in c.names.items()
|
||||
]
|
||||
|
||||
from jx_python.containers.list_usingPythonList import ListContainer
|
||||
|
||||
return ListContainer(
|
||||
self.name,
|
||||
data=output,
|
||||
schema=jx_base.Schema(
|
||||
"meta.columns",
|
||||
SIMPLE_METADATA_COLUMNS
|
||||
)
|
||||
schema=jx_base.Schema("meta.columns", SIMPLE_METADATA_COLUMNS),
|
||||
)
|
||||
|
||||
|
||||
|
@ -262,7 +538,7 @@ def get_schema_from_list(table_name, frum):
|
|||
"""
|
||||
SCAN THE LIST FOR COLUMN TYPES
|
||||
"""
|
||||
columns = UniqueIndex(keys=("names.\\.",))
|
||||
columns = UniqueIndex(keys=("name",))
|
||||
_get_schema_from_list(frum, ".", parent=".", nested_path=ROOT_PATH, columns=columns)
|
||||
return Schema(table_name=table_name, columns=list(columns))
|
||||
|
||||
|
@ -271,277 +547,205 @@ def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
|
|||
"""
|
||||
:param frum: The list
|
||||
:param table_name: Name of the table this list holds records for
|
||||
:param prefix_path: parent path
|
||||
:param parent: parent path
|
||||
:param nested_path: each nested array, in reverse order
|
||||
:param columns: map from full name to column definition
|
||||
:return:
|
||||
"""
|
||||
|
||||
for d in frum:
|
||||
row_type = _type_to_name[d.__class__]
|
||||
row_type = python_type_to_json_type[d.__class__]
|
||||
|
||||
if row_type != "object":
|
||||
# EXPECTING PRIMITIVE VALUE
|
||||
full_name = parent
|
||||
column = columns[full_name]
|
||||
if not column:
|
||||
column = Column(
|
||||
names={table_name: full_name},
|
||||
name=concat_field(table_name, full_name),
|
||||
es_column=full_name,
|
||||
es_index=".",
|
||||
jx_type=python_type_to_json_type[d.__class__],
|
||||
es_type=row_type,
|
||||
nested_path=nested_path
|
||||
es_type=d.__class__.__name__,
|
||||
jx_type=None, # WILL BE SET BELOW
|
||||
last_updated=Date.now(),
|
||||
nested_path=nested_path,
|
||||
)
|
||||
columns.add(column)
|
||||
column.es_type = _merge_type[column.es_type][row_type]
|
||||
column.jx_type = _merge_type[coalesce(column.jx_type, "undefined")][row_type]
|
||||
column.es_type = _merge_python_type(column.es_type, d.__class__)
|
||||
column.jx_type = python_type_to_json_type[column.es_type]
|
||||
else:
|
||||
for name, value in d.items():
|
||||
full_name = concat_field(parent, name)
|
||||
column = columns[full_name]
|
||||
if not column:
|
||||
column = Column(
|
||||
names={table_name: full_name},
|
||||
name=concat_field(table_name, full_name),
|
||||
es_column=full_name,
|
||||
es_index=".",
|
||||
es_type="undefined",
|
||||
nested_path=nested_path
|
||||
es_type=value.__class__.__name__,
|
||||
jx_type=None, # WILL BE SET BELOW
|
||||
last_updated=Date.now(),
|
||||
nested_path=nested_path,
|
||||
)
|
||||
columns.add(column)
|
||||
if isinstance(value, (list, set)): # GET TYPE OF MULTIVALUE
|
||||
if is_container(value): # GET TYPE OF MULTIVALUE
|
||||
v = list(value)
|
||||
if len(v) == 0:
|
||||
this_type = "undefined"
|
||||
this_type = none_type.__name__
|
||||
elif len(v) == 1:
|
||||
this_type = _type_to_name[v[0].__class__]
|
||||
this_type = v[0].__class__.__name__
|
||||
else:
|
||||
this_type = _type_to_name[v[0].__class__]
|
||||
if this_type == "object":
|
||||
this_type = "nested"
|
||||
this_type = reduce(
|
||||
_merge_python_type, (vi.__class__.__name__ for vi in value)
|
||||
)
|
||||
else:
|
||||
this_type = _type_to_name[value.__class__]
|
||||
new_type = _merge_type[column.es_type][this_type]
|
||||
column.es_type = new_type
|
||||
this_type = value.__class__.__name__
|
||||
column.es_type = _merge_python_type(column.es_type, this_type)
|
||||
column.jx_type = python_type_to_json_type[column.es_type]
|
||||
|
||||
if this_type == "object":
|
||||
_get_schema_from_list([value], table_name, full_name, nested_path, columns)
|
||||
elif this_type == "nested":
|
||||
if this_type in {"object", "dict", "Mapping", "Data"}:
|
||||
_get_schema_from_list(
|
||||
[value], table_name, full_name, nested_path, columns
|
||||
)
|
||||
elif this_type in {"list", "FlatList"}:
|
||||
np = listwrap(nested_path)
|
||||
newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
|
||||
_get_schema_from_list(value, table_name, full_name, newpath, columns)
|
||||
_get_schema_from_list(
|
||||
value, table_name, full_name, newpath, columns
|
||||
)
|
||||
|
||||
|
||||
METADATA_COLUMNS = (
|
||||
[
|
||||
Column(
|
||||
names={".": c},
|
||||
name=c,
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="string",
|
||||
nested_path=ROOT_PATH
|
||||
es_type="keyword",
|
||||
jx_type=STRING,
|
||||
last_updated=Date.now(),
|
||||
nested_path=ROOT_PATH,
|
||||
)
|
||||
for c in ["es_type", "jx_type", "nested_path", "es_column", "es_index"]
|
||||
] + [
|
||||
for c in [
|
||||
"name",
|
||||
"es_type",
|
||||
"jx_type",
|
||||
"nested_path",
|
||||
"es_column",
|
||||
"es_index",
|
||||
"partitions",
|
||||
]
|
||||
]
|
||||
+ [
|
||||
Column(
|
||||
es_index="meta.columns",
|
||||
names={".": c},
|
||||
es_column=c,
|
||||
es_type="object",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in ["names", "partitions"]
|
||||
] + [
|
||||
Column(
|
||||
names={".": c},
|
||||
name=c,
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="long",
|
||||
nested_path=ROOT_PATH
|
||||
es_type="integer",
|
||||
jx_type=INTEGER,
|
||||
last_updated=Date.now(),
|
||||
nested_path=ROOT_PATH,
|
||||
)
|
||||
for c in ["count", "cardinality", "multi"]
|
||||
] + [
|
||||
]
|
||||
+ [
|
||||
Column(
|
||||
names={".": "last_updated"},
|
||||
name="last_updated",
|
||||
es_index="meta.columns",
|
||||
es_column="last_updated",
|
||||
es_type="time",
|
||||
nested_path=ROOT_PATH
|
||||
es_type="double",
|
||||
jx_type=NUMBER,
|
||||
last_updated=Date.now(),
|
||||
nested_path=ROOT_PATH,
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
SIMPLE_METADATA_COLUMNS = (
|
||||
|
||||
def row_to_column(header, row):
|
||||
return Column(
|
||||
**{
|
||||
h: c
|
||||
if c is None or h not in ("nested_path", "partitions")
|
||||
else json2value(c)
|
||||
for h, c in zip(header, row)
|
||||
}
|
||||
)
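A hedged example of row_to_column(): only the JSON-encoded cells ("nested_path", "partitions") are decoded, everything else passes through unchanged; the sample row is invented:

    header = ["name", "es_index", "nested_path"]
    row = ("count", "meta.columns", '["."]')
    col = row_to_column(header, row)
    # col.name == "count", col.es_index == "meta.columns", col.nested_path == ["."]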
|
||||
|
||||
|
||||
all_columns = sql_list([quote_column(c.name) for c in METADATA_COLUMNS])
|
||||
|
||||
|
||||
SIMPLE_METADATA_COLUMNS = (  # FOR PURELY INTERNAL PYTHON LISTS, NOT MAPPING TO ANOTHER DATASTORE
|
||||
[
|
||||
Column(
|
||||
names={".": c},
|
||||
name=c,
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="string",
|
||||
nested_path=ROOT_PATH
|
||||
jx_type=STRING,
|
||||
last_updated=Date.now(),
|
||||
nested_path=ROOT_PATH,
|
||||
)
|
||||
for c in ["table", "name", "type", "nested_path"]
|
||||
] + [
|
||||
]
|
||||
+ [
|
||||
Column(
|
||||
names={".": c},
|
||||
name=c,
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="long",
|
||||
nested_path=ROOT_PATH
|
||||
jx_type=INTEGER,
|
||||
last_updated=Date.now(),
|
||||
nested_path=ROOT_PATH,
|
||||
)
|
||||
for c in ["count", "cardinality", "multi"]
|
||||
] + [
|
||||
]
|
||||
+ [
|
||||
Column(
|
||||
names={".": "last_updated"},
|
||||
name="last_updated",
|
||||
es_index="meta.columns",
|
||||
es_column="last_updated",
|
||||
es_type="time",
|
||||
nested_path=ROOT_PATH
|
||||
jx_type=NUMBER,
|
||||
last_updated=Date.now(),
|
||||
nested_path=ROOT_PATH,
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
_type_to_name = {
|
||||
none_type: "undefined",
|
||||
NullType: "undefined",
|
||||
bool: "boolean",
|
||||
str: "string",
|
||||
text_type: "string",
|
||||
int: "integer",
|
||||
float: "double",
|
||||
Data: "object",
|
||||
dict: "object",
|
||||
set: "nested",
|
||||
list: "nested",
|
||||
FlatList: "nested",
|
||||
Date: "double",
|
||||
Decimal: "double",
|
||||
datetime: "double",
|
||||
date: "double"
|
||||
_merge_order = {
|
||||
none_type: 0,
|
||||
NullType: 1,
|
||||
bool: 2,
|
||||
int: 3,
|
||||
Date: 4,
|
||||
float: 5,
|
||||
text_type: 6,
|
||||
binary_type: 6,
|
||||
object: 7,
|
||||
dict: 8,
|
||||
Mapping: 9,
|
||||
Data: 10,
|
||||
list: 11,
|
||||
FlatList: 12,
|
||||
}
|
||||
|
||||
if PY2:
|
||||
_type_to_name[long] = "integer"
|
||||
for k, v in items(_merge_order):
|
||||
_merge_order[k.__name__] = v
|
||||
|
||||
_merge_type = {
|
||||
"undefined": {
|
||||
"undefined": "undefined",
|
||||
"boolean": "boolean",
|
||||
"integer": "integer",
|
||||
"long": "long",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": "object",
|
||||
"nested": "nested"
|
||||
},
|
||||
"boolean": {
|
||||
"undefined": "boolean",
|
||||
"boolean": "boolean",
|
||||
"integer": "integer",
|
||||
"long": "long",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"integer": {
|
||||
"undefined": "integer",
|
||||
"boolean": "integer",
|
||||
"integer": "integer",
|
||||
"long": "long",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"long": {
|
||||
"undefined": "long",
|
||||
"boolean": "long",
|
||||
"integer": "long",
|
||||
"long": "long",
|
||||
"float": "double",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"float": {
|
||||
"undefined": "float",
|
||||
"boolean": "float",
|
||||
"integer": "float",
|
||||
"long": "double",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"double": {
|
||||
"undefined": "double",
|
||||
"boolean": "double",
|
||||
"integer": "double",
|
||||
"long": "double",
|
||||
"float": "double",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"number": {
|
||||
"undefined": "number",
|
||||
"boolean": "number",
|
||||
"integer": "number",
|
||||
"long": "number",
|
||||
"float": "number",
|
||||
"double": "number",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"string": {
|
||||
"undefined": "string",
|
||||
"boolean": "string",
|
||||
"integer": "string",
|
||||
"long": "string",
|
||||
"float": "string",
|
||||
"double": "string",
|
||||
"number": "string",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"object": {
|
||||
"undefined": "object",
|
||||
"boolean": None,
|
||||
"integer": None,
|
||||
"long": None,
|
||||
"float": None,
|
||||
"double": None,
|
||||
"number": None,
|
||||
"string": None,
|
||||
"object": "object",
|
||||
"nested": "nested"
|
||||
},
|
||||
"nested": {
|
||||
"undefined": "nested",
|
||||
"boolean": None,
|
||||
"integer": None,
|
||||
"long": None,
|
||||
"float": None,
|
||||
"double": None,
|
||||
"number": None,
|
||||
"string": None,
|
||||
"object": "nested",
|
||||
"nested": "nested"
|
||||
}
|
||||
}
|
||||
|
||||
def _merge_python_type(A, B):
|
||||
a = _merge_order[A]
|
||||
b = _merge_order[B]
|
||||
|
||||
if a >= b:
|
||||
output = A
|
||||
else:
|
||||
output = B
|
||||
|
||||
if isinstance(output, str):
|
||||
return output
|
||||
else:
|
||||
return output.__name__
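Hedged examples of _merge_python_type(): the argument with the higher _merge_order rank wins, and the winner is always returned as a type name:

    _merge_python_type("bool", "int")       # -> "int"  (int outranks bool)
    _merge_python_type("NoneType", "list")  # -> "list" (null-ish types lose to anything)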
|
||||
|
|
|
@ -7,26 +7,21 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
from mo_future import is_text, is_binary
|
||||
from copy import copy
|
||||
|
||||
from mo_dots import Data
|
||||
from mo_dots import FlatList
|
||||
from mo_dots import coalesce, Null
|
||||
from mo_dots import wrap, listwrap
|
||||
from mo_logs import Log
|
||||
from mo_math import Math
|
||||
|
||||
from jx_base.dimensions import Dimension
|
||||
from jx_base.domains import Domain
|
||||
from jx_base.query import QueryOp, get_all_vars
|
||||
from jx_python.containers import Container
|
||||
from jx_python.expressions import TRUE
|
||||
from jx_python.namespace import Namespace, convert_list
|
||||
from jx_base.query import QueryOp, get_all_vars
|
||||
from mo_dots import Data, FlatList, Null, coalesce, is_data, is_list, listwrap, wrap
|
||||
from mo_future import text_type
|
||||
from mo_logs import Log
|
||||
import mo_math
|
||||
|
||||
DEFAULT_LIMIT = 10
|
||||
|
||||
|
@ -37,7 +32,7 @@ class Normal(Namespace):
|
|||
"""
|
||||
|
||||
def convert(self, expr):
|
||||
if isinstance(expr, Mapping) and expr["from"]:
|
||||
if is_data(expr) and expr["from"]:
|
||||
return self._convert_query(expr)
|
||||
return expr
|
||||
|
||||
|
@ -47,7 +42,7 @@ class Normal(Namespace):
|
|||
# Log.error('Expecting from clause to be a Container')
|
||||
query = wrap(query)
|
||||
|
||||
output = QueryOp("from", None)
|
||||
output = QueryOp(None)
|
||||
output["from"] = self._convert_from(query["from"])
|
||||
|
||||
output.format = query.format
|
||||
|
@ -77,7 +72,7 @@ class Normal(Namespace):
|
|||
output.sort = self._convert_sort(query.sort)
|
||||
|
||||
output.limit = coalesce(query.limit, DEFAULT_LIMIT)
|
||||
if not Math.is_integer(output.limit) or output.limit < 0:
|
||||
if not mo_math.is_integer(output.limit) or output.limit < 0:
|
||||
Log.error("Expecting limit >= 0")
|
||||
|
||||
output.isLean = query.isLean
|
||||
|
@ -94,15 +89,15 @@ class Normal(Namespace):
|
|||
return output
|
||||
|
||||
def _convert_from(self, frum):
|
||||
if isinstance(frum, text_type):
|
||||
if is_text(frum):
|
||||
return Data(name=frum)
|
||||
elif isinstance(frum, (Container, QueryOp)):
|
||||
elif is_op(frum, (Container, Variable)):
|
||||
return frum
|
||||
else:
|
||||
Log.error("Expecting from clause to be a name, or a container")
|
||||
|
||||
def _convert_select(self, select):
|
||||
if isinstance(select, text_type):
|
||||
if is_text(select):
|
||||
return Data(
|
||||
name=select.rstrip("."), # TRAILING DOT INDICATES THE VALUE, BUT IS INVALID FOR THE NAME
|
||||
value=select,
|
||||
|
@ -111,7 +106,7 @@ class Normal(Namespace):
|
|||
else:
|
||||
select = wrap(select)
|
||||
output = copy(select)
|
||||
if not select.value or isinstance(select.value, text_type):
|
||||
if not select.value or is_text(select.value):
|
||||
if select.value == ".":
|
||||
output.name = coalesce(select.name, select.aggregate)
|
||||
else:
|
||||
|
@ -126,7 +121,7 @@ class Normal(Namespace):
|
|||
return output
|
||||
|
||||
def _convert_edge(self, edge):
|
||||
if isinstance(edge, text_type):
|
||||
if is_text(edge):
|
||||
return Data(
|
||||
name=edge,
|
||||
value=edge,
|
||||
|
@ -134,10 +129,10 @@ class Normal(Namespace):
|
|||
)
|
||||
else:
|
||||
edge = wrap(edge)
|
||||
if not edge.name and not isinstance(edge.value, text_type):
|
||||
if not edge.name and not is_text(edge.value):
|
||||
Log.error("You must name compound edges: {{edge}}", edge= edge)
|
||||
|
||||
if isinstance(edge.value, (Mapping, list)) and not edge.domain:
|
||||
if edge.value.__class__ in (Data, dict, list, FlatList) and not edge.domain:
|
||||
# COMPLEX EDGE IS SHORT HAND
|
||||
domain = self._convert_domain()
|
||||
domain.dimension = Data(fields=edge.value)
|
||||
|
@ -158,7 +153,7 @@ class Normal(Namespace):
|
|||
)
|
||||
|
||||
def _convert_group(self, column):
|
||||
if isinstance(column, text_type):
|
||||
if is_text(column):
|
||||
return wrap({
|
||||
"name": column,
|
||||
"value": column,
|
||||
|
@ -169,7 +164,7 @@ class Normal(Namespace):
|
|||
if (column.domain and column.domain.type != "default") or column.allowNulls != None:
|
||||
Log.error("groupby does not accept complicated domains")
|
||||
|
||||
if not column.name and not isinstance(column.value, text_type):
|
||||
if not column.name and not is_text(column.value):
|
||||
Log.error("You must name compound edges: {{edge}}", edge= column)
|
||||
|
||||
return wrap({
|
||||
|
@ -191,7 +186,7 @@ class Normal(Namespace):
|
|||
domain = domain.copy()
|
||||
domain.name = domain.type
|
||||
|
||||
if not isinstance(domain.partitions, list):
|
||||
if not is_list(domain.partitions):
|
||||
domain.partitions = list(domain.partitions)
|
||||
|
||||
return Domain(**domain)
|
||||
|
@ -237,7 +232,7 @@ def normalize_sort(sort=None):
|
|||
|
||||
output = FlatList()
|
||||
for s in listwrap(sort):
|
||||
if isinstance(s, text_type) or Math.is_integer(s):
|
||||
if is_text(s) or mo_math.is_integer(s):
|
||||
output.append({"value": s, "sort": 1})
|
||||
elif not s.field and not s.value and s.sort == None:
|
||||
# ASSUME {name: sort} FORM
|
||||
|
@ -255,8 +250,7 @@ sort_direction = {
|
|||
1: 1,
|
||||
0: 0,
|
||||
-1: -1,
|
||||
None: 1,
|
||||
Null: 1
|
||||
None: 1
|
||||
}
|
||||
|
||||
canonical_aggregates = {
|
||||
|
|
|
@ -7,22 +7,20 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
from copy import copy
|
||||
|
||||
from mo_dots import set_default, wrap, coalesce, Data, listwrap, unwraplist
|
||||
from mo_logs import Log
|
||||
from mo_math import Math
|
||||
from mo_times.dates import Date
|
||||
|
||||
from jx_base.dimensions import Dimension
|
||||
from jx_base.queries import is_variable_name
|
||||
from jx_python.namespace import Namespace, convert_list
|
||||
from jx_base.utils import is_variable_name
|
||||
from jx_base.query import QueryOp
|
||||
from jx_base.language import is_op
|
||||
from jx_python.namespace import Namespace, convert_list
|
||||
from mo_dots import Data, coalesce, is_data, is_list, listwrap, set_default, unwraplist, wrap, is_many
|
||||
from mo_future import is_text
|
||||
from mo_logs import Log
|
||||
from mo_math import is_number
|
||||
from mo_times.dates import Date
|
||||
|
||||
|
||||
class Rename(Namespace):
|
||||
|
@ -32,7 +30,7 @@ class Rename(Namespace):
|
|||
EXPECTING A LIST OF {"name":name, "value":value} OBJECTS TO PERFORM A MAPPING
|
||||
"""
|
||||
dimensions = wrap(dimensions)
|
||||
if isinstance(dimensions, Mapping) and dimensions.name == None:
|
||||
if is_data(dimensions) and dimensions.name == None:
|
||||
# CONVERT TO A REAL DIMENSION DEFINITION
|
||||
dimensions = {"name": ".", "type": "set", "edges":[{"name": k, "field": v} for k, v in dimensions.items()]}
|
||||
|
||||
|
@ -44,19 +42,19 @@ class Rename(Namespace):
|
|||
"""
|
||||
if expr is True or expr == None or expr is False:
|
||||
return expr
|
||||
elif Math.is_number(expr):
|
||||
elif is_number(expr):
|
||||
return expr
|
||||
elif expr == ".":
|
||||
return "."
|
||||
elif is_variable_name(expr):
|
||||
return coalesce(self.dimensions[expr], expr)
|
||||
elif isinstance(expr, text_type):
|
||||
elif is_text(expr):
|
||||
Log.error("{{name|quote}} is not a valid variable name", name=expr)
|
||||
elif isinstance(expr, Date):
|
||||
return expr
|
||||
elif isinstance(expr, QueryOp):
|
||||
elif is_op(expr, QueryOp):
|
||||
return self._convert_query(expr)
|
||||
elif isinstance(expr, Mapping):
|
||||
elif is_data(expr):
|
||||
if expr["from"]:
|
||||
return self._convert_query(expr)
|
||||
elif len(expr) >= 2:
|
||||
|
@ -66,7 +64,7 @@ class Rename(Namespace):
|
|||
# ASSUME SINGLE-CLAUSE EXPRESSION
|
||||
k, v = expr.items()[0]
|
||||
return converter_map.get(k, self._convert_bop)(self, k, v)
|
||||
elif isinstance(expr, (list, set, tuple)):
|
||||
elif is_many(expr):
|
||||
return wrap([self.convert(value) for value in expr])
|
||||
else:
|
||||
return expr
|
||||
|
@ -88,7 +86,7 @@ class Rename(Namespace):
|
|||
|
||||
|
||||
def _convert_bop(self, op, term):
|
||||
if isinstance(term, list):
|
||||
if is_list(term):
|
||||
return {op: map(self.convert, term)}
|
||||
|
||||
return {op: {self.convert(var): val for var, val in term.items()}}
|
||||
|
@ -97,7 +95,7 @@ class Rename(Namespace):
|
|||
return {k: map(self.convert, v)}
|
||||
|
||||
def _convert_from(self, frum):
|
||||
if isinstance(frum, Mapping):
|
||||
if is_data(frum):
|
||||
return Data(name=self.convert(frum.name))
|
||||
else:
|
||||
return self.convert(frum)
|
||||
|
@ -126,7 +124,7 @@ class Rename(Namespace):
|
|||
|
||||
if clause == None:
|
||||
return None
|
||||
elif isinstance(clause, Mapping):
|
||||
elif is_data(clause):
|
||||
return set_default({"value": self.convert(clause.value)}, clause)
|
||||
else:
|
||||
return [set_default({"value": self.convert(c.value)}, c) for c in clause]
|
||||
|
|
|
@ -7,9 +7,9 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
from mo_dots import listwrap
|
||||
|
||||
|
||||
|
|
|
@ -7,10 +7,9 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
import jx_base
|
||||
from mo_dots import Data
|
||||
|
||||
|
|
|
@ -8,20 +8,16 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import functools
|
||||
from copy import copy
|
||||
import functools
|
||||
|
||||
import mo_math
|
||||
from mo_collections.multiset import Multiset
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_dots import FlatList
|
||||
from mo_logs import Log
|
||||
from mo_math import MIN
|
||||
from mo_math import Math
|
||||
from mo_math import stats
|
||||
import mo_math
|
||||
from mo_math import MIN, stats
|
||||
from mo_math.stats import ZeroMoment, ZeroMoment2Stats
|
||||
|
||||
|
||||
|
@ -147,7 +143,7 @@ class _Stats(WindowFunction):
|
|||
Log.error("Do not know how to handle")
|
||||
|
||||
def end(self):
|
||||
ignore = Math.ceiling(len(self.samples) * (1 - self.middle) / 2)
|
||||
ignore = mo_math.ceiling(len(self.samples) * (1 - self.middle) / 2)
|
||||
if ignore * 2 >= len(self.samples):
|
||||
return stats.Stats()
|
||||
output = stats.Stats(samples=sorted(self.samples)[ignore:len(self.samples) - ignore:])
|
||||
|
|
|
@ -7,13 +7,12 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
from mo_collections.unique_index import UniqueIndex
|
||||
|
||||
|
||||
def reverse(values):
|
||||
"""
|
||||
REVERSE - WITH NO SIDE EFFECTS!
|
||||
|
|
|
@ -11,10 +11,9 @@
|
|||
# REPLACE NUMPY ARRAY FUNCTIONS
|
||||
# THIS CODE IS FASTER THAN NUMPY WHEN USING PYPY *AND* THE ARRAYS ARE SMALL
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
|
@ -100,6 +99,7 @@ MORE_MATH = {
|
|||
"subtract": lambda a, b: a - b,
|
||||
"sub": lambda a, b: a - b,
|
||||
"multiply": lambda a, b: a * b,
|
||||
"mul": lambda a, b: a * b,
|
||||
"mult": lambda a, b: a * b,
|
||||
"divide": lambda a, b: a / b,
|
||||
"div": lambda a, b: a / b
|
||||
|
|
|
@ -8,14 +8,11 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
from copy import copy
|
||||
|
||||
from mo_dots import wrap, unwrap, tuplewrap, get_attr
|
||||
from mo_dots import get_attr, is_data, is_sequence, tuplewrap, unwrap, wrap
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
|
@ -36,7 +33,7 @@ class Index(object):
|
|||
|
||||
def __getitem__(self, key):
|
||||
try:
|
||||
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
|
||||
if is_sequence(key) and len(key) < len(self._keys):
|
||||
# RETURN ANOTHER Index
|
||||
raise NotImplementedError()
|
||||
|
||||
|
@ -67,7 +64,7 @@ class Index(object):
|
|||
|
||||
def _test_contains(self, key):
|
||||
try:
|
||||
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
|
||||
if is_sequence(key) and len(key) < len(self._keys):
|
||||
# RETURN ANOTHER Index
|
||||
length = len(key)
|
||||
key = value2key(self._keys[0:length:], key)
|
||||
|
@ -158,15 +155,15 @@ class Index(object):
|
|||
|
||||
def value2key(keys, val):
|
||||
if len(keys) == 1:
|
||||
if isinstance(val, Mapping):
|
||||
if is_data(val):
|
||||
return get_attr(val, keys[0]),
|
||||
elif isinstance(val, (list, tuple)):
|
||||
elif is_sequence(val):
|
||||
return val[0],
|
||||
return val,
|
||||
else:
|
||||
if isinstance(val, Mapping):
|
||||
if is_data(val):
|
||||
return tuple(val[k] for k in keys)
|
||||
elif isinstance(val, (list, tuple)):
|
||||
elif is_sequence(val):
|
||||
return tuple(val)
|
||||
else:
|
||||
Log.error("do not know what to do here")
|
||||
|
|
|
@ -7,15 +7,11 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import text_type, xrange, transpose
|
||||
from mo_dots import Null, Data, coalesce, get_module
|
||||
from mo_kwargs import override
|
||||
from mo_dots import Data, Null, coalesce, get_module, is_sequence
|
||||
from mo_future import text_type, transpose, xrange
|
||||
from mo_logs import Log
|
||||
from mo_logs.exceptions import suppress_exception
|
||||
|
||||
|
||||
class Matrix(object):
|
||||
|
@ -40,7 +36,7 @@ class Matrix(object):
|
|||
self.num = len(dims)
|
||||
self.dims = tuple(dims)
|
||||
if zeros != None:
|
||||
if self.num == 0 or any(d == 0 for d in dims): #NO DIMS, OR HAS A ZERO DIM, THEN IT IS A NULL CUBE
|
||||
if self.num == 0 or any(d == 0 for d in dims): # NO DIMS, OR HAS A ZERO DIM, THEN IT IS A NULL CUBE
|
||||
if hasattr(zeros, "__call__"):
|
||||
self.cube = zeros()
|
||||
else:
|
||||
|
@ -61,7 +57,7 @@ class Matrix(object):
|
|||
return output
|
||||
|
||||
def __getitem__(self, index):
|
||||
if not isinstance(index, (list, tuple)):
|
||||
if not is_sequence(index):
|
||||
if isinstance(index, slice):
|
||||
sub = self.cube[index]
|
||||
output = Matrix()
|
||||
|
@ -171,10 +167,11 @@ class Matrix(object):
|
|||
|
||||
def __iter__(self):
|
||||
if not self.dims:
|
||||
return [self.value].__iter__()
|
||||
yield (tuple(), self.value)
|
||||
else:
|
||||
# TODO: MAKE THIS FASTER BY NOT CALLING __getitem__ (MAKES CUBE OBJECTS)
|
||||
return ((c, self[c]) for c in self._all_combos())
|
||||
for c in self._all_combos():
|
||||
yield (c, self[c])
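A hedged usage note for the generator-style __iter__ above: iterating a Matrix yields (coordinates, value) pairs in both the zero-dimension and general cases; the constructor call here is an assumption:

    m = Matrix(dims=(2, 2), zeros=0)
    for coord, value in m:
        pass  # coord is a tuple such as (0, 1); value is the cell content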
|
||||
|
||||
def __float__(self):
|
||||
return self.value
|
||||
|
|
|
@ -8,12 +8,10 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
class Multiset(object):
|
||||
"""
|
||||
Multiset IS ONE MEMBER IN A FAMILY OF USEFUL CONTAINERS
|
||||
|
|
|
@ -8,13 +8,11 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import mo_json
|
||||
from mo_dots import Data, wrap
|
||||
from mo_files import File
|
||||
import mo_json
|
||||
from mo_logs import Log
|
||||
from mo_logs.exceptions import suppress_exception
|
||||
from mo_math.randoms import Random
|
||||
|
|
|
@ -9,10 +9,9 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
from collections import deque
|
||||
|
||||
|
||||
|
|
|
@ -8,10 +8,9 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from mo_future import is_text, is_binary
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
|
|
|
@ -8,13 +8,11 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import Mapping, Iterable, Set
|
||||
from collections import Iterable, Mapping, Set
|
||||
|
||||
from mo_dots import unwrap, tuplewrap, wrap
|
||||
from mo_dots import is_data, is_sequence, tuplewrap, unwrap, wrap
|
||||
from mo_dots.objects import datawrap
|
||||
from mo_future import PY2, iteritems
|
||||
from mo_logs import Log
|
||||
|
@ -83,7 +81,11 @@ class UniqueIndex(Set, Mapping):
|
|||
if key == None:
|
||||
Log.error("Expecting key to be not None")
|
||||
|
||||
d = self._data.get(key)
|
||||
try:
|
||||
d = self._data.get(key)
|
||||
except Exception as e:
|
||||
key = value2key(self._keys, val)
|
||||
|
||||
if d is None:
|
||||
self._data[key] = unwrap(val)
|
||||
self.count += 1
|
||||
|
@ -175,16 +177,16 @@ class UniqueIndex(Set, Mapping):
|
|||
|
||||
def value2key(keys, val):
|
||||
if len(keys) == 1:
|
||||
if isinstance(val, Mapping):
|
||||
if is_data(val):
|
||||
return val[keys[0]]
|
||||
elif isinstance(val, (list, tuple)):
|
||||
elif is_sequence(val):
|
||||
return val[0]
|
||||
else:
|
||||
return val
|
||||
else:
|
||||
if isinstance(val, Mapping):
|
||||
if is_data(val):
|
||||
return datawrap({k: val[k] for k in keys})
|
||||
elif isinstance(val, (list, tuple)):
|
||||
elif is_sequence(val):
|
||||
return datawrap(dict(zip(keys, val)))
|
||||
else:
|
||||
Log.error("do not know what to do here")
|
||||
|
|
|
@ -7,23 +7,20 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import sys
|
||||
from collections import Mapping
|
||||
|
||||
from mo_dots.utils import get_logger, get_module
|
||||
from mo_future import text_type, binary_type, generator_types
|
||||
from mo_future import binary_type, generator_types, is_binary, is_text, text_type
|
||||
|
||||
from mo_dots.utils import CLASS, OBJ, get_logger, get_module
|
||||
|
||||
none_type = type(None)
|
||||
ModuleType = type(sys.modules[__name__])
|
||||
|
||||
|
||||
_builtin_zip = zip
|
||||
SELF_PATH = "."
|
||||
ROOT_PATH = [SELF_PATH]
|
||||
ROOT_PATH = ["."]
|
||||
|
||||
|
||||
_get = object.__getattribute__
|
||||
|
@ -88,13 +85,29 @@ def unliteral_field(field):
|
|||
return field.replace("\\.", ".")
|
||||
|
||||
|
||||
def tail_field(field):
|
||||
"""
|
||||
RETURN THE FIRST STEP IN PATH, ALONG WITH THE REMAINING TAIL
|
||||
"""
|
||||
if field == "." or field==None:
|
||||
return ".", "."
|
||||
elif "." in field:
|
||||
if "\\." in field:
|
||||
return tuple(k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".", 1))
|
||||
else:
|
||||
return field.split(".", 1)
|
||||
else:
|
||||
return field, "."
|
||||
|
||||
|
||||
|
||||
def split_field(field):
|
||||
"""
|
||||
RETURN field AS ARRAY OF DOT-SEPARATED FIELDS
|
||||
"""
|
||||
if field == "." or field==None:
|
||||
return []
|
||||
elif isinstance(field, text_type) and "." in field:
|
||||
elif is_text(field) and "." in field:
|
||||
if field.startswith(".."):
|
||||
remainder = field.lstrip(".")
|
||||
back = len(field) - len(remainder) - 1
|
||||
|
@ -105,14 +118,17 @@ def split_field(field):
|
|||
return [field]
|
||||
|
||||
|
||||
def join_field(field):
|
||||
def join_field(path):
|
||||
"""
|
||||
RETURN field SEQUENCE AS STRING
|
||||
"""
|
||||
potent = [f for f in field if f != "."]
|
||||
if not potent:
|
||||
return "."
|
||||
return ".".join([f.replace(".", "\\.") for f in potent])
|
||||
output = ".".join([f.replace(".", "\\.") for f in path if f != None])
|
||||
return output if output else "."
|
||||
|
||||
# potent = [f for f in path if f != "."]
|
||||
# if not potent:
|
||||
# return "."
|
||||
# return ".".join([f.replace(".", "\\.") for f in potent])
|
||||
|
||||
|
||||
def concat_field(prefix, suffix):
|
||||
|
@ -132,8 +148,14 @@ def startswith_field(field, prefix):
|
|||
"""
|
||||
RETURN True IF field PATH STRING STARTS WITH prefix PATH STRING
|
||||
"""
|
||||
if prefix == ".":
|
||||
if prefix.startswith("."):
|
||||
return True
|
||||
# f_back = len(field) - len(field.strip("."))
|
||||
# p_back = len(prefix) - len(prefix.strip("."))
|
||||
# if f_back > p_back:
|
||||
# return False
|
||||
# else:
|
||||
# return True
|
||||
|
||||
if field.startswith(prefix):
|
||||
if len(field) == len(prefix) or field[len(prefix)] == ".":
|
||||
|
@ -164,9 +186,9 @@ def relative_field(field, parent):
|
|||
|
||||
|
||||
def hash_value(v):
|
||||
if isinstance(v, (set, tuple, list)):
|
||||
if is_many(v):
|
||||
return hash(tuple(hash_value(vv) for vv in v))
|
||||
elif not isinstance(v, Mapping):
|
||||
elif _get(v, CLASS) not in data_types:
|
||||
return hash(v)
|
||||
else:
|
||||
return hash(tuple(sorted(hash_value(vv) for vv in v.values())))
|
||||
|
@ -191,7 +213,7 @@ def set_default(*params):
|
|||
FOR EACH LEAF, RETURN THE HIGHEST PRIORITY LEAF VALUE
|
||||
"""
|
||||
p0 = params[0]
|
||||
agg = p0 if p0 or isinstance(p0, Mapping) else {}
|
||||
agg = p0 if p0 or _get(p0, CLASS) in data_types else {}
|
||||
for p in params[1:]:
|
||||
p = unwrap(p)
|
||||
if p is None:
|
||||
|
@ -207,10 +229,10 @@ def _all_default(d, default, seen=None):
|
|||
"""
|
||||
if default is None:
|
||||
return
|
||||
if isinstance(default, Data):
|
||||
if _get(default, CLASS) is Data:
|
||||
default = object.__getattribute__(default, SLOT) # REACH IN AND GET THE dict
|
||||
# Log = _late_import()
|
||||
# Log.error("strictly dict (or object) allowed: got {{type}}", type=default.__class__.__name__)
|
||||
# Log.error("strictly dict (or object) allowed: got {{type}}", type=_get(default, CLASS).__name__)
|
||||
|
||||
for k, default_value in default.items():
|
||||
default_value = unwrap(default_value) # TWO DIFFERENT Dicts CAN SHARE id() BECAUSE THEY ARE SHORT LIVED
|
||||
|
@ -218,7 +240,7 @@ def _all_default(d, default, seen=None):
|
|||
|
||||
if existing_value == None:
|
||||
if default_value != None:
|
||||
if isinstance(default_value, Mapping):
|
||||
if _get(default_value, CLASS) in data_types:
|
||||
df = seen.get(id(default_value))
|
||||
if df is not None:
|
||||
_set_attr(d, [k], df)
|
||||
|
@ -234,10 +256,10 @@ def _all_default(d, default, seen=None):
|
|||
except Exception as e:
|
||||
if PATH_NOT_FOUND not in e:
|
||||
get_logger().error("Can not set attribute {{name}}", name=k, cause=e)
|
||||
elif isinstance(existing_value, list) or isinstance(default_value, list):
|
||||
elif is_list(existing_value) or is_list(default_value):
|
||||
_set_attr(d, [k], None)
|
||||
_set_attr(d, [k], listwrap(existing_value) + listwrap(default_value))
|
||||
elif (hasattr(existing_value, "__setattr__") or isinstance(existing_value, Mapping)) and isinstance(default_value, Mapping):
|
||||
elif (hasattr(existing_value, "__setattr__") or _get(existing_value, CLASS) in data_types) and _get(default_value, CLASS) in data_types:
|
||||
df = seen.get(id(default_value))
|
||||
if df is not None:
|
||||
_set_attr(d, [k], df)
|
||||
|
@ -390,7 +412,7 @@ def _set_attr(obj_, path, value):
|
|||
elif value == None:
|
||||
new_value = None
|
||||
else:
|
||||
new_value = old_value.__class__(value) # TRY TO MAKE INSTANCE OF SAME CLASS
|
||||
new_value = _get(old_value, CLASS)(value) # TRY TO MAKE INSTANCE OF SAME CLASS
|
||||
except Exception as e:
|
||||
old_value = None
|
||||
new_value = value
|
||||
|
@ -417,7 +439,7 @@ def wrap(v):
|
|||
:return: Data INSTANCE
|
||||
"""
|
||||
|
||||
type_ = v.__class__
|
||||
type_ = _get(v, CLASS)
|
||||
|
||||
if type_ is dict:
|
||||
m = object.__new__(Data)
|
||||
|
@ -443,10 +465,12 @@ def wrap_leaves(value):
|
|||
def _wrap_leaves(value):
|
||||
if value == None:
|
||||
return None
|
||||
if isinstance(value, (text_type, binary_type, int, float)):
|
||||
|
||||
class_ = _get(value, CLASS)
|
||||
if class_ in (text_type, binary_type, int, float):
|
||||
return value
|
||||
if isinstance(value, Mapping):
|
||||
if isinstance(value, Data):
|
||||
if class_ in data_types:
|
||||
if class_ is Data:
|
||||
value = unwrap(value)
|
||||
|
||||
output = {}
|
||||
|
@ -455,7 +479,7 @@ def _wrap_leaves(value):
|
|||
|
||||
if key == "":
|
||||
get_logger().error("key is empty string. Probably a bad idea")
|
||||
if isinstance(key, binary_type):
|
||||
if is_binary(key):
|
||||
key = key.decode("utf8")
|
||||
|
||||
d = output
|
||||
|
@ -487,7 +511,7 @@ def _wrap_leaves(value):
|
|||
|
||||
|
||||
def unwrap(v):
|
||||
_type = _get(v, "__class__")
|
||||
_type = _get(v, CLASS)
|
||||
if _type is Data:
|
||||
d = _get(v, SLOT)
|
||||
return d
|
||||
|
@ -496,8 +520,8 @@ def unwrap(v):
|
|||
elif _type is NullType:
|
||||
return None
|
||||
elif _type is DataObject:
|
||||
d = _get(v, "_obj")
|
||||
if isinstance(d, Mapping):
|
||||
d = _get(v, OBJ)
|
||||
if _get(d, CLASS) in data_types:
|
||||
return d
|
||||
else:
|
||||
return v
|
||||
|
@ -537,7 +561,7 @@ def listwrap(value):
|
|||
"""
|
||||
if value == None:
|
||||
return FlatList()
|
||||
elif isinstance(value, list):
|
||||
elif is_list(value):
|
||||
return wrap(value)
|
||||
elif isinstance(value, set):
|
||||
return wrap(list(value))
|
||||
|
@ -548,7 +572,7 @@ def unwraplist(v):
|
|||
"""
|
||||
LISTS WITH ZERO AND ONE element MAP TO None AND element RESPECTIVELY
|
||||
"""
|
||||
if isinstance(v, list):
|
||||
if is_list(v):
|
||||
if len(v) == 0:
|
||||
return None
|
||||
elif len(v) == 1:
|
||||
|
@ -564,11 +588,11 @@ def tuplewrap(value):
|
|||
INTENDED TO TURN lists INTO tuples FOR USE AS KEYS
|
||||
"""
|
||||
if isinstance(value, (list, set, tuple) + generator_types):
|
||||
return tuple(tuplewrap(v) if isinstance(v, (list, tuple)) else v for v in value)
|
||||
return tuple(tuplewrap(v) if is_sequence(v) else v for v in value)
|
||||
return unwrap(value),
|
||||
|
||||
|
||||
from mo_dots.datas import Data, SLOT, data_types, is_data
|
||||
from mo_dots.nones import Null, NullType
|
||||
from mo_dots.datas import Data, SLOT
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_dots.lists import FlatList, is_list, is_sequence, is_container, is_many
|
||||
from mo_dots.objects import DataObject
|
||||
|
|
|
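Note: tail_field is new, and join_field was rewritten to skip None steps; both keep mo_dots' escaped-dot convention, where a literal dot inside a name is written \. — a rough sketch of the expected behavior (the sample paths are made up):

    tail_field("a.b.c")        # -> ["a", "b.c"]: first step, then the remaining tail
    tail_field(".")            # -> (".", "."): the root path has no tail
    join_field(["a", "b.c"])   # -> "a.b\\.c": literal dot escaped on the way out
    join_field([])             # -> ".": an empty sequence is the root
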
@@ -7,18 +7,16 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from collections import MutableMapping, Mapping
from copy import deepcopy
from collections import MutableMapping
from copy import copy, deepcopy
from decimal import Decimal

from mo_future import text_type, PY2, iteritems, none_type, generator_types, long
from mo_future import PY2, generator_types, is_binary, iteritems, long, none_type, text_type

from mo_dots import _getdefault, hash_value, literal_field, coalesce, listwrap, get_logger
from mo_dots.lists import FlatList
from mo_dots import _getdefault, coalesce, get_logger, hash_value, listwrap, literal_field
from mo_dots.utils import CLASS

_get = object.__getattribute__
_set = object.__setattr__

@@ -46,10 +44,11 @@ class Data(MutableMapping):
        else:
            if args:
                args0 = args[0]
                if isinstance(args0, Data):
                    _set(self, SLOT, _get(args0, SLOT))
                elif isinstance(args0, dict):
                class_ = _get(args0, CLASS)
                if class_ is dict:
                    _set(self, SLOT, args0)
                elif class_ is Data:
                    _set(self, SLOT, _get(args0, SLOT))
                else:
                    _set(self, SLOT, dict(args0))
            elif kwargs:

@@ -59,21 +58,21 @@ class Data(MutableMapping):

    def __bool__(self):
        d = self._internal_dict
        if isinstance(d, dict):
        if _get(d, CLASS) is dict:
            return bool(d)
        else:
            return d != None

    def __nonzero__(self):
        d = self._internal_dict
        if isinstance(d, dict):
        if _get(d, CLASS) is dict:
            return True if d else False
        else:
            return d != None

    def __contains__(self, item):
        value = Data.__getitem__(self, item)
        if isinstance(value, Mapping) or value:
        if _get(value, CLASS) in data_types or value:
            return True
        return False

@@ -86,7 +85,7 @@ class Data(MutableMapping):
            return Null
        if key == ".":
            output = self._internal_dict
            if isinstance(output, Mapping):
            if _get(output, CLASS) in data_types:
                return self
            else:
                return output

@@ -97,9 +96,9 @@ class Data(MutableMapping):
        if key.find(".") >= 0:
            seq = _split_field(key)
            for n in seq:
                if isinstance(d, NullType):
                if _get(d, CLASS) is NullType:
                    d = NullType(d, n)  # OH DEAR, Null TREATS n AS PATH, NOT LITERAL
                elif isinstance(d, list):
                elif is_list(d):
                    d = [_getdefault(dd, n) for dd in d]
                else:
                    d = _getdefault(d, n)  # EVERYTHING ELSE TREATS n AS LITERAL

@@ -151,9 +150,9 @@ class Data(MutableMapping):
            raise e

    def __getattr__(self, key):
        d = self._internal_dict
        d = _get(self, SLOT)
        v = d.get(key)
        t = v.__class__
        t = _get(v, CLASS)

        # OPTIMIZED wrap()
        if t is dict:

@@ -198,13 +197,13 @@ class Data(MutableMapping):
            return True

        d = self._internal_dict
        if not isinstance(d, dict):
        if _get(d, CLASS) is not dict:
            return d == other

        if not d and other == None:
            return False

        if not isinstance(other, Mapping):
        if _get(other, CLASS) not in data_types:
            return False
        e = unwrap(other)
        for k, v in d.items():

@@ -224,7 +223,7 @@ class Data(MutableMapping):

    def items(self):
        d = self._internal_dict
        return [(k, wrap(v)) for k, v in d.items() if v != None or isinstance(v, Mapping)]
        return [(k, wrap(v)) for k, v in d.items() if v != None or _get(v, CLASS) in data_types]

    def leaves(self, prefix=None):
        """

@@ -253,11 +252,18 @@ class Data(MutableMapping):
        return dict.__len__(d)

    def copy(self):
        return Data(**self)
        d = self._internal_dict
        if _get(d, CLASS) is dict:
            return Data(**self)
        else:
            return copy(d)

    def __copy__(self):
        d = self._internal_dict
        return Data(**d)
        if _get(d, CLASS) is dict:
            return Data(**self)
        else:
            return copy(d)

    def __deepcopy__(self, memo):
        d = self._internal_dict

@@ -311,7 +317,7 @@ def leaves(value, prefix=None):
    output = []
    for k, v in value.items():
        try:
            if isinstance(v, Mapping):
            if _get(v, CLASS) in data_types:
                output.extend(leaves(v, prefix=prefix + literal_field(k) + "."))
            else:
                output.append((prefix + literal_field(k), unwrap(v)))

@@ -342,7 +348,7 @@ class _DictUsingSelf(dict):
    def __getitem__(self, key):
        if key == None:
            return Null
        if isinstance(key, str):
        if is_binary(key):
            key = key.decode("utf8")

        d=self

@@ -385,7 +391,7 @@ class _DictUsingSelf(dict):
            raise e

    def __getattr__(self, key):
        if isinstance(key, str):
        if is_binary(key):
            ukey = key.decode("utf8")
        else:
            ukey = key

@@ -397,7 +403,7 @@ class _DictUsingSelf(dict):
        return wrap(o)

    def __setattr__(self, key, value):
        if isinstance(key, str):
        if is_binary(key):
            ukey = key.decode("utf8")
        else:
            ukey = key

@@ -421,7 +427,7 @@ class _DictUsingSelf(dict):
        if not d and other == None:
            return True

        if not isinstance(other, Mapping):
        if not _get(other, CLASS) in data_types:
            return False
        e = unwrap(other)
        for k, v in dict.items(d):

@@ -439,7 +445,7 @@ class _DictUsingSelf(dict):
        return wrap(dict.get(self, key, default))

    def items(self):
        return [(k, wrap(v)) for k, v in dict.items(self) if v != None or isinstance(v, Mapping)]
        return [(k, wrap(v)) for k, v in dict.items(self) if v != None or _get(v, CLASS) in data_types]

    def leaves(self, prefix=None):
        """

@@ -448,7 +454,7 @@ class _DictUsingSelf(dict):
        prefix = coalesce(prefix, "")
        output = []
        for k, v in self.items():
            if isinstance(v, Mapping):
            if _get(v, CLASS) in data_types:
                output.extend(wrap(v).leaves(prefix=prefix + literal_field(k) + "."))
            else:
                output.append((prefix + literal_field(k), v))

@@ -487,7 +493,7 @@ class _DictUsingSelf(dict):
        return wrap(dict.__deepcopy__(self, memo))

    def __delitem__(self, key):
        if isinstance(key, str):
        if is_binary(key):
            key = key.decode("utf8")

        if key.find(".") == -1:

@@ -529,11 +535,11 @@ def _str(value, depth):
    FOR DEBUGGING POSSIBLY RECURSIVE STRUCTURES
    """
    output = []
    if depth >0 and isinstance(value, Mapping):
    if depth >0 and _get(value, CLASS) in data_types:
        for k, v in value.items():
            output.append(str(k) + "=" + _str(v, depth - 1))
        return "{" + ",\n".join(output) + "}"
    elif depth >0 and isinstance(value, list):
    elif depth >0 and is_list(value):
        for v in value:
            output.append(_str(v, depth-1))
        return "[" + ",\n".join(output) + "]"

@@ -542,7 +548,7 @@ def _str(value, depth):


def _iadd(self, other):
    if not isinstance(other, Mapping):
    if not _get(other, CLASS) in data_types:
        get_logger().error("Expecting a Mapping")
    d = unwrap(self)
    for ok, ov in other.items():

@@ -550,39 +556,61 @@ def _iadd(self, other):
        if sv == None:
            d[ok] = deepcopy(ov)
        elif isinstance(ov, (Decimal, float, long, int)):
            if isinstance(sv, Mapping):
            if _get(sv, CLASS) in data_types:
                get_logger().error(
                    "can not add {{stype}} with {{otype}",
                    stype=sv.__class__.__name__,
                    otype=ov.__class__.__name__
                    stype=_get(sv, CLASS).__name__,
                    otype=_get(ov, CLASS).__name__
                )
            elif isinstance(sv, list):
            elif is_list(sv):
                d[ok].append(ov)
            else:
                d[ok] = sv + ov
        elif isinstance(ov, list):
        elif is_list(ov):
            d[ok] = listwrap(sv) + ov
        elif isinstance(ov, Mapping):
            if isinstance(sv, Mapping):
        elif _get(ov, CLASS) in data_types:
            if _get(sv, CLASS) in data_types:
                _iadd(sv, ov)
            elif isinstance(sv, list):
            elif is_list(sv):
                d[ok].append(ov)
            else:
                get_logger().error(
                    "can not add {{stype}} with {{otype}",
                    stype=sv.__class__.__name__,
                    otype=ov.__class__.__name__
                    stype=_get(sv, CLASS).__name__,
                    otype=_get(ov, CLASS).__name__
                )
        else:
            if isinstance(sv, Mapping):
            if _get(sv, CLASS) in data_types:
                get_logger().error(
                    "can not add {{stype}} with {{otype}",
                    stype=sv.__class__.__name__,
                    otype=ov.__class__.__name__
                    stype=_get(sv, CLASS).__name__,
                    otype=_get(ov, CLASS).__name__
                )
            else:
                d[ok].append(ov)
    return self


data_types = (Data, dict)  # TYPES TO HOLD DATA


def register_data(type_):
    """
    :param type_: ADD OTHER TYPE THAT HOLDS DATA
    :return:
    """
    global data_types
    data_types = tuple(set(data_types + (type_,)))


def is_data(d):
    """
    :param d:
    :return: True IF d IS A TYPE THAT HOLDS DATA
    """
    return d.__class__ in data_types


from mo_dots.nones import Null, NullType
from mo_dots.lists import is_list, FlatList
from mo_dots import unwrap, wrap

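Note: register_data lets other modules extend data_types after import; this commit uses it for DataObject in mo_dots/objects.py below. A minimal sketch with a hypothetical class:

    from mo_dots.datas import register_data, is_data

    class Record(object):    # hypothetical type that should be treated as data
        pass

    register_data(Record)
    is_data(Record())    # -> True: Record is now in data_types
    is_data([1, 2])      # -> False: lists are containers, not data
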
@@ -7,20 +7,21 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from copy import deepcopy

from mo_dots import wrap, unwrap, coalesce
from mo_future import generator_types, text_type

from mo_dots import CLASS, coalesce, unwrap, wrap
from mo_dots.nones import Null

LIST = text_type("list")

_get = object.__getattribute__
_get_list = lambda self: _get(self, "list")
_get_list = lambda self: _get(self, LIST)
_set = object.__setattr__
_emit_slice_warning = True

_datawrap = None
Log = None

@@ -29,6 +30,7 @@ def _late_import():
    global _datawrap
    global Log

    from mo_dots.objects import datawrap as _datawrap
    try:
        from mo_logs import Log

@@ -51,13 +53,13 @@ class FlatList(list):
        # list.__init__(self)
        if vals == None:
            self.list = []
        elif isinstance(vals, FlatList):
        elif vals.__class__ is FlatList:
            self.list = vals.list
        else:
            self.list = vals

    def __getitem__(self, index):
        if isinstance(index, slice):
        if _get(index, CLASS) is slice:
            # IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None)
            if index.step is not None:
                if not Log:

@@ -77,7 +79,7 @@ class FlatList(list):
            j = max(min(j, length), 0)
            return FlatList(_get_list(self)[i:j])

        if index < 0 or len(_get_list(self)) <= index:
        if not isinstance(index, int) or index < 0 or len(_get_list(self)) <= index:
            return Null
        return wrap(_get_list(self)[index])

@@ -109,7 +111,6 @@ class FlatList(list):
        """
        if not Log:
            _late_import()

        return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get_list(self)])

    def select(self, key):

@@ -185,17 +186,18 @@ class FlatList(list):
        return wrap(_get_list(self).pop(index))

    def __eq__(self, other):
        if isinstance(other, FlatList):
            other = _get_list(other)
        lst = _get_list(self)
        if other == None and len(lst) == 0:
            return True
        if not isinstance(other, list):
        other_class = _get(other, CLASS)
        if other_class is FlatList:
            other = _get_list(other)
        try:
            if len(lst) != len(other):
                return False
            return all([s == o for s, o in zip(lst, other)])
        except Exception:
            return False
        if len(lst) != len(other):
            return False
        return all([s == o for s, o in zip(lst, other)])

    def __add__(self, value):
        if value == None:

@@ -215,7 +217,7 @@ class FlatList(list):
        return FlatList(vals=output)

    def __iadd__(self, other):
        if isinstance(other, list):
        if is_list(other):
            self.extend(other)
        else:
            self.append(other)

@@ -282,3 +284,22 @@ class FlatList(list):


FlatList.EMPTY = Null

list_types = (list, FlatList)
container_types = (list, FlatList, set)
sequence_types = (list, FlatList, tuple)
many_types = tuple(set(list_types + container_types + sequence_types + generator_types))


def is_list(l):
    return l.__class__ in list_types

def is_container(l):
    return l.__class__ in container_types

def is_sequence(l):
    return l.__class__ in sequence_types

def is_many(l):
    return l.__class__ in many_types

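Note: these helpers test __class__ identity against fixed tuples, so they are faster than isinstance but deliberately stricter: subclasses do not match. A sketch of the resulting hierarchy:

    is_list([1, 2])           # True: list and FlatList only
    is_list((1, 2))           # False
    is_sequence((1, 2))       # True: adds tuple
    is_container({1, 2})      # True: adds set
    is_many(v for v in [1])   # True: adds generator types

    class MyList(list):       # hypothetical subclass
        pass
    is_list(MyList())         # False: __class__ checks ignore inheritance
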
@@ -7,16 +7,17 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_dots import _setdefault, wrap, split_field
from mo_future import text_type, binary_type
from mo_future import is_binary, text_type

from mo_dots import _setdefault, wrap
from mo_dots.utils import CLASS, OBJ

_get = object.__getattribute__
_set = object.__setattr__
_zero_list = []
_null_hash = hash(None)


class NullType(object):

@@ -35,7 +36,7 @@ class NullType(object):
        key - THE dict ITEM REFERENCE (DOT(.) IS NOT ESCAPED)
        """
        d = _get(self, "__dict__")
        d["_obj"] = obj
        d[OBJ] = obj
        d["__key__"] = key

    def __bool__(self):

@@ -45,7 +46,7 @@ class NullType(object):
        return False

    def __add__(self, other):
        if isinstance(other, list):
        if is_list(other):
            return other
        return Null

@@ -58,7 +59,7 @@ class NullType(object):
    def __iadd__(self, other):
        try:
            d = _get(self, "__dict__")
            o = d["_obj"]
            o = d[OBJ]
            if o is None:
                return self
            key = d["__key__"]

@@ -108,10 +109,10 @@ class NullType(object):
        return Null

    def __eq__(self, other):
        return other == None or isinstance(other, NullType)
        return other is None or _get(other, CLASS) is NullType or other == None

    def __ne__(self, other):
        return other is not None and not isinstance(other, NullType)
        return other is not None and _get(other, CLASS) is not NullType and other != None

    def __or__(self, other):
        if other is True:

@@ -153,7 +154,7 @@ class NullType(object):
    def __getitem__(self, key):
        if isinstance(key, slice):
            return Null
        elif isinstance(key, binary_type):
        elif is_binary(key):
            key = key.decode("utf8")
        elif isinstance(key, int):
            return NullType(self, key)

@@ -168,11 +169,11 @@ class NullType(object):
            key = text_type(key)

        d = _get(self, "__dict__")
        o = wrap(d["_obj"])
        o = wrap(d[OBJ])
        k = d["__key__"]
        if o is None:
            return Null
        elif isinstance(o, NullType):
        elif _get(o, CLASS) is NullType:
            return NullType(self, key)
        v = o.get(k)
        if v == None:

@@ -187,7 +188,7 @@ class NullType(object):
            key = text_type(key)

        d = _get(self, "__dict__")
        o = wrap(d["_obj"])
        o = wrap(d[OBJ])
        k = d["__key__"]

        seq = [k] + [key]

@@ -195,7 +196,7 @@ class NullType(object):

    def __setitem__(self, key, value):
        d = _get(self, "__dict__")
        o = d["_obj"]
        o = d[OBJ]
        if o is None:
            return
        k = d["__key__"]

@@ -225,7 +226,7 @@ class NullType(object):
        return "Null"

    def __hash__(self):
        return hash(None)
        return _null_hash


Null = NullType()  # INSTEAD OF None!!!

@@ -240,9 +241,9 @@ def _assign_to_null(obj, path, value, force=True):
    try:
        if obj is Null:
            return
        if isinstance(obj, NullType):
        if _get(obj, CLASS) is NullType:
            d = _get(obj, "__dict__")
            o = d["_obj"]
            o = d[OBJ]
            p = d["__key__"]
            s = [p]+path
            return _assign_to_null(o, s, value)

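Note: the new __eq__/__ne__ keep Null interchangeable with None while avoiding the isinstance call, and __hash__ now returns the precomputed _null_hash. Expected behavior, in brief:

    from mo_dots import Null

    Null == None              # True
    Null == Null              # True: NullType instances compare equal
    Null != None              # False
    hash(Null) == hash(None)  # True: _null_hash is hash(None), computed once
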
@@ -7,16 +7,17 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from collections import Mapping
from datetime import date, datetime
from decimal import Decimal

from mo_dots import wrap, unwrap, Data, FlatList, NullType, get_attr, set_attr, SLOT
from mo_future import text_type, binary_type, get_function_defaults, get_function_arguments, none_type, generator_types
from mo_future import binary_type, generator_types, get_function_arguments, get_function_defaults, none_type, text_type

from mo_dots import Data, FlatList, NullType, SLOT, get_attr, set_attr, unwrap, wrap
from mo_dots.datas import register_data
from mo_dots.utils import CLASS, OBJ

_get = object.__getattribute__
_set = object.__setattr__

@@ -29,31 +30,31 @@ class DataObject(Mapping):
    """

    def __init__(self, obj):
        _set(self, "_obj", obj)
        _set(self, OBJ, obj)

    def __getattr__(self, item):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        output = get_attr(obj, item)
        return datawrap(output)

    def __setattr__(self, key, value):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        set_attr(obj, key, value)

    def __getitem__(self, item):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        output = get_attr(obj, item)
        return datawrap(output)

    def keys(self):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        try:
            return obj.__dict__.keys()
        except Exception as e:
            raise e

    def items(self):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        try:
            return obj.__dict__.items()
        except Exception as e:

@@ -64,7 +65,7 @@ class DataObject(Mapping):
        ]

    def iteritems(self):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        try:
            return obj.__dict__.iteritems()
        except Exception as e:

@@ -82,43 +83,40 @@ class DataObject(Mapping):
        return (k for k in self.keys())

    def __unicode__(self):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        return text_type(obj)

    def __str__(self):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        return str(obj)

    def __len__(self):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        return len(obj)

    def __call__(self, *args, **kwargs):
        obj = _get(self, "_obj")
        obj = _get(self, OBJ)
        return obj(*args, **kwargs)


register_data(DataObject)


def datawrap(v):
    type_ = _get(v, "__class__")
    type_ = _get(v, CLASS)

    if type_ is dict:
        m = Data()
        _set(m, SLOT, v)  # INJECT m.__dict__=v SO THERE IS NO COPY
        return m
    elif type_ is Data:
        return v
    elif type_ is DataObject:
        return v
    elif type_ is none_type:
        return None  # So we allow `is None`
    elif type_ is list:
        return FlatList(v)
    elif type_ in (Data, DataObject, none_type, FlatList, text_type, binary_type, int, float, Decimal, datetime, date, NullType, none_type):
        return v
    elif type_ in generator_types:
        return (wrap(vv) for vv in v)
    elif isinstance(v, (text_type, binary_type, int, float, Decimal, datetime, date, Data, FlatList, NullType, none_type)):
    elif isinstance(v, (text_type, binary_type, int, float, Decimal, datetime, date, FlatList, NullType, Mapping, none_type)):
        return v
    elif isinstance(v, Mapping):
        return DataObject(v)
    elif hasattr(v, "__data__"):
        return v.__data__()
    else:

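Note: datawrap is the object-aware cousin of wrap: dicts are slotted into a Data with no copy, lists become FlatList, plain values pass through, and plain objects are exposed through DataObject (the final else branch is truncated above; the DataObject fallback is an assumption based on the rest of this file). A rough sketch:

    from mo_dots.objects import datawrap

    class Point(object):      # hypothetical plain object
        def __init__(self):
            self.x = 1

    datawrap({"a": 1})        # -> Data: the dict is injected into the slot, no copy
    datawrap([1, 2])          # -> FlatList
    datawrap(None)            # -> None, so `is None` checks still work
    datawrap(Point()).x       # -> 1: attribute access through DataObject (assumed fallback)
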
@@ -7,14 +7,15 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

import importlib
import sys

from mo_future import PY2
from mo_future import PY2, text_type

OBJ = text_type("_obj")
CLASS = text_type("__class__")

_Log = None

@@ -8,17 +8,18 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import base64
from datetime import datetime
import io
from mimetypes import MimeTypes
import os
import re
import shutil
from datetime import datetime
from mimetypes import MimeTypes
from tempfile import mkdtemp, NamedTemporaryFile
from tempfile import NamedTemporaryFile, mkdtemp

from mo_dots import get_module, coalesce, Null
from mo_future import text_type, binary_type, PY3
from mo_logs import Log, Except
from mo_dots import Null, coalesce, get_module, is_list
from mo_files.url import URL
from mo_future import PY3, binary_type, text_type, is_text
from mo_logs import Except, Log
from mo_logs.exceptions import extract_stack
from mo_threads import Thread, Till

@@ -42,12 +43,12 @@ class File(object):
        """
        YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES
        """
        self._mime_type = mime_type
        if filename == None:
            Log.error(u"File must be given a filename")
        elif isinstance(filename, File):
        if isinstance(filename, File):
            return
        elif isinstance(filename, (binary_type, text_type)):

        self._mime_type = mime_type

        if isinstance(filename, (binary_type, text_type)):
            try:
                self.key = None
                if filename==".":

@@ -272,10 +273,10 @@ class File(object):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "wb") as f:
            if isinstance(data, list) and self.key:
            if is_list(data) and self.key:
                Log.error(u"list of data and keys are not supported, encrypt before sending to file")

            if isinstance(data, list):
            if is_list(data):
                pass
            elif isinstance(data, (binary_type, text_type)):
                data=[data]

@@ -283,7 +284,7 @@ class File(object):
                pass

            for d in data:
                if not isinstance(d, text_type):
                if not is_text(d):
                    Log.error(u"Expecting unicode data only")
                if self.key:
                    from mo_math.crypto import encrypt

@@ -317,7 +318,7 @@ class File(object):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "ab") as output_file:
            if not isinstance(content, text_type):
            if not is_text(content):
                Log.error(u"expecting to write unicode only")
            output_file.write(content.encode(encoding))
            output_file.write(b"\n")

@@ -440,7 +441,7 @@ class TempDirectory(File):
    WILL BE DELETED WHEN EXITED
    """
    def __new__(cls):
        return File.__new__(cls, None)
        return object.__new__(cls)

    def __init__(self):
        File.__init__(self, mkdtemp())

@@ -460,7 +461,9 @@ class TempFile(File):
    def __new__(cls, *args, **kwargs):
        return object.__new__(cls)

    def __init__(self):
    def __init__(self, filename=None):
        if isinstance(filename, File):
            return
        self.temp = NamedTemporaryFile(delete=False)
        self.temp.close()
        File.__init__(self, self.temp.name)

@@ -7,11 +7,9 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from collections import Mapping

from mo_dots import wrap, Data, coalesce, Null
from mo_future import urlparse, text_type, PY2, unichr
from mo_json import value2json, json2value
from mo_dots import Data, Null, coalesce, is_data, is_list, wrap
from mo_future import PY2, is_text, text_type, unichr, urlparse, is_binary
from mo_json import json2value, value2json
from mo_logs import Log


@@ -62,7 +60,7 @@ class URL(object):
        return False

    def __truediv__(self, other):
        if not isinstance(other, text_type):
        if not is_text(other):
            Log.error(u"Expecting text path")
        output = self.__copy__()
        output.path = output.path.rstrip('/') + "/" + other.lstrip('/')

@@ -186,7 +184,7 @@ def url_param2value(param):
            u = query.get(k)
            if u is None:
                query[k] = v
            elif isinstance(u, list):
            elif is_list(u):
                u += [v]
            else:
                query[k] = [u, v]

@@ -202,15 +200,15 @@ def value2url_param(value):
    if value == None:
        Log.error("Can not encode None into a URL")

    if isinstance(value, Mapping):
    if is_data(value):
        value_ = wrap(value)
        output = "&".join([
            value2url_param(k) + "=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(value2json(v)))
            value2url_param(k) + "=" + (value2url_param(v) if is_text(v) else value2url_param(value2json(v)))
            for k, v in value_.leaves()
        ])
    elif isinstance(value, text_type):
    elif is_text(value):
        output = "".join(_map2url[c] for c in value.encode('utf8'))
    elif isinstance(value, str):
    elif is_binary(value):
        output = "".join(_map2url[c] for c in value)
    elif hasattr(value, "__iter__"):
        output = ",".join(value2url_param(v) for v in value)

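Note: value2url_param now branches on is_data/is_text/is_binary, but the encoding rules are unchanged. A sketch of the expected output, assuming the usual percent-encoding map (pair order for data values may vary):

    value2url_param({"a": 1, "b": "x y"})   # -> "a=1&b=x%20y": leaves become k=v pairs
    value2url_param("x y")                  # -> "x%20y"
    value2url_param([1, 2])                 # -> "1,2": iterables join on comma
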
@@ -7,14 +7,11 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

import json
import sys

PY3 = sys.version_info[0] == 3
PY2 = sys.version_info[0] == 2

@@ -35,6 +32,8 @@ if PY3:
    from functools import cmp_to_key
    from configparser import ConfigParser
    from itertools import zip_longest
    from functools import reduce
    import builtins as __builtin__

    izip = zip
    zip_longest = itertools.zip_longest

@@ -93,6 +92,15 @@ if PY3:
    def sort_using_key(data, key):
        return sorted(data, key=key)

    def first(values):
        return iter(values).__next__()

    def is_text(t):
        return t.__class__ is str

    def is_binary(b):
        return b.__class__ is bytes

    utf8_json_encoder = json.JSONEncoder(
        skipkeys=False,
        ensure_ascii=False,  # DIFF FROM DEFAULTS

@@ -115,6 +123,7 @@ else:
    from __builtin__ import zip as transpose
    from itertools import izip

    reduce = __builtin__.reduce
    text_type = __builtin__.unicode
    string_types = (str, unicode)
    binary_type = str

@@ -162,6 +171,15 @@ else:
    # lambda a, b: (1 if (a[0]>b[0]) else (-1 if (a[0]<b[0]) else 0))
    # )

    def first(values):
        return iter(values).next()

    def is_text(t):
        return t.__class__ is unicode

    def is_binary(b):
        return b.__class__ is str

    utf8_json_encoder = json.JSONEncoder(
        skipkeys=False,
        ensure_ascii=False,  # DIFF FROM DEFAULTS

@@ -237,4 +255,4 @@ else:
            d[key] = value
        return d


_keep_imports = (ConfigParser, zip_longest, reduce, transpose, izip, HTMLParser, urlparse, StringIO, BytesIO, allocate_lock, get_ident, start_new_thread, interrupt_main)

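Note: the new is_text/is_binary helpers are exact-class tests, so the same call works on both Pythons without importing text_type/binary_type everywhere. Expected behavior:

    from mo_future import is_text, is_binary

    is_text(u"abc")     # True on PY2 and PY3
    is_binary(b"abc")   # True on PY2 and PY3
    is_text(b"abc")     # False

    class S(str):       # hypothetical subclass
        pass
    is_text(S("x"))     # False on PY3: __class__ identity, not isinstance
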
@@ -7,27 +7,39 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from datetime import date, datetime, timedelta
from decimal import Decimal
import math
import re
from collections import Mapping
from datetime import date, timedelta, datetime
from decimal import Decimal

from mo_dots import FlatList, NullType, Data, wrap_leaves, wrap, Null, SLOT
from mo_dots import Data, FlatList, Null, NullType, SLOT, is_data, wrap, wrap_leaves
from mo_dots.objects import DataObject
from mo_future import text_type, none_type, long, binary_type, PY2
from mo_logs import Except, strings, Log
from mo_future import PY2, is_binary, is_text, items, long, none_type, text_type
from mo_logs import Except, Log, strings
from mo_logs.strings import expand_template
from mo_times import Date, Duration

FIND_LOOPS = False
SNAP_TO_BASE_10 = True  # Identify floats near a round base10 value (has 000 or 999) and shorten
SNAP_TO_BASE_10 = False  # Identify floats near a round base10 value (has 000 or 999) and shorten
CAN_NOT_DECODE_JSON = "Can not decode JSON"

IS_NULL = '0'
BOOLEAN = 'boolean'
INTEGER = 'integer'
NUMBER = 'number'
STRING = 'string'
OBJECT = 'object'
NESTED = "nested"
EXISTS = "exists"

ALL_TYPES = {IS_NULL: IS_NULL, BOOLEAN: BOOLEAN, INTEGER: INTEGER, NUMBER: NUMBER, STRING: STRING, OBJECT: OBJECT, NESTED: NESTED, EXISTS: EXISTS}
JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
STRUCT = [EXISTS, OBJECT, NESTED]


_get = object.__getattribute__

@@ -81,6 +93,7 @@ def float2json(value):


def _snap_to_base_10(mantissa):
    # TODO: https://lists.nongnu.org/archive/html/gcl-devel/2012-10/pdfkieTlklRzN.pdf
    digits = mantissa.replace('.', '')
    if SNAP_TO_BASE_10:
        f9 = strings.find(digits, '999')

@@ -159,7 +172,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):
        return scrub_number(value)
    elif type_ is Data:
        return _scrub(_get(value, SLOT), is_done, stack, scrub_text, scrub_number)
    elif isinstance(value, Mapping):
    elif is_data(value):
        _id = id(value)
        if _id in is_done:
            Log.warning("possible loop in structure detected")

@@ -168,16 +181,16 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):

        output = {}
        for k, v in value.items():
            if isinstance(k, text_type):
            if is_text(k):
                pass
            elif isinstance(k, binary_type):
            elif is_binary(k):
                k = k.decode('utf8')
            # elif hasattr(k, "__unicode__"):
            #     k = text_type(k)
            else:
                Log.error("keys must be strings")
            v = _scrub(v, is_done, stack, scrub_text, scrub_number)
            if v != None or isinstance(v, Mapping):
            if v != None or is_data(v):
                output[k] = v

        is_done.discard(_id)

@@ -187,7 +200,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):
        for v in value:
            v = _scrub(v, is_done, stack, scrub_text, scrub_number)
            output.append(v)
        return output
        return output  # if output else None
    elif type_ is type:
        return value.__name__
    elif type_.__name__ == "bool_":  # DEAR ME! Numpy has it's own booleans (value==False could be used, but 0==False in Python. DOH!)

@@ -276,7 +289,7 @@ def json2value(json_string, params=Null, flexible=False, leaves=False):
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if not isinstance(json_string, text_type):
    if not is_text(json_string):
        Log.error("only unicode json accepted")

    try:

@@ -367,5 +380,45 @@ def datetime2unix(d):
        Log.error("Can not convert {{value}}", value= d, cause=e)


python_type_to_json_type = {
    int: NUMBER,
    text_type: STRING,
    float: NUMBER,
    bool: BOOLEAN,
    NullType: OBJECT,
    none_type: OBJECT,
    Data: OBJECT,
    dict: OBJECT,
    object: OBJECT,
    list: NESTED,
    set: NESTED,
    # tuple: NESTED,  # DO NOT INCLUDE, WILL HIDE LOGIC ERRORS
    FlatList: NESTED,
    Date: NUMBER
}

if PY2:
    python_type_to_json_type[str] = STRING
    python_type_to_json_type[long] = NUMBER

for k, v in items(python_type_to_json_type):
    python_type_to_json_type[k.__name__] = v

_merge_order = {
    BOOLEAN: 1,
    INTEGER: 2,
    NUMBER: 3,
    STRING: 4,
    OBJECT: 5,
    NESTED: 6
}


def _merge_json_type(A, B):
    a = _merge_order[A]
    b = _merge_order[B]
    return A if a >= b else B


from mo_json.decoder import json_decoder
from mo_json.encoder import json_encoder, pypy_json_encode

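Note: _merge_order appears to rank the JSON types from narrowest to widest so a value seen with two types can settle on the more general one; _merge_json_type simply keeps the higher rank. For example:

    _merge_json_type(INTEGER, NUMBER)   # -> NUMBER: 3 outranks 2
    _merge_json_type(STRING, BOOLEAN)   # -> STRING: 4 outranks 1
    _merge_json_type(OBJECT, NESTED)    # -> NESTED: 6 outranks 5
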
@@ -7,10 +7,9 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import json

json_decoder = json.loads

@@ -7,24 +7,22 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

import json
import math
import time
from collections import Mapping
from datetime import datetime, date, timedelta
from mo_future import is_text, is_binary
from datetime import date, datetime, timedelta
from decimal import Decimal
import json
from json.encoder import encode_basestring
import math
from math import floor
import time

from mo_dots import Data, FlatList, NullType, Null, SLOT
from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange, PYPY
from mo_json import ESCAPE_DCT, scrub, float2json
from mo_dots import Data, FlatList, Null, NullType, SLOT, is_data, is_list
from mo_future import PYPY, binary_type, is_binary, is_text, long, sort_using_key, text_type, utf8_json_encoder, xrange
from mo_json import ESCAPE_DCT, float2json, scrub
from mo_logs import Except
from mo_logs.strings import utf82unicode, quote
from mo_logs.strings import quote, utf82unicode
from mo_times import Timer
from mo_times.dates import Date
from mo_times.durations import Duration

@@ -111,8 +109,11 @@ class cPythonJSONEncoder(object):
        try:
            with Timer("scrub", too_long=0.1):
                scrubbed = scrub(value)
            with Timer("encode", too_long=0.1):
                return text_type(self.encoder(scrubbed))
            param = {"size": 0}
            with Timer("encode {{size}} characters", param=param, too_long=0.1):
                output = text_type(self.encoder(scrubbed))
                param["size"] = len(output)
                return output
        except Exception as e:
            from mo_logs.exceptions import Except
            from mo_logs import Log

@@ -198,7 +199,7 @@ def _value2json(value, _buffer):
        append(_buffer, float2json(value.seconds))
    elif type is NullType:
        append(_buffer, u"null")
    elif isinstance(value, Mapping):
    elif is_data(value):
        if not value:
            append(_buffer, u"{}")
        else:

@@ -250,7 +251,7 @@ def _dict2json(value, _buffer):
    for k, v in value.items():
        append(_buffer, prefix)
        prefix = COMMA_QUOTE
        if isinstance(k, binary_type):
        if is_binary(k):
            k = utf82unicode(k)
        for c in k:
            append(_buffer, ESCAPE_DCT.get(c, c))

@@ -275,21 +276,21 @@ def pretty_json(value):
        return "false"
    elif value is True:
        return "true"
    elif isinstance(value, Mapping):
    elif is_data(value):
        try:
            items = sort_using_key(value.items(), lambda r: r[0])
            values = [encode_basestring(k) + PRETTY_COLON + indent(pretty_json(v)).strip() for k, v in items if v != None]
            values = [encode_basestring(k) + PRETTY_COLON + pretty_json(v) for k, v in items if v != None]
            if not values:
                return "{}"
            elif len(values) == 1:
                return "{" + values[0] + "}"
            else:
                return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}"
                return "{\n" + ",\n".join(indent(v) for v in values) + "\n}"
        except Exception as e:
            from mo_logs import Log
            from mo_math import OR

            if OR(not isinstance(k, text_type) for k in value.keys()):
            if OR(not is_text(k) for k in value.keys()):
                Log.error(
                    "JSON must have string keys: {{keys}}:",
                    keys=[k for k in value.keys()],

@@ -303,8 +304,8 @@ def pretty_json(value):
        )
    elif value in (None, Null):
        return "null"
    elif isinstance(value, (text_type, binary_type)):
        if isinstance(value, binary_type):
    elif value.__class__ in (binary_type, text_type):
        if is_binary(value):
            value = utf82unicode(value)
        try:
            return quote(value)

@@ -330,9 +331,9 @@ def pretty_json(value):
            Log.note("return value of length {{length}}", length=len(output))
            return output
        except BaseException as f:
            Log.warning("can not even explicit convert {{type}}", type=f.__class__.__name__, cause=f)
            Log.warning("can not convert {{type}} to json", type=f.__class__.__name__, cause=f)
            return "null"
    elif isinstance(value, list):
    elif is_list(value):
        if not value:
            return "[]"

@@ -7,15 +7,13 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import json
from collections import Mapping
from types import GeneratorType

from mo_dots import split_field, startswith_field, relative_field, Data, join_field, Null, wrap
from mo_dots import Data, Null, is_data, join_field, relative_field, split_field, startswith_field, wrap
from mo_logs import Log

DEBUG = False

@@ -31,7 +29,6 @@ NO_VARS = set()
json_decoder = json.JSONDecoder().decode


def parse(json, query_path, expected_vars=NO_VARS):
    """
    INTENDED TO TREAT JSON AS A STREAM; USING MINIMAL MEMORY WHILE IT ITERATES

@@ -154,7 +151,7 @@ def parse(json, query_path, expected_vars=NO_VARS):
                pass
            elif e == ".":
                destination[i] = value
            elif isinstance(value, Mapping):
            elif is_data(value):
                destination[i] = value[e]
            else:
                destination[i] = Null

@@ -312,7 +309,7 @@ def parse(json, query_path, expected_vars=NO_VARS):
        c = json[index]
        return c, index + 1

    if isinstance(query_path, Mapping) and query_path.get("items"):
    if is_data(query_path) and query_path.get("items"):
        path_list = split_field(query_path.get("items")) + ["$items"]  # INSERT A MARKER SO THAT OBJECT IS STREAM DECODED
    else:
        path_list = split_field(query_path)

|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import time
|
||||
from collections import Mapping
|
||||
from mo_future import is_text, is_binary
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from json.encoder import encode_basestring
|
||||
import time
|
||||
|
||||
from mo_dots import Data, FlatList, NullType, join_field, split_field, _get, SLOT, DataObject
|
||||
from mo_future import text_type, binary_type, sort_using_key, long, PY2, none_type, generator_types
|
||||
from mo_json import ESCAPE_DCT, float2json
|
||||
from mo_json.encoder import UnicodeBuilder, COLON, COMMA, problem_serializing, json_encoder
|
||||
from mo_dots import CLASS, Data, DataObject, FlatList, NullType, SLOT, _get, is_data, join_field, split_field
|
||||
from mo_dots.objects import OBJ
|
||||
from mo_future import binary_type, generator_types, is_binary, is_text, long, sort_using_key, text_type
|
||||
from mo_json import BOOLEAN, ESCAPE_DCT, EXISTS, INTEGER, NESTED, NUMBER, STRING, float2json, python_type_to_json_type
|
||||
from mo_json.encoder import COLON, COMMA, UnicodeBuilder, json_encoder, problem_serializing
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote, utf82unicode
|
||||
from mo_times import Date, Duration
|
||||
|
@ -45,11 +44,19 @@ def untype_path(encoded):
|
|||
|
||||
def unnest_path(encoded):
|
||||
if encoded.startswith(".."):
|
||||
encoded = encoded.lstrip(".")
|
||||
if not encoded:
|
||||
encoded = "."
|
||||
remainder = encoded.lstrip(".")
|
||||
back = len(encoded) - len(remainder)
|
||||
return ("." * back) + unnest_path(remainder)
|
||||
|
||||
return join_field(decode_property(c) for c in split_field(encoded) if c != NESTED_TYPE)
|
||||
path = split_field(encoded)
|
||||
if not path:
|
||||
return "."
|
||||
if path[-1] == NESTED_TYPE:
|
||||
path = path[:-1]
|
||||
if not path:
|
||||
return "."
|
||||
|
||||
return join_field([decode_property(c) for c in path[:-1] if not c.startswith(TYPE_PREFIX)] + [decode_property(path[-1])])
|
||||
|
||||
|
||||
def untyped(value):
|
||||
|
@ -57,7 +64,7 @@ def untyped(value):
|
|||
|
||||
|
||||
def _untype_list(value):
|
||||
if any(isinstance(v, Mapping) for v in value):
|
||||
if any(is_data(v) for v in value):
|
||||
# MAY BE MORE TYPED OBJECTS IN THIS LIST
|
||||
output = [_untype_value(v) for v in value]
|
||||
else:
|
||||
|
@ -91,7 +98,7 @@ def _untype_dict(value):
|
|||
|
||||
|
||||
def _untype_value(value):
|
||||
_type = _get(value, "__class__")
|
||||
_type = _get(value, CLASS)
|
||||
if _type is Data:
|
||||
return _untype_dict(_get(value, SLOT))
|
||||
elif _type is dict:
|
||||
|
@ -103,7 +110,7 @@ def _untype_value(value):
|
|||
elif _type is NullType:
|
||||
return None
|
||||
elif _type is DataObject:
|
||||
return _untype_value(_get(value, "_obj"))
|
||||
return _untype_value(_get(value, OBJ))
|
||||
elif _type in generator_types:
|
||||
return _untype_list(value)
|
||||
else:
|
||||
|
@ -144,7 +151,7 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
|
|||
else:
|
||||
from mo_logs import Log
|
||||
|
||||
Log.error("Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.names['.'])
|
||||
Log.error("Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.name)
|
||||
|
||||
sub_schema = {json_type_to_inserter_type[value_json_type]: sub_schema}
|
||||
|
||||
|
@ -186,10 +193,15 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
|
|||
append(buffer, text_type(len(value)))
|
||||
append(buffer, '}')
|
||||
else:
|
||||
# SINGLETON LISTS OF null SHOULD NOT EXIST
|
||||
from mo_logs import Log
|
||||
|
||||
Log.error("should not happen")
|
||||
# SINGLETON LIST
|
||||
append(buffer, '{')
|
||||
append(buffer, QUOTED_NESTED_TYPE)
|
||||
append(buffer, '[{')
|
||||
append(buffer, QUOTED_EXISTS_TYPE)
|
||||
            append(buffer, '1}]')
            append(buffer, COMMA)
            append(buffer, QUOTED_EXISTS_TYPE)
            append(buffer, '1}')
        else:
            if EXISTS_TYPE not in sub_schema:
                sub_schema[EXISTS_TYPE] = {}

@@ -200,7 +212,7 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
        else:
            append(buffer, '{')
            append(buffer, QUOTED_EXISTS_TYPE)
            append(buffer, '0}')
            append(buffer, '1}')
    elif _type is binary_type:
        if STRING_TYPE not in sub_schema:
            sub_schema[STRING_TYPE] = True

@@ -246,16 +258,27 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
    elif _type in (set, list, tuple, FlatList):
        if len(value) == 0:
            append(buffer, '{')
            append(buffer, QUOTED_NESTED_TYPE)
            append(buffer, '[]}')
        elif any(isinstance(v, (Mapping, set, list, tuple, FlatList)) for v in value):
            if NESTED_TYPE not in sub_schema:
                sub_schema[NESTED_TYPE] = {}
                net_new_properties.append(path + [NESTED_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NESTED_TYPE)
            _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
            append(buffer, '}')
            append(buffer, QUOTED_EXISTS_TYPE)
            append(buffer, '0}')
        elif any(v.__class__ in (Data, dict, set, list, tuple, FlatList) for v in value):
            # THIS IS NOT DONE BECAUSE
            if len(value) == 1:
                if NESTED_TYPE in sub_schema:
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                    append(buffer, '}')
                else:
                    # NO NEED TO NEST, SO DO NOT DO IT
                    typed_encode(value[0], sub_schema, path, net_new_properties, buffer)
            else:
                if NESTED_TYPE not in sub_schema:
                    sub_schema[NESTED_TYPE] = {}
                    net_new_properties.append(path + [NESTED_TYPE])
                append(buffer, '{')
                append(buffer, QUOTED_NESTED_TYPE)
                _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                append(buffer, '}')
        else:
            # ALLOW PRIMITIVE MULTIVALUES
            value = [v for v in value if v != None]

@@ -390,9 +413,9 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
            continue
        append(buffer, prefix)
        prefix = COMMA
        if isinstance(k, binary_type):
        if is_binary(k):
            k = utf82unicode(k)
        if not isinstance(k, text_type):
        if not is_text(k):
            Log.error("Expecting property name to be a string")
        if k not in sub_schema:
            sub_schema[k] = {}

@@ -410,40 +433,6 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
    append(buffer, '1}')


IS_NULL = '0'
BOOLEAN = 'boolean'
INTEGER = 'integer'
NUMBER = 'number'
STRING = 'string'
OBJECT = 'object'
NESTED = "nested"
EXISTS = "exists"

JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
STRUCT = [EXISTS, OBJECT, NESTED]


python_type_to_json_type = {
    int: NUMBER,
    text_type: STRING,
    float: NUMBER,
    None: OBJECT,
    bool: BOOLEAN,
    NullType: OBJECT,
    none_type: OBJECT,
    Data: OBJECT,
    dict: OBJECT,
    object: OBJECT,
    Mapping: OBJECT,
    list: NESTED,
    FlatList: NESTED,
    Date: NUMBER
}

if PY2:
    python_type_to_json_type[str] = STRING
    python_type_to_json_type[long] = NUMBER


TYPE_PREFIX = "~"  # u'\u0442\u0443\u0440\u0435-'  # "туре"

@@ -461,6 +450,12 @@ QUOTED_STRING_TYPE = quote(STRING_TYPE) + COLON
QUOTED_NESTED_TYPE = quote(NESTED_TYPE) + COLON
QUOTED_EXISTS_TYPE = quote(EXISTS_TYPE) + COLON

inserter_type_to_json_type = {
    BOOLEAN_TYPE: BOOLEAN,
    NUMBER_TYPE: NUMBER,
    STRING_TYPE: STRING
}

json_type_to_inserter_type = {
    BOOLEAN: BOOLEAN_TYPE,
    INTEGER: NUMBER_TYPE,
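For orientation: the typed encoder above decorates every property with a type marker built from TYPE_PREFIX = "~" (the "~N~" nested key also appears in the ES schema later in this commit). Below is a minimal, runnable sketch of the output shape only, not the library's streaming encoder; the exact "~e~"/"~s~"/"~n~"/"~N~" suffixes are an assumption based on the constants in this diff.

# Sketch of typed-JSON decoration (illustration only, not mo_json's
# streaming encoder). Key suffixes assumed from the constants above.
EXISTS_KEY = "~e~"   # existence marker / count
STRING_KEY = "~s~"   # string payload
NUMBER_KEY = "~n~"   # numeric payload
NESTED_KEY = "~N~"   # arrays are pushed under a "nested" property

def sketch_typed_encode(value):
    if value is None:
        return {EXISTS_KEY: 0}
    if isinstance(value, dict):
        out = {k: sketch_typed_encode(v) for k, v in value.items()}
        out[EXISTS_KEY] = 1
        return out
    if isinstance(value, (list, tuple, set)):
        # multivalues become a nested array, as in the branch above
        return {NESTED_KEY: [sketch_typed_encode(v) for v in value], EXISTS_KEY: len(value)}
    if isinstance(value, str):
        return {STRING_KEY: value}
    if isinstance(value, (int, float)):
        return {NUMBER_KEY: value}
    return {EXISTS_KEY: 1}

print(sketch_typed_encode({"a": "x", "b": [1, 2]}))
# {'a': {'~s~': 'x'}, 'b': {'~N~': [{'~n~': 1}, {'~n~': 2}], '~e~': 2}, '~e~': 1}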
@@ -8,21 +8,19 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import os
from collections import Mapping

import mo_dots
from mo_dots import set_default, wrap, unwrap
from mo_dots import is_data, is_list, set_default, unwrap, wrap
from mo_files import File
from mo_files.url import URL
from mo_future import text_type
from mo_json import json2value
from mo_json_config.convert import ini2value
from mo_logs import Log, Except
from mo_logs import Except, Log

DEBUG = False

@@ -86,7 +84,7 @@ def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
    if is_data(node):
        ref = None
        output = {}
        for k, v in node.items():

@@ -123,7 +121,7 @@ def _replace_ref(node, url):

        if not output:
            output = new_value
        elif isinstance(output, text_type):
        elif is_text(output):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            output = unwrap(set_default(output, new_value))

@@ -131,7 +129,7 @@ def _replace_ref(node, url):
        DEBUG and Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
    elif is_list(node):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node

@@ -141,7 +139,7 @@ def _replace_ref(node, url):


def _replace_locals(node, doc_path):
    if isinstance(node, Mapping):
    if is_data(node):
        # RECURSE, DEEP COPY
        ref = None
        output = {}

@@ -179,7 +177,7 @@ def _replace_locals(node, doc_path):
        else:
            return unwrap(set_default(output, new_value))

    elif isinstance(node, list):
    elif is_list(node):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
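The two walkers above (_replace_ref, _replace_locals) expand {"$ref": url} properties by splicing the referenced document into the tree, with the sibling properties overriding whatever the reference supplies. A self-contained sketch of that merge rule follows, using an in-memory registry in place of the real file://, http:// and env:// URL resolution (an assumption for the example):

# Hedged sketch of $ref expansion: the registry dict stands in for the URL
# sources that mo_json_config actually resolves.
def expand_refs(node, registry):
    if isinstance(node, dict):
        ref = node.get("$ref")
        rest = {k: expand_refs(v, registry) for k, v in node.items() if k != "$ref"}
        if ref is None:
            return rest
        referenced = expand_refs(registry[ref], registry)
        merged = dict(referenced)
        merged.update(rest)       # properties beside $ref win over the referenced doc
        return merged
    if isinstance(node, list):
        return [expand_refs(n, registry) for n in node]
    return node

registry = {"defaults.json": {"port": 9200, "host": "localhost"}}
config = {"es": {"$ref": "defaults.json", "port": 9300}}
print(expand_refs(config, registry))   # {'es': {'port': 9300, 'host': 'localhost'}}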
@@ -8,12 +8,11 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import StringIO, ConfigParser
from mo_future import is_text, is_binary
from mo_dots import wrap
from mo_future import ConfigParser, StringIO


def ini2value(ini_content):
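The body of ini2value falls outside this hunk; the idea is INI text in, nested structure out. A stdlib-only sketch of the same conversion:

# Stdlib-only sketch of ini2value: parse INI text into a dict of
# section -> {property: value}. Not the library's implementation.
from io import StringIO
from configparser import ConfigParser

def ini_to_dict(ini_content):
    parser = ConfigParser()
    parser.read_file(StringIO(ini_content))
    return {section: dict(parser.items(section)) for section in parser.sections()}

print(ini_to_dict("[server]\nhost = localhost\nport = 9200\n"))
# {'server': {'host': 'localhost', 'port': '9200'}}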
@@ -7,14 +7,11 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from collections import Mapping

from mo_dots import zip as dict_zip, get_logger, wrap
from mo_future import text_type, get_function_arguments, get_function_defaults, get_function_name
from mo_future import is_text, is_binary
from mo_dots import get_logger, is_data, wrap, zip as dict_zip
from mo_future import get_function_arguments, get_function_defaults, get_function_name, text_type
from mo_logs import Except


@@ -74,25 +71,26 @@ def override(func):
    elif func_name in ("__init__", "__new__"):
        def w_constructor(*args, **kwargs):
            if "kwargs" in kwargs:
                packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults)
            elif len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
                packed = params_pack(params, dict_zip(params[1:], args[1:]), kwargs, kwargs["kwargs"], defaults)
            elif len(args) == 2 and len(kwargs) == 0 and is_data(args[1]):
                # ASSUME SECOND UNNAMED PARAM IS kwargs
                packed = params_pack(params, args[1], defaults)
            else:
                # DO NOT INCLUDE self IN kwargs
                packed = params_pack(params, kwargs, dict_zip(params, args), defaults)
                packed = params_pack(params, dict_zip(params[1:], args[1:]), kwargs, defaults)
            try:
                return func(**packed)
                return func(args[0], **packed)
            except TypeError as e:
                packed['self'] = args[0]  # DO NOT SAY IS MISSING
                raise_error(e, packed)
        return w_constructor

    elif params[0] == "self":
        def w_bound_method(*args, **kwargs):
            if len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
            if len(args) == 2 and len(kwargs) == 0 and is_data(args[1]):
                # ASSUME SECOND UNNAMED PARAM IS kwargs
                packed = params_pack(params, args[1], defaults)
            elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
            elif "kwargs" in kwargs and is_data(kwargs["kwargs"]):
                # PUT args INTO kwargs
                packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
            else:

@@ -105,10 +103,10 @@ def override(func):

    else:
        def w_kwargs(*args, **kwargs):
            if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
            if len(args) == 1 and len(kwargs) == 0 and is_data(args[0]):
                # ASSUME SINGLE PARAMETER IS kwargs
                packed = params_pack(params, args[0], defaults)
            elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
            elif "kwargs" in kwargs and is_data(kwargs["kwargs"]):
                # PUT args INTO kwargs
                packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults)
            else:

@@ -127,6 +125,8 @@ def params_pack(params, *args):
        if a == None:
            continue
        for k, v in a.items():
            if v == None:
                continue
            k = text_type(k)
            if k in settings:
                continue
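params_pack merges its argument dicts first-wins: explicit call arguments shadow the kwargs bundle, which in turn shadows the declared defaults, and None values never overwrite anything. A self-contained sketch of that precedence rule:

# Sketch of params_pack's first-wins merge (mirrors the loop above).
def pack(*sources):
    settings = {}
    for source in sources:
        if source is None:
            continue
        for k, v in source.items():
            if v is None or k in settings:
                continue          # first writer wins; None never overwrites
            settings[k] = v
    return settings

explicit = {"port": 9300}
bundle = {"host": "localhost", "port": 9200}   # e.g. a kwargs= settings dict
defaults = {"host": "0.0.0.0", "timeout": 30}
print(pack(explicit, bundle, defaults))
# {'port': 9300, 'host': 'localhost', 'timeout': 30}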
@@ -7,21 +7,19 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from datetime import datetime
import os
import platform
import sys
from collections import Mapping
from datetime import datetime

from mo_dots import coalesce, listwrap, wrap, unwrap, unwraplist, set_default, FlatList
from mo_future import text_type, PY3
from mo_dots import Data, FlatList, coalesce, is_data, is_list, listwrap, unwraplist, wrap
from mo_future import PY3, text_type
from mo_logs import constants
from mo_logs.exceptions import Except, suppress_exception
from mo_logs.strings import indent
from mo_logs.exceptions import Except, LogItem, suppress_exception
from mo_logs.strings import CR, indent

_Thread = None
if PY3:

@@ -30,7 +28,6 @@ else:
    STDOUT = sys.stdout


class Log(object):
    """
    FOR STRUCTURED LOGGING AND EXCEPTION CHAINING

@@ -78,7 +75,7 @@ class Log(object):
            from mo_threads import profiles
            profiles.enable_profilers(settings.cprofile.filename)

        if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
        if settings.profile is True or (is_data(settings.profile) and settings.profile.enabled):
            Log.error("REMOVED 2018-09-02, Activedata revision 3f30ff46f5971776f8ba18")
            # from mo_logs import profiles
            #

@@ -175,38 +172,20 @@ class Log(object):
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, text_type):
        timestamp = datetime.utcnow()
        if not is_text(template):
            Log.error("Log.note was expecting a unicode template")

        if len(template) > 10000:
            template = template[:10000]

        params = dict(unwrap(default_params), **more_params)

        log_params = set_default({
            "template": template,
            "params": params,
            "timestamp": datetime.utcnow(),
            "machine": machine_metadata
        }, log_context, {"context": exceptions.NOTE})

        if not template.startswith("\n") and template.find("\n") > -1:
            template = "\n" + template

        if cls.trace:
            log_template = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.")
            f = sys._getframe(stack_depth + 1)
            log_params.location = {
                "line": f.f_lineno,
                "file": text_type(f.f_code.co_filename.split(os.sep)[-1]),
                "method": text_type(f.f_code.co_name)
            }
            thread = _Thread.current()
            log_params.thread = {"name": thread.name, "id": thread.id}
        else:
            log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

        cls.main_log.write(log_template, log_params)
        Log._annotate(
            LogItem(
                context=exceptions.NOTE,
                format=template,
                template=template,
                params=dict(default_params, **more_params)
            ),
            timestamp,
            stack_depth + 1
        )

    @classmethod
    def unexpected(

@@ -227,22 +206,26 @@ class Log(object):
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        timestamp = datetime.utcnow()
        if not is_text(template):
            Log.error("Log.warning was expecting a unicode template")

        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)
        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")

        if cause and not isinstance(cause, Except):
            cause = Except(exceptions.UNEXPECTED, text_type(cause), trace=exceptions._extract_traceback(0))
        params = Data(dict(default_params, **more_params))
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        trace = exceptions.extract_stack(1)
        e = Except(type=exceptions.UNEXPECTED, template=template, params=params, cause=cause, trace=trace)
        Log.note(
            "{{error}}",
            error=e,
            log_context=set_default({"context": exceptions.WARNING}, log_context),
            stack_depth=stack_depth + 1
        e = Except(exceptions.UNEXPECTED, template=template, params=params, cause=cause, trace=trace)
        Log._annotate(
            e,
            timestamp,
            stack_depth + 1
        )

    @classmethod

@@ -259,44 +242,23 @@ class Log(object):
        :param default_params: *dict* parameters to fill in template
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :param more_params: more parameters (which will overwrite default_params)
        :return:
        """
        # USE replace() AS POOR MAN'S CHILD TEMPLATE

        template = ("*" * 80) + "\n" + indent(template, prefix="** ").strip() + "\n" + ("*" * 80)
        Log.note(
            template,
            default_params=default_params,
            stack_depth=stack_depth + 1,
            log_context=set_default({"context": exceptions.ALARM}, log_context),
            **more_params
        timestamp = datetime.utcnow()
        format = ("*" * 80) + CR + indent(template, prefix="** ").strip() + CR + ("*" * 80)
        Log._annotate(
            LogItem(
                context=exceptions.ALARM,
                format=format,
                template=template,
                params=dict(default_params, **more_params)
            ),
            timestamp,
            stack_depth + 1
        )

    @classmethod
    def alert(
        cls,
        template,
        default_params={},
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        return Log.alarm(
            template,
            default_params=default_params,
            stack_depth=stack_depth + 1,
            log_context=set_default({"context": exceptions.ALARM}, log_context),
            **more_params
        )
    alert = alarm

    @classmethod
    def warning(

@@ -317,7 +279,8 @@ class Log(object):
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, text_type):
        timestamp = datetime.utcnow()
        if not is_text(template):
            Log.error("Log.warning was expecting a unicode template")

        if isinstance(default_params, BaseException):

@@ -326,19 +289,18 @@ class Log(object):

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")
        params = dict(unwrap(default_params), **more_params)

        params = Data(dict(default_params, **more_params))
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(type=exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
        Log.note(
            "{{error|unicode}}",
            error=e,
            log_context=set_default({"context": exceptions.WARNING}, log_context),
            stack_depth=stack_depth + 1
        e = Except(exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
        Log._annotate(
            e,
            timestamp,
            stack_depth + 1
        )


    @classmethod
    def error(
        cls,

@@ -359,7 +321,7 @@ class Log(object):
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, text_type):
        if not is_text(template):
            sys.stderr.write(str("Log.error was expecting a unicode template"))
            Log.error("Log.error was expecting a unicode template")


@@ -367,12 +329,12 @@ class Log(object):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)
        params = Data(dict(default_params, **more_params))

        add_to_trace = False
        if cause == None:
            causes = None
        elif isinstance(cause, list):
        elif is_list(cause):
            causes = []
            for c in listwrap(cause):  # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR)
                causes.append(Except.wrap(c, stack_depth=1))

@@ -388,55 +350,47 @@ class Log(object):
        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(type=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
        e = Except(context=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
        raise_from_none(e)

    @classmethod
    def fatal(
    def _annotate(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        log_context=None,
        **more_params
        item,
        timestamp,
        stack_depth
    ):
        """
        SEND TO STDERR

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :param item: A LogItem, THE TYPE OF MESSAGE
        :param stack_depth: FOR TRACKING WHAT LINE THIS CAME FROM
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}
        item.timestamp = timestamp
        item.machine = machine_metadata
        item.template = strings.limit(item.template, 10000)

        params = dict(unwrap(default_params), **more_params)
        item.format = strings.limit(item.format, 10000)
        if item.format == None:
            format = text_type(item)
        else:
            format = item.format.replace("{{", "{{params.")
        if not format.startswith(CR) and format.find(CR) > -1:
            format = CR + format

        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(type=exceptions.ERROR, template=template, params=params, cause=cause, trace=trace)

        error_mode = cls.error_mode
        with suppress_exception:
            if not error_mode:
                cls.error_mode = True
                Log.note(
                    "{{error|unicode}}",
                    error=e,
                    log_context=set_default({"context": exceptions.FATAL}, log_context),
                    stack_depth=stack_depth + 1
                )
        cls.error_mode = error_mode

        sys.stderr.write(str(e))
        if cls.trace:
            log_format = item.format = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" - ({{location.method}}) - " + format
            f = sys._getframe(stack_depth + 1)
            item.location = {
                "line": f.f_lineno,
                "file": text_type(f.f_code.co_filename),
                "method": text_type(f.f_code.co_name)
            }
            thread = _Thread.current()
            item.thread = {"name": thread.name, "id": thread.id}
        else:
            log_format = item.format = "{{timestamp|datetime}} - " + format

        cls.main_log.write(log_format, item.__data__())

    def write(self):
        raise NotImplementedError
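Both _annotate and the old Log.note rely on the same trick: format.replace("{{", "{{params.") re-scopes every moustache placeholder under the params namespace before the line is expanded. A simplified, runnable sketch of that rewrite (mo-logs' real expand_template also supports formatters such as {{ts|datetime}}, which this sketch omits):

import re

# Sketch: re-scope "{{name}}" placeholders under "params." the way
# Log._annotate does, then expand against a context dict.
def expand(template, context):
    scoped = template.replace("{{", "{{params.")
    def lookup(match):
        value = context
        for step in match.group(1).split("."):
            value = value[step]
        return str(value)
    return re.sub(r"\{\{([\w.]+)\}\}", lookup, scoped)

line = expand(
    "{{timestamp}} - problem with {{name}}",
    {"params": {"timestamp": "2018-09-02T12:00:00", "name": "worker 3"}},
)
print(line)  # 2018-09-02T12:00:00 - problem with worker 3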
@@ -6,14 +6,12 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import sys

from mo_dots import set_attr as mo_dots_set_attr
from mo_dots import wrap, join_field, split_field
from mo_dots import join_field, set_attr as mo_dots_set_attr, split_field, wrap

DEBUG = True
@@ -8,14 +8,13 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from datetime import date, datetime
import json as _json
from datetime import datetime, date

from mo_future import text_type, PY3
from mo_future import PY3


def unix2datetime(u):
@@ -9,17 +9,14 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import sys
from collections import Mapping

from mo_dots import Data, listwrap, unwraplist, set_default, Null, coalesce
from mo_future import text_type, PY3
from mo_logs.strings import indent, expand_template

from mo_dots import Data, Null, is_data, listwrap, unwraplist
from mo_future import PY3, text_type
from mo_logs.strings import CR, expand_template, indent

FATAL = "FATAL"
ERROR = "ERROR"

@@ -29,27 +26,47 @@ UNEXPECTED = "UNEXPECTED"
NOTE = "NOTE"


class Except(Exception):
class LogItem(object):

    def __init__(self, context, format, template, params):
        self.context = context
        self.format = format
        self.template = template
        self.params = params

    def __data__(self):
        return Data(self.__dict__)


class Except(Exception, LogItem):

    @staticmethod
    def new_instance(desc):
        return Except(
            type=desc.type,
            context=desc.context,
            template=desc.template,
            params=desc.params,
            cause=[Except.new_instance(c) for c in listwrap(desc.cause)],
            trace=desc.trace
        )

    def __init__(self, type=ERROR, template=Null, params=Null, cause=Null, trace=Null, **kwargs):
        Exception.__init__(self)
        self.type = type
        self.template = template
        self.params = set_default(kwargs, params)
    def __init__(self, context=ERROR, template=Null, params=Null, cause=Null, trace=Null, **_):
        if context == None:
            raise ValueError("expecting context to not be None")

        self.cause = Except.wrap(cause)

        Exception.__init__(self)
        LogItem.__init__(
            self,
            context=context,
            format=None,
            template=template,
            params=params
        )

        if not trace:
            self.trace=extract_stack(2)
            self.trace = extract_stack(2)
        else:
            self.trace = trace

@@ -66,7 +83,7 @@ class Except(Exception):
            return Null
        elif isinstance(e, (list, Except)):
            return e
        elif isinstance(e, Mapping):
        elif is_data(e):
            e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
            return Except(**e)
        else:

@@ -78,9 +95,9 @@ class Except(Exception):

            cause = Except.wrap(getattr(e, '__cause__', None))
            if hasattr(e, "message") and e.message:
                output = Except(type=ERROR, template=text_type(e.message), trace=trace, cause=cause)
                output = Except(context=ERROR, template=text_type(e.message), trace=trace, cause=cause)
            else:
                output = Except(type=ERROR, template=text_type(e), trace=trace, cause=cause)
                output = Except(context=ERROR, template=text_type(e), trace=trace, cause=cause)

            trace = extract_stack(stack_depth + 2)  # +2 = to remove the caller, and its call to this Except.wrap()
            output.trace.extend(trace)

@@ -91,11 +108,11 @@ class Except(Exception):
        return expand_template(self.template, self.params)

    def __contains__(self, value):
        if isinstance(value, text_type):
        if is_text(value):
            if self.template.find(value) >= 0 or self.message.find(value) >= 0:
                return True

        if self.type == value:
        if self.context == value:
            return True
        for c in listwrap(self.cause):
            if value in c:

@@ -103,7 +120,7 @@ class Except(Exception):
        return False

    def __unicode__(self):
        output = self.type + ": " + self.template + "\n"
        output = self.context + ": " + self.template + CR
        if self.params:
            output = expand_template(output, self.params)

@@ -113,8 +130,10 @@ class Except(Exception):
        if self.cause:
            cause_strings = []
            for c in listwrap(self.cause):
                with suppress_exception:
                try:
                    cause_strings.append(text_type(c))
                except Exception as e:
                    sys.stderr.write("Problem serializing cause: " + text_type(c))

            output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)

@@ -128,13 +147,9 @@ class Except(Exception):
        return self.__unicode__().encode('latin1', 'replace')

    def __data__(self):
        return Data(
            type=self.type,
            template=self.template,
            params=self.params,
            cause=self.cause,
            trace=self.trace
        )
        output = Data({k: getattr(self, k) for k in vars(self)})
        output.cause = unwraplist([c.__data__() for c in listwrap(output.cause)])
        return output


def extract_stack(start=0):

@@ -195,11 +210,10 @@ def _parse_traceback(tb):


def format_trace(tbs, start=0):
    trace = []
    for d in tbs[start::]:
        item = expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d)
        trace.append(item)
    return "".join(trace)
    return "".join(
        expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d)
        for d in tbs[start::]
    )


class Suppress(object):

@@ -208,13 +222,13 @@ class Suppress(object):
    """

    def __init__(self, exception_type):
        self.type = exception_type
        self.context = exception_type

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if not exc_val or isinstance(exc_val, self.type):
        if not exc_val or isinstance(exc_val, self.context):
            return True

suppress_exception = Suppress(Exception)
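Suppress works by returning True from __exit__, which tells Python the exception was handled; suppress_exception = Suppress(Exception) is the catch-all instance used across these modules. The pattern in isolation:

# Self-contained sketch of the Suppress pattern: __exit__ returning True
# means "handled", so the `with` block swallows matching exceptions.
class Suppress(object):
    def __init__(self, exception_type):
        self.exception_type = exception_type

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # True when no exception occurred, or it matches the suppressed type
        return not exc_val or isinstance(exc_val, self.exception_type)

suppress_exception = Suppress(Exception)

with suppress_exception:
    raise ValueError("swallowed")   # never propagates
print("still running")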
@@ -7,59 +7,73 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from collections import Mapping
from datetime import date, datetime
import sys

import mo_json
from jx_python import jx
from mo_dots import wrap, coalesce, FlatList
from mo_future import text_type, binary_type, number_types
from mo_json import value2json
from mo_dots import coalesce, is_data, is_sequence, listwrap, wrap
from mo_future import is_binary, is_text, number_types, text_type
from mo_json import datetime2unix, json2value, value2json
from mo_kwargs import override
from mo_logs import Log, strings
from mo_logs.exceptions import suppress_exception
from mo_logs.exceptions import Except, suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
from mo_threads import Thread, Queue, Till, THREAD_STOP
from mo_times import MINUTE, Duration
from mo_math.randoms import Random
from mo_threads import Queue, THREAD_STOP, Thread, Till
from mo_times import Duration, MINUTE
from mo_times.dates import datetime2unix
from pyLibrary.convert import bytes2base64
from pyLibrary.env.elasticsearch import Cluster

MAX_BAD_COUNT = 5
LOG_STRING_LENGTH = 2000
PAUSE_AFTER_GOOD_INSERT = 1
PAUSE_AFTER_BAD_INSERT = 60


class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(self, host, index, port=9200, type="log", queue_size=1000, batch_size=100, kwargs=None):
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
        kwargs.host = Random.sample(listwrap(host), 1)[0]

        schema = json2value(value2json(SCHEMA), leaves=True)
        schema.mappings[type].properties["~N~"].type = "nested"
        self.es = Cluster(kwargs).get_or_create_index(
            schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
            schema=schema,
            limit_replicas=True,
            typed=True,
            kwargs=kwargs
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        Thread.run("add debug logs to es", self._insert_loop)
        self.worker = Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE)
        try:
            params.template = strings.limit(params.template, 2000)
            params.format = None
            self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
        except Exception as e:
            sys.stdout.write(text_type(Except.wrap(e)))
        return self

    def _insert_loop(self, please_stop=None):

@@ -68,7 +82,7 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
            try:
                messages = wrap(self.queue.pop_all())
                if not messages:
                    Till(seconds=1).wait()
                    Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):

@@ -76,9 +90,17 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
                    for i, message in enumerate(mm):
                        if message is THREAD_STOP:
                            please_stop.go()
                            return
                            continue
                        try:
                            scrubbed.append(_deep_json_to_string(message, depth=3))
                            messages = flatten_causal_chain(message.value)
                            scrubbed.append(
                                {
                                    "value": [
                                        _deep_json_to_string(m, depth=3)
                                        for m in messages
                                    ]
                                }
                            )
                        except Exception as e:
                            Log.warning("Problem adding to scrubbed list", cause=e)

@@ -88,13 +110,18 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
                Log.warning("Problem inserting logs into ES", cause=f)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)
                Till(seconds=30).wait()
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index,
                    )
                Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()

        self.es.flush()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=1).wait()
                Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

@@ -105,6 +132,21 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):

        with suppress_exception:
            self.queue.close()
        self.worker.join()


def flatten_causal_chain(log_item, output=None):
    output = output or []

    if is_text(log_item):
        output.append({"template": log_item})
        return output

    output.append(log_item)
    for c in listwrap(log_item.cause):
        flatten_causal_chain(c, output)
    log_item.cause = None
    return output


def _deep_json_to_string(value, depth):

@@ -113,31 +155,32 @@ def _deep_json_to_string(value, depth):
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    if isinstance(value, Mapping):
    if is_data(value):
        if depth == 0:
            return strings.limit(value2json(value), LOG_STRING_LENGTH)

        return {k: _deep_json_to_string(v, depth - 1) for k, v in value.items()}
    elif isinstance(value, (list, FlatList)):
    elif is_sequence(value):
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    elif isinstance(value, number_types):
        return value
    elif isinstance(value, text_type):
    elif is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    elif isinstance(value, binary_type):
    elif is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    elif isinstance(value, (date, datetime)):
        return datetime2unix(value)
    else:
        return strings.limit(value2json(value), LOG_STRING_LENGTH)


SCHEMA = {
    "settings": {"index.number_of_shards": 2, "index.number_of_replicas": 2},
    "mappings": {"_default_": {
        "dynamic_templates": [
            {"everything_else": {
                "match": "*",
                "mapping": {"index": False}
            }}
        ]
    }}
    "mappings": {
        "_default_": {
            "dynamic_templates": [
                {"everything_else": {"match": "*", "mapping": {"index": False}}}
            ]
        },
    },
}
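flatten_causal_chain unrolls a nested cause chain into a flat list so that every link can be scrubbed and indexed as its own document, clearing each cause pointer as it goes. The same logic over plain dicts, runnable as-is (the real function works on mo-dots Data values):

# Runnable illustration of flatten_causal_chain with plain dicts.
def flatten_causal_chain(log_item, output=None):
    output = output or []
    if isinstance(log_item, str):
        output.append({"template": log_item})
        return output
    output.append(log_item)
    for c in log_item.get("cause") or []:
        flatten_causal_chain(c, output)
    log_item["cause"] = None     # detach the chain once it is flattened
    return output

error = {
    "template": "request failed",
    "cause": [{"template": "timeout", "cause": [{"template": "socket closed", "cause": None}]}],
}
for link in flatten_causal_chain(error):
    print(link["template"])
# request failed / timeout / socket closed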
@@ -9,11 +9,10 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_dots import listwrap, literal_field, Data
from mo_future import is_text, is_binary
from mo_dots import Data, listwrap, literal_field
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import ALARM, NOTE
@@ -9,10 +9,9 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import time

from mo_future import allocate_lock
@@ -9,18 +9,16 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import logging

from mo_dots import unwrap
from mo_logs import Log
from mo_logs.exceptions import suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream, time_delta_pusher
from mo_dots import unwrap


_THREAD_STOP = None
_Queue = None
@@ -7,17 +7,16 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from decimal import Decimal

from mo_dots import wrap
from mo_json import value2json, datetime2unix
from mo_json import datetime2unix, value2json
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import ERROR, NOTE, WARNING, ALARM
from mo_logs.exceptions import ALARM, ERROR, NOTE, WARNING
from mo_logs.log_usingElasticSearch import _deep_json_to_string
from mo_logs.log_usingNothing import StructuredLogger
@@ -9,10 +9,9 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from mo_logs import Log
from mo_logs.exceptions import suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
@@ -9,11 +9,10 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals


from mo_future import is_text, is_binary
class StructuredLogger(object):
    """
    ABSTRACT BASE CLASS FOR JSON LOGGING
@@ -1,44 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.strings import expand_template
from mo_threads import Queue


class StructuredLogger_usingQueue(StructuredLogger):

    def __init__(self, name=None):
        queue_name = "log messages to queue"
        if name:
            queue_name += " " + name
        self.queue = Queue(queue_name)

    def write(self, template, params):
        self.queue.add(expand_template(template, params))

    def stop(self):
        self.queue.close()

    def pop(self):
        lines = self.queue.pop()
        output = []
        for l in lines.split("\n"):
            if l[19:22] == " - ":
                l = l[22:]
            if l.strip().startswith("File"):
                continue
            output.append(l)
        return "\n".join(output).strip()
@@ -9,13 +9,12 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
from boto.ses import connect_to_region

from mo_dots import listwrap, unwrap, literal_field, Data
from mo_dots import Data, listwrap, literal_field, unwrap
from mo_kwargs import override
from mo_logs import Log, suppress_exception
from mo_logs.exceptions import ALARM, NOTE
@@ -9,15 +9,14 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_future import is_text, is_binary
import sys

from mo_future import PY3, allocate_lock
from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.strings import expand_template
from mo_logs.strings import CR, expand_template


class StructuredLogger_usingStream(StructuredLogger):

@@ -36,7 +35,7 @@ class StructuredLogger_usingStream(StructuredLogger):
        value = expand_template(template, params)
        self.locker.acquire()
        try:
            self.writer(value + "\n")
            self.writer(value + CR)
        finally:
            self.locker.release()
@@ -9,13 +9,12 @@
#


from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import, division, unicode_literals

from mo_logs import Log, Except, suppress_exception
from mo_future import is_text, is_binary
from mo_logs import Except, Log, suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
from mo_threads import Thread, Queue, Till, THREAD_STOP
from mo_threads import Queue, THREAD_STOP, Thread, Till

DEBUG = False
Some files were not shown because too many files changed in this diff.