lib updates

This commit is contained in:
Parent: 2fc0169c0f
Commit: 210c0883f9

@@ -0,0 +1,32 @@
## Some help for the programmer

Some nomenclature is required to help follow the logic of these modules.

### Table

Same as in database terminology: a single, unordered set of rows.

### Schema

A set of columns that describe all the (possibly optional) properties available on all rows of a table.

### Facts

Represents the multiple tables in a hierarchical database.

### Snowflake

JSON Query Expressions are used to query hierarchical databases. The relations in a hierarchical database are limited to a tree; the path between any two tables is unique; in a query, no matter which table is the "origin", any column in the hierarchical database can be accessed using a unique combination of joins with the origin.

With this in mind, a Snowflake is a list of all columns, for all the tables, in the hierarchical database.

### Container

A datastore that holds multiple facts.

### Namespace

Metadata for a container: information on multiple snowflakes.
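To see how these pieces fit together, here is a minimal sketch (not part of this commit) that walks from a container down to the columns of one fact table. It uses the abstract methods introduced in this changeset (`Namespace.get_snowflake`, `Snowflake.get_schema`, `Schema.leaves`); the `container` variable and the `"task"` table name are hypothetical.

```python
# Hypothetical walk through the hierarchy, assuming `container` is a concrete Container
namespace = container.namespace                # metadata for the whole container
snowflake = namespace.get_snowflake("task")    # all columns reachable from the "task" fact table
for query_path in snowflake.query_paths:       # one path per (nested) table in the tree
    schema = snowflake.get_schema(query_path)  # columns, named from this table's perspective
    for column in schema.leaves("."):          # leaf columns visible from here
        print(column.names[query_path], column.es_column)
```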
@@ -14,14 +14,11 @@ from __future__ import unicode_literals
from collections import Mapping
from uuid import uuid4

from mo_json import value2json

from mo_logs.strings import expand_template, quote

from mo_logs import Log

from mo_dots import NullType, Data, FlatList, wrap, coalesce, listwrap
from mo_future import text_type, none_type, PY2
from mo_json import value2json
from mo_logs import Log
from mo_logs.strings import expand_template, quote
from mo_times import Date

IS_NULL = '0'

@@ -39,7 +36,7 @@ STRUCT = [EXISTS, OBJECT, NESTED]


python_type_to_json_type = {
    int: INTEGER,
    int: NUMBER,
    text_type: STRING,
    float: NUMBER,
    None: OBJECT,

@@ -223,7 +220,7 @@ class {{class_name}}(Mapping):
    return _exec(code, name)


class Table(DataClass(
class TableDesc(DataClass(
    "Table",
    [
        "name",

@@ -241,6 +238,7 @@ class Table(DataClass(
    # return singlton.get_columns(table_name=self.name)


Column = DataClass(
    "Column",
    [

@@ -248,8 +246,8 @@ Column = DataClass(
        "names",  # MAP FROM TABLE NAME TO COLUMN NAME (ONE COLUMN CAN HAVE MULTIPLE NAMES)
        "es_column",
        "es_index",
        # "es_type",
        "type",
        "es_type",
        {"name": "jx_type", "nulls": True},
        {"name": "useSource", "default": False},
        {"name": "nested_path", "nulls": True},  # AN ARRAY OF PATHS (FROM DEEPEST TO SHALLOWEST) INDICATING THE JSON SUB-ARRAYS
        {"name": "count", "nulls": True},

@@ -262,3 +260,11 @@ Column = DataClass(
        {"eq": [{"last": "nested_path"}, {"literal": "."}]}
    ]}
)


from jx_base.container import Container
from jx_base.namespace import Namespace
from jx_base.facts import Facts
from jx_base.snowflake import Snowflake
from jx_base.table import Table
from jx_base.schema import Schema
@@ -47,7 +47,9 @@ def _delayed_imports():

class Container(object):
    """
    Containers are data storage capable of handing queries on that storage
    CONTAINERS HOLD MULTIPLE FACTS AND CAN HANDLE
    GENERAL JSON QUERY EXPRESSIONS ON ITS CONTENTS
    METADATA FOR A Container IS CALL A Namespace
    """

    __slots__ = ["data", "namespaces"]

@@ -95,16 +97,6 @@ class Container(object):
        else:
            Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)

    def __init__(self, frum, schema=None):
        object.__init__(self)
        if not type2container:
            _delayed_imports()

        self.data = frum
        if isinstance(schema, list):
            Log.error("expecting map from es_column to column object")

    def query(self, query):
        if query.frum != self:
            Log.error("not expected")
@@ -11,7 +11,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import itertools
import operator
from collections import Mapping
from decimal import Decimal

@@ -20,7 +19,7 @@ import mo_json
from jx_base import OBJECT, python_type_to_json_type, BOOLEAN, NUMBER, INTEGER, STRING, IS_NULL
from jx_base.queries import is_variable_name, get_property_name
from mo_dots import coalesce, wrap, Null, split_field
from mo_future import text_type, utf8_json_encoder, get_function_name
from mo_future import text_type, utf8_json_encoder, get_function_name, zip_longest
from mo_json import scrub
from mo_logs import Log, Except
from mo_math import Math, MAX, MIN, UNION

@@ -63,7 +62,7 @@ def jx_expression(expr, schema=None):
        if len(leaves) == 0:
            v.data_type = IS_NULL
        if len(leaves) == 1:
            v.data_type = list(leaves)[0].type
            v.data_type = list(leaves)[0].jx_type
    return output


@@ -74,7 +73,9 @@ def _jx_expression(expr):
    if isinstance(expr, Expression):
        Log.error("Expecting JSON, not expression")

    if expr in (True, False, None) or expr == None or isinstance(expr, (float, int, Decimal, Date)):
    if expr is None:
        return TRUE
    elif expr in (True, False, None) or expr == None or isinstance(expr, (float, int, Decimal, Date)):
        return Literal(None, expr)
    elif isinstance(expr, text_type):
        return Variable(expr)

@@ -262,16 +263,17 @@ class Variable(Expression):
        return {self}

    def map(self, map_):
        if not isinstance(map_, Mapping):
            Log.error("Expecting Mapping")

        return Variable(coalesce(map_.get(self.var), self.var))

    def __hash__(self):
        return self.var.__hash__()

    def __eq__(self, other):
        return self.var.__eq__(other)
        if isinstance(other, Variable):
            return self.var == other.var
        elif isinstance(other, text_type):
            return self.var == other
        return False

    def __unicode__(self):
        return self.var

@@ -419,12 +421,13 @@ class ScriptOp(Expression):
    ONLY FOR WHEN YOU TRUST THE SCRIPT SOURCE
    """

    def __init__(self, op, script):
    def __init__(self, op, script, data_type=OBJECT):
        Expression.__init__(self, op, None)
        if not isinstance(script, text_type):
            Log.error("expecting text of a script")
        self.simplified = True
        self.script = script
        self.data_type = data_type

    @classmethod
    def define(cls, expr):

@@ -498,15 +501,8 @@ class Literal(Expression):
        elif self.term == None:
            return False

        Log.warning("expensive")

        from mo_testing.fuzzytestcase import assertAlmostEqual

        try:
            assertAlmostEqual(self.term, other)
            return True
        except Exception:
            return False
        if isinstance(other, Literal):
            return (self.term == other.term) or (self.json == other.json)

    def __data__(self):
        return {"literal": self.value}

@@ -553,6 +549,7 @@ class Literal(Expression):
    def partial_eval(self):
        return self
ZERO = Literal("literal", 0)
ONE = Literal("literal", 1)


class NullOp(Literal):

@@ -721,7 +718,10 @@ class DateOp(Literal):
    def __init__(self, op, term):
        if hasattr(self, "date"):
            return
        if isinstance(term, text_type):
            self.date = term
        else:
            self.date = coalesce(term.literal, term)
        v = unicode2Date(self.date)
        if isinstance(v, Date):
            Literal.__init__(self, op, v.unix)

@@ -928,6 +928,10 @@ class FloorOp(Expression):

    def __init__(self, op, terms, default=NULL):
        Expression.__init__(self, op, terms)
        if len(terms) == 1:
            self.lhs = terms[0]
            self.rhs = ONE
        else:
            self.lhs, self.rhs = terms
        self.default = default


@@ -984,6 +988,11 @@ class EqOp(Expression):
        else:
            return {"eq": [self.lhs.__data__(), self.rhs.__data__()]}

    def __eq__(self, other):
        if isinstance(other, EqOp):
            return self.lhs == other.lhs and self.rhs == other.rhs
        return False

    def vars(self):
        return self.lhs.vars() | self.rhs.vars()


@@ -1135,6 +1144,11 @@ class AndOp(Expression):
    def __data__(self):
        return {"and": [t.__data__() for t in self.terms]}

    def __eq__(self, other):
        if isinstance(other, AndOp):
            return all(a == b for a, b in zip_longest(self.terms, other.terms))
        return False

    def vars(self):
        output = set()
        for t in self.terms:
@@ -1149,53 +1163,46 @@ class AndOp(Expression):

    @simplified
    def partial_eval(self):
        terms = []
        ors = []
        for t in self.terms:
        or_terms = [[]]  # LIST OF TUPLES FOR or-ing and and-ing
        for i, t in enumerate(self.terms):
            simple = BooleanOp("boolean", t).partial_eval()
            if simple is TRUE:
                pass
                continue
            elif simple is FALSE:
                return FALSE
            elif isinstance(simple, AndOp):
                terms.extend([tt for tt in simple.terms if tt not in terms])
                for and_terms in or_terms:
                    and_terms.extend([tt for tt in simple.terms if tt not in and_terms])
                continue
            elif isinstance(simple, OrOp):
                ors.append(simple.terms)
                or_terms = [
                    and_terms + [o]
                    for o in simple.terms
                    for and_terms in or_terms
                ]
                continue
            elif simple.type != BOOLEAN:
                Log.error("expecting boolean value")
            elif NotOp("not", simple).partial_eval() in terms:
                return FALSE
            elif simple not in terms:
                terms.append(simple)
        if len(ors) == 0:
            if len(terms) == 0:

            for and_terms in list(or_terms):
                if NotOp("not", simple).partial_eval() in and_terms:
                    or_terms.remove(and_terms)
                elif simple not in and_terms:
                    and_terms.append(simple)

        if len(or_terms) == 1:
            and_terms = or_terms[0]
            if len(and_terms) == 0:
                return TRUE
            if len(terms) == 1:
                return terms[0]
            output = AndOp("and", terms)
            return output
        elif len(ors) == 1:  # SOME SIMPLE COMMON FACTORING
            if len(terms) == 0:
                return OrOp("or", ors[0])
            elif len(terms) == 1 and terms[0] in ors[0]:
                return terms[0]
            elif len(and_terms) == 1:
                return and_terms[0]
            else:
                agg_terms = []
                for combo in ors[0]:
                    agg_terms.append(
                        AndOp("and", [combo]+terms).partial_eval()
                    )
                return OrOp("or", agg_terms).partial_eval()
        elif len(terms) == 0:
            return OrOp("or", ors[0])

        agg_terms = []
        for combo in itertools.product(*ors):
            agg_terms.append(
                AndOp("and", list(combo)+terms).partial_eval()
            )
        return OrOp("or", agg_terms)
            return AndOp("and", and_terms)

        return OrOp("or", [
            AndOp("and", and_terms) if len(and_terms) > 1 else and_terms[0]
            for and_terms in or_terms
        ])


class OrOp(Expression):
    data_type = BOOLEAN
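The rewritten `partial_eval` above expands an AND over ORs into an OR of ANDs (a sum-of-products form), dropping TRUE terms and duplicates along the way. A hedged illustration of the intended result, using the expression classes from this module (the exact simplified output may differ in detail):

```python
# Hypothetical sketch (not part of the commit) of the AND-over-OR expansion
expr = AndOp("and", [
    EqOp("eq", [Variable("a"), Literal("literal", 1)]),
    OrOp("or", [
        EqOp("eq", [Variable("b"), Literal("literal", 2)]),
        EqOp("eq", [Variable("b"), Literal("literal", 3)]),
    ]),
])
simplified = expr.partial_eval()
# expected to simplify to roughly:
#   (a == 1 AND b == 2) OR (a == 1 AND b == 3)
```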
@@ -2390,9 +2397,9 @@ class SplitOp(Expression):
        )

    def missing(self):
        v = self.value.to_ruby(not_null=True)
        find = self.find.to_ruby(not_null=True)
        index = v + ".indexOf(" + find + ", " + self.start.to_ruby() + ")"
        v = self.value.to_es_script(not_null=True)
        find = self.find.to_es_script(not_null=True)
        index = v + ".indexOf(" + find + ", " + self.start.to_es_script() + ")"

        return AndOp("and", [
            self.default.missing(),
@@ -0,0 +1,27 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals


class Facts(object):
    """
    REPRESENT A HIERARCHICAL DATASTORE: MULTIPLE TABLES IN A DATABASE ALONG
    WITH THE RELATIONS THAT CONNECT THEM ALL, BUT LIMITED TO A TREE
    """

    def __init__(self, container, snowflake):
        self.container = container
        self.snowflake = snowflake

    @property
    def namespace(self):
        return self.container.namespace
@@ -0,0 +1,69 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from collections import Mapping

from jx_base.query import QueryOp


class Namespace(object):
    """
    A CONGLOMERATION OF Snowflake METADATA
    A Namespace HOLDS METADATA FOR A Collection
    """

    def get_snowflake(self, fact_table):
        raise NotImplementedError()

    def get_schema(self, name):
        raise NotImplementedError()

    def convert(self, expr):
        raise NotImplementedError()

    def _convert_query(self, query):
        output = QueryOp("from", None)
        output.select = self._convert_clause(query.select)
        output.where = self.convert(query.where)
        output["from"] = self._convert_from(query["from"])
        output.edges = self._convert_clause(query.edges)
        output.having = convert_list(self._convert_having, query.having)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_clause(query.sort)
        output.format = query.format

        return output

    def _convert_from(self, frum):
        raise NotImplementedError()

    def _convert_clause(self, clause):
        raise NotImplementedError()

    def _convert_having(self, clause):
        raise NotImplementedError()

    def _convert_window(self, clause):
        raise NotImplementedError()


def convert_list(operator, operand):
    if operand==None:
        return None
    elif isinstance(operand, Mapping):
        return operator(operand)
    else:
        return map(operator, operand)
@@ -15,7 +15,7 @@ from mo_future import text_type

from mo_logs import Log

keyword_pattern = re.compile(r"(\w|[\\.,$])+(?:\.(\w|[\\.,$])+)*")
keyword_pattern = re.compile(r"(\w|[\\.,$-])+(?:\.(\w|[\\.,$-])+)*")


def is_variable_name(value):
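The only change in this hunk is the addition of `-` to the accepted character class, so dashed field names count as variable names. A quick hedged check (not in the commit; the field name is made up):

```python
import re

# assumed behaviour: dashed names like "run.machine-type" now match
keyword_pattern = re.compile(r"(\w|[\\.,$-])+(?:\.(\w|[\\.,$-])+)*")
assert keyword_pattern.match("run.machine-type")
```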
@@ -14,23 +14,20 @@ from __future__ import unicode_literals
from collections import Mapping
from copy import copy

from mo_future import text_type

import jx_base
from jx_base import STRUCT
from jx_base.container import Container
from jx_base.dimensions import Dimension
from jx_base.domains import Domain, SetDomain, DefaultDomain
from jx_base.expressions import jx_expression, Expression, Variable, LeavesOp, ScriptOp, OffsetOp, TRUE, FALSE
from jx_base.queries import is_variable_name
from jx_base.schema import Schema
from mo_dots import Data, relative_field, concat_field
from mo_dots import coalesce, Null, set_default, unwraplist, literal_field
from mo_dots import wrap, unwrap, listwrap
from mo_dots.lists import FlatList
from mo_future import text_type
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_math import AND, UNION
from mo_math import Math
from mo_math import AND, UNION, Math

DEFAULT_LIMIT = 10
MAX_LIMIT = 10000

@@ -62,7 +59,7 @@ class QueryOp(Expression):
    # return output

    def __init__(self, op, frum, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None):
        if isinstance(frum, Container):
        if isinstance(frum, jx_base.Table):
            pass
        else:
            Expression.__init__(self, op, frum)

@@ -206,7 +203,7 @@ class QueryOp(Expression):
        return FALSE

    @staticmethod
    def wrap(query, table, schema):
    def wrap(query, container, namespace):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """

@@ -214,10 +211,14 @@ class QueryOp(Expression):
            return query

        query = wrap(query)

        output = QueryOp("from", table)
        output.format = query.format
        output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        table = container.get_table(query['from'])
        schema = table.schema
        output = QueryOp(
            op="from",
            frum=table,
            format=query.format,
            limit=Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        )

        if query.select or isinstance(query.select, (Mapping, list)):
            output.select = _normalize_selects(query.select, query.frum, schema=schema)

@@ -361,7 +362,7 @@ def _normalize_select(select, frum, schema=None):
                canonical
            )
            for c in frum.get_columns()
            if c.type not in STRUCT
            if c.jx_type not in STRUCT
        ])
    else:
        Log.error("do not know what to do")

@@ -773,9 +774,11 @@ def _normalize_sort(sort=None):
            output.append({"value": s, "sort": 1})
        elif Math.is_integer(s):
            output.append({"value": OffsetOp("offset", s), "sort": 1})
        elif all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
        elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        elif not s.sort and not s.value:
            Log.error("`sort` clause must have a `value` property")
        else:
            output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": coalesce(sort_direction[s.sort], 1)})
    return output
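For reference, `_normalize_sort` accepts several spellings of a sort clause. A hedged example of the forms handled above (assuming `sort_direction`, not shown in this hunk, maps values like `"desc"` to `-1`; the field name is made up):

```python
# Both are assumed to normalize to [{"value": <expr for "build.date">, "sort": -1}]
sort_clauses = [
    {"value": "build.date", "sort": -1},   # explicit value/sort pair
    {"build.date": "desc"},                # map from variable name to direction
]
# A bare string such as "build.date" sorts ascending.
```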
@@ -13,12 +13,99 @@ from __future__ import unicode_literals

from copy import copy

from jx_base import STRUCT, NESTED, PRIMITIVE, OBJECT, EXISTS
from mo_dots import join_field, split_field, Null, startswith_field, set_default, wrap
from mo_json.typed_encoder import unnest_path, untype_path, NESTED_TYPE
from jx_base import STRUCT, NESTED, OBJECT, EXISTS
from mo_dots import Null, startswith_field, set_default, wrap
from mo_json.typed_encoder import unnest_path, untype_path
from mo_logs import Log


class Schema(object):
    """
    A Schema MAPS COLUMN NAMES OF A SINGLE TABLE TO COLUMN INSTANCES THAT MATCH
    """

    def __init__(self, table_name, columns):
        """
        :param table_name: A FULL NAME FOR THIS TABLE (NOT USED)
        :param columns: ALL COLUMNS IN SNOWFLAKE
        """
        self._columns = copy(columns)
        self.table = table_name
        self.query_path = "."
        self.lookup, self.lookup_leaves, self.lookup_variables = _indexer(columns, self.query_path)

    def __getitem__(self, column_name):
        cs = self.lookup.get(column_name)
        if cs:
            return list(cs)
        else:
            return [wrap({"es_column": column_name})]

    def items(self):
        return self.lookup.items()

    def get_column(self, name, table=None):
        return self.lookup[name]

    @property
    def columns(self):
        return self._columns

    def get_column_name(self, column):
        """
        RETURN THE COLUMN NAME, FROM THE PERSPECTIVE OF THIS SCHEMA
        :param column:
        :return: NAME OF column
        """
        return column.names[self.query_path]

    def values(self, name):
        """
        RETURN VALUES FOR THE GIVEN PATH NAME
        :param name:
        :return:
        """
        return list(self.lookup_variables.get(unnest_path(name), Null))

    def leaves(self, name):
        """
        RETURN LEAVES OF GIVEN PATH NAME
        pull leaves, considering query_path and namespace
        pull all first-level properties
        pull leaves, including parent leaves
        pull the head of any tree by name
        :param name:
        :return:
        """

        return list(self.lookup_leaves.get(unnest_path(name), Null))

    def map_to_es(self):
        """
        RETURN A MAP FROM THE NAMESPACE TO THE es_column NAME
        """
        full_name = self.query_path
        return set_default(
            {
                c.names[full_name]: c.es_column
                for k, cs in self.lookup.items()
                # if startswith_field(k, full_name)
                for c in cs if c.jx_type not in STRUCT
            },
            {
                c.names["."]: c.es_column
                for k, cs in self.lookup.items()
                # if startswith_field(k, full_name)
                for c in cs if c.jx_type not in STRUCT
            }
        )

    @property
    def columns(self):
        return copy(self._columns)


def _indexer(columns, query_path):
    all_names = set(unnest_path(n) for c in columns for n in c.names.values()) | {"."}

@@ -29,7 +116,7 @@ def _indexer(columns, query_path):
            nfp = unnest_path(cname)
            if (
                startswith_field(nfp, full_name) and
                c.type not in [EXISTS, OBJECT, NESTED] and
                c.es_type not in [EXISTS, OBJECT, NESTED] and
                (c.es_column != "_id" or full_name == "_id")
            ):
                cs = lookup_leaves.setdefault(full_name, set())

@@ -44,7 +131,7 @@ def _indexer(columns, query_path):
            nfp = unnest_path(cname)
            if (
                startswith_field(nfp, full_name) and
                c.type not in [EXISTS, OBJECT] and
                c.es_type not in [EXISTS, OBJECT] and
                (c.es_column != "_id" or full_name == "_id") and
                startswith_field(c.nested_path[0], query_path)
            ):

@@ -81,93 +168,3 @@ def _indexer(columns, query_path):

    return relative_lookup, lookup_leaves, lookup_variables


class Schema(object):
    """
    A Schema MAPS ALL COLUMNS IN SNOWFLAKE FROM NAME TO COLUMN INSTANCE
    """

    def __init__(self, table_name, columns):
        """
        :param table_name: THE FACT TABLE
        :param query_path: PATH TO ARM OF SNOWFLAKE
        :param columns: ALL COLUMNS IN SNOWFLAKE
        """
        self._columns = copy(columns)
        table_path = split_field(table_name)
        self.table = join_field(table_path[:1])  # USED AS AN EXPLICIT STATEMENT OF PERSPECTIVE IN THE DATABASE
        query_path = join_field(table_path[1:])  # TODO: REPLACE WITH THE nested_path ARRAY
        if query_path == ".":
            self.query_path = query_path
        else:
            query_path += "."+NESTED_TYPE
            self.query_path = [c for c in columns if c.type == NESTED and c.names["."] == query_path][0].es_column
        self.lookup, self.lookup_leaves, self.lookup_variables = _indexer(columns, self.query_path)

    def __getitem__(self, column_name):
        cs = self.lookup.get(column_name)
        if cs:
            return list(cs)
        else:
            return [wrap({"es_column": column_name})]

    def items(self):
        return self.lookup.items()

    def get_column(self, name, table=None):
        return self.lookup[name]

    def get_column_name(self, column):
        """
        RETURN THE COLUMN NAME, FROM THE PERSPECTIVE OF THIS SCHEMA
        :param column:
        :return: NAME OF column
        """
        return column.names[self.query_path]

    def values(self, name):
        """
        RETURN VALUES FOR THE GIVEN PATH NAME
        :param name:
        :return:
        """
        return list(self.lookup_variables.get(unnest_path(name), Null))

    def leaves(self, name, meta=False):
        """
        RETURN LEAVES OF GIVEN PATH NAME
        pull leaves, considering query_path and namespace
        pull all first-level properties
        pull leaves, including parent leaves
        pull the head of any tree by name
        :param name:
        :return:
        """

        return list(self.lookup_leaves.get(unnest_path(name), Null))

    def map_to_es(self):
        """
        RETURN A MAP FROM THE NAME SPACE TO THE es_column NAME
        """
        full_name = self.query_path
        return set_default(
            {
                c.names[full_name]: c.es_column
                for k, cs in self.lookup.items()
                # if startswith_field(k, full_name)
                for c in cs if c.type not in STRUCT
            },
            {
                c.names["."]: c.es_column
                for k, cs in self.lookup.items()
                # if startswith_field(k, full_name)
                for c in cs if c.type not in STRUCT
            }
        )

    @property
    def columns(self):
        return copy(self._columns)
@@ -0,0 +1,30 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http:# mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals


class Snowflake(object):
    """
    REPRESENT ONE ALIAS, AND ITS NESTED ARRAYS
    """

    def get_schema(self, query_path):
        raise NotImplemented()

    @property
    def query_paths(self):
        raise NotImplemented()

    @property
    def columns(self):
        raise NotImplemented()
@@ -0,0 +1,22 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals


class Table(object):

    def __init__(self, full_name):
        self.name = full_name

    def map(self, mapping):
        return self
@@ -11,11 +11,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from jx_base.expressions import Variable
from jx_base.queries import is_variable_name
from jx_elasticsearch import es09
from jx_elasticsearch.es09.util import aggregates, fix_es_stats, build_es_query
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es52.expressions import Variable
# from jx_elasticsearch.es52.expressions import Variable
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
@@ -15,6 +15,8 @@ from collections import Mapping
from datetime import datetime
import re

from jx_base.queries import keyword_pattern

from mo_future import text_type
from pyLibrary import convert
from mo_collections import reverse

@@ -129,13 +131,13 @@ class _MVEL(object):
        list = []
        for s in selectList:
            if is_deep:
                if s.value and isKeyword(s.value):
                if s.value and is_variable_name(s.value):
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    Log.error("do not know how to handle yet")
            else:
                if s.value and isKeyword(s.value):
                if s.value and is_variable_name(s.value):
                    list.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n")
                elif s.value:
                    shortForm = self._translate(s.value)

@@ -490,19 +492,8 @@ def _where(esFilter, _translate):


VAR_CHAR = "abcdefghijklmnopqurstvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.\""
keyword_pattern = re.compile(r"\.*\w*(?:\.\w+)*")


def isKeyword(value):
    """
    RETURN TRUE IF THE value IS JUST A NAME OF A FIELD, A LIST OF FIELDS, (OR A VALUE)
    """
    if not value or not isinstance(value, text_type):
        Log.error("Expecting a string")

    if keyword_pattern.match(value):
        return True
    return False


def value2MVEL(value):
@@ -13,6 +13,10 @@ from __future__ import unicode_literals

from datetime import datetime

from jx_base.queries import is_variable_name

from mo_logs.strings import quote

from mo_logs import Log, strings
from mo_dots import Data
from mo_dots import coalesce

@@ -23,7 +27,7 @@ from mo_math import COUNT
from mo_math import Math
from mo_math import stats
from jx_base import domains
from jx_elasticsearch.es09.expressions import value2MVEL, isKeyword
from jx_elasticsearch.es09.expressions import value2MVEL
from mo_times import durations


@@ -68,7 +72,7 @@ def compileTime2Term(edge):
    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if isKeyword(value):
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    nullTest = compileNullTest(edge)

@@ -109,7 +113,7 @@ def compileDuration2Term(edge):
    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if isKeyword(value):
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    ref = coalesce(edge.domain.min, edge.domain.max, durations.ZERO)

@@ -141,7 +145,7 @@ def compileNumeric2Term(edge):

    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if isKeyword(value):
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    if not edge.domain.max:

@@ -179,7 +183,7 @@ def compileString2Term(edge):
        Log.error("edge script not supported yet")

    value = edge.value
    if isKeyword(value):
    if is_variable_name(value):
        value = strings.expand_template("getDocValue({{path}})", {"path": quote(value)})
    else:
        Log.error("not handled")

@@ -202,7 +206,7 @@ def compileNullTest(edge):

    # IS THERE A LIMIT ON THE DOMAIN?
    value = edge.value
    if isKeyword(value):
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    if not edge.domain.max:

@@ -240,7 +244,7 @@ def compileEdges2Term(mvel_compiler, edges, constants):
    def temp(term):
        return FlatList([edge0.domain.getPartByKey(term)])

    if edge0.value and isKeyword(edge0.value):
    if edge0.value and is_variable_name(edge0.value):
        return Data(
            field=edge0.value,
            term2parts=temp
@@ -19,22 +19,17 @@ from jx_base.dimensions import Dimension
from jx_base.expressions import jx_expression
from jx_base.queries import is_variable_name
from jx_base.query import QueryOp
from jx_base.schema import Schema
from jx_elasticsearch.es14.aggs import es_aggsop, is_aggsop
from jx_elasticsearch.es14.deep import is_deepop, es_deepop
from jx_elasticsearch.es14.setop import is_setop, es_setop
from jx_elasticsearch.es14.util import aggregates
from jx_elasticsearch.meta import FromESMetadata
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
from jx_python import jx
from mo_dots import Data, Null, unwrap
from mo_dots import coalesce, split_field, literal_field, unwraplist, join_field
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList
from mo_json import scrub
from mo_dots import Data, Null, unwrap, coalesce, split_field, literal_field, unwraplist, join_field, wrap, listwrap, FlatList
from mo_json import scrub, value2json
from mo_json.typed_encoder import TYPE_PREFIX
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import Except
from pyLibrary import convert
from mo_logs import Log, Except
from pyLibrary.env import elasticsearch, http


@@ -45,7 +40,7 @@ class ES14(Container):

    def __new__(cls, *args, **kwargs):
        if (len(args) == 1 and args[0].get("index") == "meta") or kwargs.get("index") == "meta":
            output = FromESMetadata.__new__(FromESMetadata, *args, **kwargs)
            output = ElasticsearchMetadata.__new__(ElasticsearchMetadata, *args, **kwargs)
            output.__init__(*args, **kwargs)
            return output
        else:

@@ -66,36 +61,46 @@ class ES14(Container):
        typed=None,
        kwargs=None
    ):
        Container.__init__(self, None)
        Container.__init__(self)
        if not container.config.default:
            container.config.default = {
                "type": "elasticsearch",
                "settings": unwrap(kwargs)
            }
        self.settings = kwargs
        self.name = coalesce(name, alias, index)
        self.name = name = coalesce(name, alias, index)
        if read_only:
            self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
        else:
            self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)

        self.meta = FromESMetadata(kwargs=kwargs)
        self._namespace = ElasticsearchMetadata(kwargs=kwargs)
        self.settings.type = self.es.settings.type
        self.edges = Data()
        self.worker = None

        columns = self.meta.get_columns(table_name=coalesce(name, alias, index))
        self._schema = Schema(coalesce(name, alias, index), columns)
        columns = self._namespace.get_snowflake(self._es.settings.alias).columns  # ABSOLUTE COLUMNS

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = any(c.es_column.find(".$") != -1 for c in columns)
            self.typed = any(c.es_column.find("."+TYPE_PREFIX) != -1 for c in columns)
        else:
            self.typed = typed

    @property
    def schema(self):
        return self._schema
    def snowflake(self):
        return self._namespace.get_snowflake(self._es.settings.alias)

    @property
    def namespace(self):
        return self._namespace

    def get_table(self, full_name):
        return Table(full_name, self)

    def get_schema(self, query_path):
        return self._namespace.get_schema(query_path)

    def __data__(self):
        settings = self.settings.copy()

@@ -126,13 +131,10 @@ class ES14(Container):

    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, _query.frum, schema=self)

            for n in self.namespaces:
                query = n.convert(query)
            query = QueryOp.wrap(_query, container=self, namespace=self.namespace)

            for s in listwrap(query.select):
                if not aggregates.get(s.aggregate):
                if s.aggregate != None and not aggregates.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,

@@ -213,7 +215,7 @@ class ES14(Container):
                scripts.append({"doc": v.doc})
            else:
                v = scrub(v)
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby(schema).script(schema)})
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})

        if results.hits.hits:
            updates = []

@@ -221,7 +223,7 @@ class ES14(Container):
            for s in scripts:
                updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
            content = ("\n".join(value2json(c) for c in updates) + "\n")
            response = self.es.cluster.post(
                self.es.path + "/_bulk",
                data=content,
@@ -11,29 +11,26 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from mo_future import text_type

from jx_base import EXISTS
from jx_base.domains import SetDomain
from jx_base.expressions import TupleOp, NULL
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es14.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder
from jx_elasticsearch.es14.decoders import DimFieldListDecoder
from jx_elasticsearch.es14.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder
from jx_elasticsearch.es14.expressions import split_expression_by_depth, AndOp, Variable, NullOp
from jx_elasticsearch.es14.setop import get_pull_stats
from jx_elasticsearch.es14.util import aggregates
from jx_python import jx
from jx_python.expressions import jx_expression_to_function
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import encode_property
from mo_logs import Log
from mo_logs.strings import quote
from mo_math import Math, MAX, UNION
from mo_times.timer import Timer


def is_aggsop(es, query):
    es.cluster.get_metadata()
    if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate):
        return True
    return False

@@ -60,12 +57,12 @@ def get_decoders_by_depth(query):
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v, meta=True):
                if not schema.leaves(v.var):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v]:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields

@@ -78,7 +75,7 @@ def get_decoders_by_depth(query):

        try:
            vars_ |= edge.value.vars()
            depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v))
            depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var))
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",

@@ -102,10 +99,8 @@ def sort_edges(query, prop):
    ordered_edges = []
    remaining_edges = getattr(query, prop)
    for s in query.sort:
        if not isinstance(s.value, Variable):
            Log.error("can only sort by terms")
        for e in remaining_edges:
            if e.value.var == s.value.var:
            if e.value == s.value:
                if isinstance(e.domain, SetDomain):
                    pass  # ALREADY SORTED?
                else:

@@ -113,6 +108,9 @@ def sort_edges(query, prop):
                ordered_edges.append(e)
                remaining_edges.remove(e)
                break
        else:
            Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value)

    ordered_edges.extend(remaining_edges)
    return ordered_edges


@@ -136,34 +134,38 @@ def es_aggsop(es, frum, query):
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        else:
        elif s.aggregate:
            formula.append(s)

    for canonical_name, many in new_select.items():
        for s in many:
            es_cols = frum.schema.values(s.value.var)
            columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for es_col in es_cols:
                    cn = literal_field(es_col.es_column + "_count")
                    canonical_names.append(cn)
                    es_query.aggs[cn].value_count.field = es_col.es_column
                if len(es_cols) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0] + ".value")
                for column in columns:
                    cn = literal_field(column.es_column + "_count")
                    if column.jx_type == EXISTS:
                        canonical_names.append(cn + ".doc_count")
                        es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
                    else:
                    s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names]})
                        canonical_names.append(cn+ ".value")
                        es_query.aggs[cn].value_count.field = column.es_column
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function({"add": canonical_names})
            elif s.aggregate == "median":
                if len(es_cols) > 1:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = es_cols[0].es_column
                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = jx_expression_to_function(key + ".values.50\\.0")
            elif s.aggregate == "percentile":
                if len(es_cols) > 1:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

@@ -171,49 +173,49 @@ def es_aggsop(es, frum, query):
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = es_cols[0].es_column
                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
            elif s.aggregate == "cardinality":
                canonical_names = []
                for es_col in es_cols:
                    cn = literal_field(es_col.es_column + "_cardinality")
                for column in columns:
                    cn = literal_field(column.es_column + "_cardinality")
                    canonical_names.append(cn)
                    es_query.aggs[cn].cardinality.field = es_col.es_column
                if len(es_cols) == 1:
                    es_query.aggs[cn].cardinality.field = column.es_column
                if len(columns) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0] + ".value")
                else:
                    s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
            elif s.aggregate == "stats":
                if len(es_cols) > 1:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = es_cols[0].es_column
                es_query.aggs[stats_name].extended_stats.field = columns[0].es_column

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + "_percentile")
                es_query.aggs[median_name].percentiles.field = es_cols[0].es_column
                es_query.aggs[median_name].percentiles.field = columns[0].es_column
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = get_pull_stats(stats_name, median_name)
            elif s.aggregate == "union":
                pulls = []
                for es_col in es_cols:
                    stats_name = encode_property(es_col.es_column)
                for column in columns:
                    stats_name = encode_property(column.es_column)

                    if es_col.nested_path[0] == ".":
                    if column.nested_path[0] == ".":
                        es_query.aggs[stats_name] = {"terms": {
                            "field": es_col.es_column,
                            "field": column.es_column,
                            "size": Math.min(s.limit, MAX_LIMIT)
                        }}
                        pulls.append(get_bucket_keys(stats_name))

                    else:
                        es_query.aggs[stats_name] = {
                            "nested": {"path": es_col.nested_path[0]},
                            "nested": {"path": column.nested_path[0]},
                            "aggs": {"_nested": {"terms": {
                                "field": es_col.es_column,
                                "field": column.es_column,
                                "size": Math.min(s.limit, MAX_LIMIT)
                            }}}
                        }

@@ -228,11 +230,11 @@ def es_aggsop(es, frum, query):
                    for p in pulls
                )
            else:
                if len(es_cols) > 1:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")

                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = es_cols[0].es_column
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
                s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})

    for i, s in enumerate(formula):

@@ -245,13 +247,13 @@ def es_aggsop(es, frum, query):
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
        elif s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_ruby(schema).script(schema)
            es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = s.value.to_ruby(schema).script(schema)
            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = jx_expression_to_function(key + ".values.50\\.0")
        elif s.aggregate == "percentile":

@@ -259,35 +261,35 @@ def es_aggsop(es, frum, query):
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = s.value.to_ruby(schema).script(schema)
            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = s.value.to_ruby(schema).script(schema)
            es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(key + ".value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_ruby(schema).script(schema)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = s.value.to_ruby(schema).script(schema)
            es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = get_pull_stats(stats_name, median_name)
        elif s.aggregate=="union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = s.value.to_ruby(schema).script(schema)
            es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(stats_name + ".buckets.key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_ruby(schema).script(schema)
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

    decoders = get_decoders_by_depth(query)
    start = 0

@@ -312,11 +314,7 @@ def es_aggsop(es, frum, query):

    es_query = wrap({
        "aggs": {"_nested": set_default(
            {
                "nested": {
                    "path": schema.query_path
                }
            },
            {"nested": {"path": schema.query_path[0]}},
            es_query
        )}
    })

@@ -442,6 +440,8 @@ def aggs_iterator(aggs, decoders, coord=True):
    if coord:
        for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
            coord = tuple(d.get_index(parts) for d in decoders)
            if any(c is None for c in coord):
                continue
            yield parts, coord, a
    else:
        for a, parts in _aggs_iterator(unwrap(aggs), depth - 1):
@@ -13,20 +13,20 @@ from __future__ import unicode_literals

from collections import Mapping

from mo_future import text_type, binary_type

from jx_base import STRING, NUMBER, BOOLEAN
from jx_base.dimensions import Dimension
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
from jx_base.expressions import TupleOp
from jx_base.expressions import TupleOp, TRUE
from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT
from jx_elasticsearch.es14.expressions import Variable, NotOp, InOp, Literal, OrOp, AndOp, InequalityOp, LeavesOp
from jx_elasticsearch.es14.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE
from jx_python import jx
from mo_dots import set_default, coalesce, literal_field, Data, relative_field
from mo_dots import wrap
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_math import MAX, MIN
from mo_math import Math
from mo_logs.strings import quote, expand_template
from mo_math import MAX, MIN, Math
from pyLibrary.convert import string2boolean


class AggsDecoder(object):

@@ -37,7 +37,7 @@ class AggsDecoder(object):
        # if query.groupby:
        #     return object.__new__(DefaultDecoder, e)

        if isinstance(e.value, (text_type, binary_type)):
        if isinstance(e.value, text_type):
            Log.error("Expecting Variable or Expression, not plain string")

        if isinstance(e.value, LeavesOp):

@@ -63,6 +63,9 @@ class AggsDecoder(object):
            limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

            if col.partitions != None:
                if col.multi > 1 and len(col.partitions) < 6:
                    return object.__new__(MultivalueDecoder)

                partitions = col.partitions[:limit:]
                if e.domain.sort==-1:
                    partitions = list(reversed(sorted(partitions)))

@@ -138,18 +141,18 @@ class SetDecoder(AggsDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        domain = self.domain = edge.domain
        self.sorted = None
        self.pull = pull_functions[STRING]

        # WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM
        # self.sorted = {1: "asc", -1: "desc", None: None}[getattr(edge.domain, 'sort', None)]
        edge_var = edge.value.vars()
        edge_var = set(v.var for v in edge.value.vars())
        if query.sort:
            for s in query.sort:
                if not edge_var - s.value.vars():
                if not edge_var - set(v.var for v in s.value.vars()):
                    self.sorted = {1: "asc", -1: "desc"}[s.sort]
                    parts = jx.sort(domain.partitions, {"value": domain.key, "sort": s.sort})
                    edge.domain = self.domain = SimpleSetDomain(key=domain.key, label=domain.label, partitions=parts)
        else:
            self.sorted = None

    def append_query(self, es_query, start):
        self.start = start

@@ -180,7 +183,7 @@ class SetDecoder(AggsDecoder):
            }}, es_query)
        else:
            terms = set_default({"terms": {
                "script": value.to_ruby(self.schema).script(self.schema),
                "script": value.to_es_script(self.schema).script(self.schema),
                "size": limit
            }}, es_query)

@@ -206,7 +209,7 @@ class SetDecoder(AggsDecoder):
        return self.domain.getKeyByIndex(index)

    def get_value_from_row(self, row):
        return row[self.start].get('key')
        return self.pull(row[self.start].get('key'))

    def get_index(self, row):
        try:

@@ -242,7 +245,7 @@ def _range_composer(edge, domain, es_query, to_float, schema):
    if isinstance(edge.value, Variable):
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        calc = {"script": edge.value.to_ruby(schema).script(schema)}
        calc = {"script": edge.value.to_es_script(schema).script(schema)}

    return wrap({"aggs": {
        "_match": set_default(

@@ -446,6 +449,44 @@ class RangeDecoder(AggsDecoder):
        return 1


class MultivalueDecoder(SetDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        self.var = edge.value.var
        self.values = query.frum.schema[edge.value.var][0].partitions
        self.parts = []

    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query

    def get_value_from_row(self, row):
        values = row[self.start]['key'].replace("||", "\b").split("|")
        if len(values) == 2:
            return None
        return unwraplist([v.replace("\b", "|") for v in values[1:-1]])

    def get_index(self, row):
        find = self.get_value_from_row(row)
        try:
            return self.parts.index(find)
        except Exception:
            self.parts.append(find)
            return len(self.parts)-1

    @property
    def num_columns(self):
        return 1


class ObjectDecoder(AggsDecoder):
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
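The multivalue terms above come back as a single pipe-delimited key, with `||` escaping a literal pipe. A hedged walk-through of how `get_value_from_row` unpacks such a key (the exact encoding is produced by `LIST_TO_PIPE`, which is not shown in this diff, so the sample key below is an assumption):

```python
# Hypothetical key string, assuming each value is wrapped in "|" and embedded pipes are doubled
key = "|red|blue||green|"
values = key.replace("||", "\b").split("|")          # ['', 'red', 'blue\x08green', '']
parts = [v.replace("\b", "|") for v in values[1:-1]]  # ['red', 'blue|green']
# a key that encodes no values decodes to None in the decoder above
```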
@ -535,55 +576,51 @@ class DefaultDecoder(SetDecoder):
|
|||
self.parts = list()
|
||||
self.key2index = {}
|
||||
self.computed_domain = False
|
||||
self.script = self.edge.value.partial_eval().to_es_script(self.schema)
|
||||
self.pull = pull_functions[self.script.data_type]
|
||||
self.missing = self.script.miss.partial_eval()
|
||||
self.exists = NotOp("not", self.missing).partial_eval()
|
||||
|
||||
# WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM
|
||||
self.sorted = None
|
||||
edge_var = edge.value.vars()
|
||||
for s in query.sort:
|
||||
if not edge_var - s.value.vars():
|
||||
self.sorted = {1: "asc", -1: "desc"}[s.sort]
|
||||
# WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
|
||||
sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
|
||||
if sort_candidates:
|
||||
self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]}
|
||||
else:
|
||||
self.es_order = None
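The sort handling above only honors a query sort clause whose expression is exactly the edge expression. A stand-alone sketch of that rule, where Clause is a stand-in for the edge and sort objects (only .value and .sort are modeled, and "build.type"/"run.timestamp" are hypothetical field names):

```python
class Clause(object):
    def __init__(self, value, sort=1):
        self.value, self.sort = value, sort

def es_order_for(edge, sorts):
    # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
    candidates = [s for s in sorts if s.value == edge.value]
    if candidates:
        return {"_term": {1: "asc", -1: "desc"}[candidates[0].sort]}
    return None                                      # otherwise leave ordering to ES defaults

edge = Clause("build.type")
assert es_order_for(edge, [Clause("build.type", -1)]) == {"_term": "desc"}
assert es_order_for(edge, [Clause("run.timestamp", 1)]) is None
```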
|
||||
|
||||
def append_query(self, es_query, start):
|
||||
self.start = start
|
||||
|
||||
value = self.edge.value.partial_eval()
|
||||
script = value.to_ruby(self.schema)
|
||||
exists = NotOp("not", script.miss).partial_eval()
|
||||
if not isinstance(self.edge.value, Variable):
|
||||
|
||||
if self.exists is TRUE:
|
||||
# IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
|
||||
output = wrap({"aggs": {
|
||||
"_match": {
|
||||
"filter": exists.to_esfilter(self.schema),
|
||||
"_match": set_default(
|
||||
{"terms": {
|
||||
"script": self.script.expr,
|
||||
"size": self.domain.limit,
|
||||
"order": self.es_order
|
||||
}},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
else:
|
||||
output = wrap({"aggs": {
|
||||
"_match": { # _match AND _filter REVERSED SO _match LINES UP WITH _missing
|
||||
"filter": self.exists.to_esfilter(self.schema),
|
||||
"aggs": {
|
||||
"_filter": set_default(
|
||||
{"terms": {
|
||||
"script": script.expr,
|
||||
"script": self.script.expr,
|
||||
"size": self.domain.limit,
|
||||
"order": {"_term": self.sorted} if self.sorted else None
|
||||
"order": self.es_order
|
||||
}},
|
||||
es_query
|
||||
)
|
||||
}
|
||||
},
|
||||
"_missing": set_default(
|
||||
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
return output
|
||||
elif self.edge.value.var in [s.value.var for s in self.query.sort]:
|
||||
sort_dir = [s.sort for s in self.query.sort if s.value.var == self.edge.value.var][0]
|
||||
output = wrap({"aggs": {
|
||||
"_match": set_default(
|
||||
{"terms": {
|
||||
"field": self.schema.leaves(self.edge.value.var)[0].es_column,
|
||||
"size": self.domain.limit,
|
||||
"order": {"_term": "asc" if sort_dir == 1 else "desc"}
|
||||
}},
|
||||
es_query
|
||||
),
|
||||
"_missing": set_default(
|
||||
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
|
||||
{"filter": self.missing.to_esfilter(self.schema)},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
|
@ -593,12 +630,13 @@ class DefaultDecoder(SetDecoder):
|
|||
"_match": set_default(
|
||||
{"terms": {
|
||||
"field": self.schema.leaves(self.edge.value.var)[0].es_column,
|
||||
"size": self.domain.limit
|
||||
"size": self.domain.limit,
|
||||
"order": self.es_order
|
||||
}},
|
||||
es_query
|
||||
),
|
||||
"_missing": set_default(
|
||||
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
|
||||
{"filter": self.missing.to_esfilter(self.schema)},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
|
@ -608,7 +646,7 @@ class DefaultDecoder(SetDecoder):
|
|||
part = row[self.start]
|
||||
if part['doc_count']:
|
||||
if part.get('key') != None:
|
||||
self.parts.append(part.get('key'))
|
||||
self.parts.append(self.pull(part.get('key')))
|
||||
else:
|
||||
self.edge.allowNulls = True # OK! WE WILL ALLOW NULLS
|
||||
|
||||
|
@ -623,19 +661,19 @@ class DefaultDecoder(SetDecoder):
|
|||
if self.computed_domain:
|
||||
try:
|
||||
part = row[self.start]
|
||||
return self.domain.getIndexByKey(part.get('key'))
|
||||
return self.domain.getIndexByKey(self.pull(part.get('key')))
|
||||
except Exception as e:
|
||||
Log.error("problem", cause=e)
|
||||
else:
|
||||
try:
|
||||
part = row[self.start]
|
||||
key = part.get('key')
|
||||
key = self.pull(part.get('key'))
|
||||
i = self.key2index.get(key)
|
||||
if i is None:
|
||||
i = len(self.parts)
|
||||
part = {"key": key, "dataIndex": i}
|
||||
self.parts.append({"key": key, "dataIndex": i})
|
||||
self.key2index[i] = part
|
||||
self.parts.append(part)
|
||||
self.key2index[key] = i
|
||||
return i
|
||||
except Exception as e:
|
||||
Log.error("problem", cause=e)
|
||||
|
@ -648,6 +686,7 @@ class DefaultDecoder(SetDecoder):
|
|||
class DimFieldListDecoder(SetDecoder):
|
||||
def __init__(self, edge, query, limit):
|
||||
AggsDecoder.__init__(self, edge, query, limit)
|
||||
edge.allowNulls = False
|
||||
self.fields = edge.domain.dimension.fields
|
||||
self.domain = self.edge.domain
|
||||
self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
|
||||
|
@ -665,7 +704,6 @@ class DimFieldListDecoder(SetDecoder):
|
|||
"size": self.domain.limit
|
||||
}}, es_query)}
|
||||
}}})
|
||||
if self.edge.allowNulls:
|
||||
nest.aggs._missing = set_default(
|
||||
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
|
||||
es_query
|
||||
|
@ -696,11 +734,20 @@ class DimFieldListDecoder(SetDecoder):
|
|||
)
|
||||
|
||||
def get_index(self, row):
|
||||
find = tuple(p.get("key") for p in row[self.start:self.start + self.num_columns:])
|
||||
return self.domain.getIndexByKey(find)
|
||||
|
||||
part = row[self.start:self.start + len(self.fields):]
|
||||
if part[0]['doc_count']==0:
|
||||
return None
|
||||
find = tuple(p.get("key") for p in part)
|
||||
output = self.domain.getIndexByKey(find)
|
||||
return output
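A sketch of the corrected get_index flow above: one bucket per field is sliced from the row, an empty first bucket means the coordinate does not exist, and otherwise the key tuple is looked up in the domain. A plain dict stands in for domain.getIndexByKey, and the field/key values are hypothetical:

```python
def get_index(row, start, fields, domain_index):
    part = row[start:start + len(fields)]
    if part[0]["doc_count"] == 0:
        return None
    find = tuple(p.get("key") for p in part)
    return domain_index.get(find)                    # stand-in for domain.getIndexByKey(find)

row = [{"key": "linux", "doc_count": 3}, {"key": "opt", "doc_count": 3}]
assert get_index(row, 0, ["os", "type"], {("linux", "opt"): 2}) == 2
assert get_index([{"key": None, "doc_count": 0}], 0, ["os"], {}) is None
```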
|
||||
@property
|
||||
def num_columns(self):
|
||||
return len(self.fields)
|
||||
|
||||
|
||||
pull_functions = {
|
||||
STRING: lambda x: x,
|
||||
NUMBER: lambda x: float(x) if x !=None else None,
|
||||
BOOLEAN: string2boolean
|
||||
}
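pull_functions converts the string keys that Elasticsearch returns in terms buckets back into the edge's JSON type. A self-contained sketch of the same table; the constant values and the string2boolean stand-in are assumptions (the real ones come from jx_base and pyLibrary.convert):

```python
STRING, NUMBER, BOOLEAN = "string", "number", "boolean"      # assumed values of the jx_base constants

def string2boolean(value):                                   # stand-in for pyLibrary.convert.string2boolean
    return None if value is None else value.lower() in ("true", "t", "1")

pull_functions = {
    STRING: lambda x: x,
    NUMBER: lambda x: float(x) if x is not None else None,
    BOOLEAN: string2boolean,
}

assert pull_functions[NUMBER]("3.5") == 3.5
assert pull_functions[BOOLEAN]("true") is True
assert pull_functions[STRING]("abc") == "abc"
```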
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_base import STRUCT, NESTED, EXISTS
|
||||
from jx_base import NESTED
|
||||
from jx_base.expressions import NULL
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
|
@ -49,8 +49,7 @@ def is_deepop(es, query):
|
|||
|
||||
def es_deepop(es, query):
|
||||
schema = query.frum.schema
|
||||
columns = schema.columns
|
||||
query_path = schema.query_path
|
||||
query_path = schema.query_path[0]
|
||||
|
||||
# TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
|
||||
# THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS
|
||||
|
@ -97,7 +96,7 @@ def es_deepop(es, query):
|
|||
col_names = set()
|
||||
for c in leaves:
|
||||
if c.nested_path[0] == ".":
|
||||
if c.type == NESTED:
|
||||
if c.jx_type == NESTED:
|
||||
continue
|
||||
es_query.fields += [c.es_column]
|
||||
c_name = untype_path(c.names[query_path])
|
||||
|
@ -128,7 +127,7 @@ def es_deepop(es, query):
|
|||
for n in net_columns:
|
||||
pull = get_pull_function(n)
|
||||
if n.nested_path[0] == ".":
|
||||
if n.type == NESTED:
|
||||
if n.jx_type == NESTED:
|
||||
continue
|
||||
es_query.fields += [n.es_column]
|
||||
|
||||
|
@ -155,14 +154,14 @@ def es_deepop(es, query):
|
|||
else:
|
||||
expr = s.value
|
||||
for v in expr.vars():
|
||||
for c in schema[v]:
|
||||
for c in schema[v.var]:
|
||||
if c.nested_path[0] == ".":
|
||||
es_query.fields += [c.es_column]
|
||||
# else:
|
||||
# Log.error("deep field not expected")
|
||||
|
||||
pull_name = EXPRESSION_PREFIX + s.name
|
||||
map_to_local = {untype_path(k): get_pull(cc) for k, c in schema.lookup.items() for cc in c if cc.type not in STRUCT}
|
||||
map_to_local = MapToLocal(schema)
|
||||
pull = jx_expression_to_function(pull_name)
|
||||
post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())
|
||||
|
||||
|
@ -217,3 +216,23 @@ def es_deepop(es, query):
|
|||
Log.error("problem formatting", e)
|
||||
|
||||
|
||||
class MapToLocal(object):
|
||||
"""
|
||||
MAP FROM RELATIVE/ABSOLUTE NAMESPACE TO PYTHON THAT WILL EXTRACT RESULT
|
||||
"""
|
||||
def __init__(self, map_to_columns):
|
||||
self.map_to_columns = map_to_columns
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self.get(item)
|
||||
|
||||
def get(self, item):
|
||||
cs = self.map_to_columns[item]
|
||||
if len(cs) == 0:
|
||||
return "Null"
|
||||
elif len(cs) == 1:
|
||||
return get_pull(cs[0])
|
||||
else:
|
||||
return "coalesce(" + (",".join(get_pull(c) for c in cs)) + ")"
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -11,17 +11,15 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
from mo_dots import Data, set_default, wrap, split_field, coalesce
|
||||
from mo_logs import Log
|
||||
from pyLibrary import convert
|
||||
|
||||
from jx_base.expressions import TupleOp
|
||||
from jx_elasticsearch.es14.aggs import count_dim, aggs_iterator, format_dispatch, drill
|
||||
from jx_python.containers.cube import Cube
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import Data, set_default, wrap, split_field, coalesce
|
||||
from mo_future import sort_using_key
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote
|
||||
from pyLibrary import convert
|
||||
|
||||
FunctionType = type(lambda: 1)
|
||||
|
||||
|
@ -51,7 +49,7 @@ def format_cube(decoders, aggs, start, query, select):
|
|||
|
||||
cube = Cube(
|
||||
query.select,
|
||||
sorted(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY
|
||||
sort_using_key(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY
|
||||
{s.name: m for s, m in matricies}
|
||||
)
|
||||
cube.frum = query
|
||||
|
@ -184,7 +182,7 @@ def format_list_from_groupby(decoders, aggs, start, query, select):
|
|||
continue
|
||||
output = Data()
|
||||
for g, d in zip(query.groupby, decoders):
|
||||
output[g.put.name] = d.get_value_from_row(row)
|
||||
output[coalesce(g.put.name, g.name)] = d.get_value_from_row(row)
|
||||
|
||||
for s in select:
|
||||
output[s.name] = s.pull(agg)
|
||||
|
@ -210,7 +208,7 @@ def format_list(decoders, aggs, start, query, select):
|
|||
if query.sort and not query.groupby:
|
||||
# TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
|
||||
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
for _, coord, agg in aggs_iterator(aggs, decoders):
|
||||
missing_coord = all_coord.next()
|
||||
while coord != missing_coord:
|
||||
# INSERT THE MISSING COORDINATE INTO THE GENERATION
|
||||
|
@ -232,7 +230,7 @@ def format_list(decoders, aggs, start, query, select):
|
|||
output[s.name] = s.pull(agg)
|
||||
yield output
|
||||
else:
|
||||
is_sent = Matrix(dims=dims, zeros=0)
|
||||
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
is_sent[coord] = 1
|
||||
|
||||
|
@ -286,12 +284,6 @@ def format_list_from_aggop(decoders, aggs, start, query, select):
|
|||
})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def format_line(decoders, aggs, start, query, select):
|
||||
list = format_list(decoders, aggs, start, query, select)
|
||||
|
||||
|
|
|
@ -19,12 +19,11 @@ from jx_base.expressions import IDENTITY
|
|||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
from jx_elasticsearch.es14.expressions import Variable, LeavesOp
|
||||
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template
|
||||
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_not, es_script
|
||||
from jx_python.containers.cube import Cube
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field
|
||||
from mo_dots import listwrap
|
||||
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_json.typed_encoder import untype_path, unnest_path, untyped
|
||||
from mo_logs import Log
|
||||
|
@ -56,7 +55,7 @@ def is_setop(es, query):
|
|||
def es_setop(es, query):
|
||||
schema = query.frum.schema
|
||||
|
||||
es_query, filters = es_query_template(schema.query_path)
|
||||
es_query, filters = es_query_template(schema.query_path[0])
|
||||
nested_filter = None
|
||||
set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
|
||||
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
|
||||
|
@ -78,7 +77,7 @@ def es_setop(es, query):
|
|||
leaves = schema.leaves(term.var)
|
||||
for c in leaves:
|
||||
full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
|
||||
if c.type == NESTED:
|
||||
if c.jx_type == NESTED:
|
||||
es_query.fields = ["_source"]
|
||||
new_select.append({
|
||||
"name": full_name,
|
||||
|
@ -88,7 +87,7 @@ def es_setop(es, query):
|
|||
})
|
||||
put_index += 1
|
||||
elif c.nested_path[0] != ".":
|
||||
es_query.fields = ["_source"]
|
||||
pass # THE NESTED PARENT WILL CAPTURE THIS
|
||||
else:
|
||||
es_query.fields += [c.es_column]
|
||||
new_select.append({
|
||||
|
@ -103,7 +102,7 @@ def es_setop(es, query):
|
|||
leaves = schema.leaves(s_column)
|
||||
nested_selects = {}
|
||||
if leaves:
|
||||
if any(c.type == NESTED for c in leaves):
|
||||
if s_column == '.' or any(c.jx_type == NESTED for c in leaves):
|
||||
# PULL WHOLE NESTED ARRAYS
|
||||
es_query.fields = ["_source"]
|
||||
for c in leaves:
|
||||
|
@ -120,7 +119,7 @@ def es_setop(es, query):
|
|||
for c in leaves:
|
||||
if len(c.nested_path) == 1:
|
||||
jx_name = untype_path(c.names["."])
|
||||
if c.type == NESTED:
|
||||
if c.jx_type == NESTED:
|
||||
es_query.fields = ["_source"]
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
|
@ -144,7 +143,7 @@ def es_setop(es, query):
|
|||
filters[0][k] = None
|
||||
set_default(
|
||||
filters[0],
|
||||
{"and": [where, {"or": nested_filter}]}
|
||||
es_and([where, es_or(nested_filter)])
|
||||
)
|
||||
|
||||
nested_path = c.nested_path[0]
|
||||
|
@ -156,7 +155,7 @@ def es_setop(es, query):
|
|||
where.nested.inner_hits._source = False
|
||||
where.nested.inner_hits.fields += [c.es_column]
|
||||
|
||||
child = relative_field(untype_path(c.names[schema.query_path]), s_column)
|
||||
child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
|
||||
pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
|
@ -178,9 +177,8 @@ def es_setop(es, query):
|
|||
})
|
||||
put_index += 1
|
||||
else:
|
||||
painless = select.value.partial_eval().to_ruby(schema)
|
||||
es_query.script_fields[literal_field(select.name)] = {"script": painless.script(schema)}
|
||||
|
||||
painless = select.value.partial_eval().to_es_script(schema)
|
||||
es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"pull": jx_expression_to_function("fields." + literal_field(select.name)),
|
||||
|
@ -345,6 +343,7 @@ set_default(format_dispatch, {
|
|||
"list": (format_list, None, "application/json")
|
||||
})
|
||||
|
||||
|
||||
def get_pull(column):
|
||||
if column.nested_path[0] == ".":
|
||||
return concat_field("fields", literal_field(column.es_column))
|
||||
|
|
|
@ -11,6 +11,10 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_future import text_type
|
||||
|
||||
from mo_logs import Log
|
||||
|
||||
from jx_base import STRING, BOOLEAN, NUMBER, OBJECT
|
||||
from jx_elasticsearch.es14.expressions import Variable
|
||||
from mo_dots import wrap
|
||||
|
@ -23,18 +27,21 @@ def es_query_template(path):
|
|||
:return:
|
||||
"""
|
||||
|
||||
if not isinstance(path, text_type):
|
||||
Log.error("expecting path to be a string")
|
||||
|
||||
if path != ".":
|
||||
f0 = {}
|
||||
f1 = {}
|
||||
output = wrap({
|
||||
"query": {"filtered": {"filter": {"and":[
|
||||
"query": {"filtered": {"filter": es_and([
|
||||
f0,
|
||||
{"nested": {
|
||||
"path": path,
|
||||
"filter": f1,
|
||||
"inner_hits": {"size": 100000}
|
||||
}}
|
||||
]}}},
|
||||
])}},
|
||||
"from": 0,
|
||||
"size": 0,
|
||||
"sort": []
|
||||
|
@ -43,7 +50,7 @@ def es_query_template(path):
|
|||
else:
|
||||
f0 = {}
|
||||
output = wrap({
|
||||
"query": {"filtered": {"filter": f0}},
|
||||
"query": {"filtered": {"filter": es_and([f0])}},
|
||||
"from": 0,
|
||||
"size": 0,
|
||||
"sort": []
|
||||
|
@ -66,7 +73,7 @@ def jx_sort_to_es_sort(sort, schema):
|
|||
|
||||
for type in types:
|
||||
for c in cols:
|
||||
if c.type == type:
|
||||
if c.jx_type == type:
|
||||
if s.sort == -1:
|
||||
output.append({c.es_column: "desc"})
|
||||
else:
|
||||
|
@ -109,3 +116,22 @@ aggregates = {
|
|||
|
||||
NON_STATISTICAL_AGGS = {"none", "one"}
|
||||
|
||||
|
||||
def es_and(terms):
|
||||
return wrap({"and": terms})
|
||||
|
||||
|
||||
def es_or(terms):
|
||||
return wrap({"or": terms})
|
||||
|
||||
|
||||
def es_not(term):
|
||||
return wrap({"not": term})
|
||||
|
||||
|
||||
def es_script(term):
|
||||
return wrap({"script": term})
|
||||
|
||||
|
||||
def es_missing(term):
|
||||
return {"missing": {"field": term}}
|
||||
|
|
|
@ -19,12 +19,11 @@ from jx_base.dimensions import Dimension
|
|||
from jx_base.expressions import jx_expression
|
||||
from jx_base.queries import is_variable_name
|
||||
from jx_base.query import QueryOp
|
||||
from jx_base.schema import Schema
|
||||
from jx_elasticsearch.es52.aggs import es_aggsop, is_aggsop
|
||||
from jx_elasticsearch.es52.deep import is_deepop, es_deepop
|
||||
from jx_elasticsearch.es52.setop import is_setop, es_setop
|
||||
from jx_elasticsearch.es52.util import aggregates
|
||||
from jx_elasticsearch.meta import FromESMetadata
|
||||
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
|
||||
from jx_python import jx
|
||||
from mo_dots import Data, Null, unwrap, coalesce, split_field, literal_field, unwraplist, join_field, wrap, listwrap, FlatList
|
||||
from mo_json import scrub, value2json
|
||||
|
@ -41,7 +40,7 @@ class ES52(Container):
|
|||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if (len(args) == 1 and args[0].get("index") == "meta") or kwargs.get("index") == "meta":
|
||||
output = FromESMetadata.__new__(FromESMetadata, *args, **kwargs)
|
||||
output = ElasticsearchMetadata.__new__(ElasticsearchMetadata, *args, **kwargs)
|
||||
output.__init__(*args, **kwargs)
|
||||
return output
|
||||
else:
|
||||
|
@ -62,26 +61,25 @@ class ES52(Container):
|
|||
typed=None,
|
||||
kwargs=None
|
||||
):
|
||||
Container.__init__(self, None)
|
||||
Container.__init__(self)
|
||||
if not container.config.default:
|
||||
container.config.default = {
|
||||
"type": "elasticsearch",
|
||||
"settings": unwrap(kwargs)
|
||||
}
|
||||
self.settings = kwargs
|
||||
self.name = coalesce(name, alias, index)
|
||||
self.name = name = coalesce(name, alias, index)
|
||||
if read_only:
|
||||
self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
|
||||
else:
|
||||
self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)
|
||||
|
||||
self.meta = FromESMetadata(kwargs=kwargs)
|
||||
self._namespace = ElasticsearchMetadata(kwargs=kwargs)
|
||||
self.settings.type = self.es.settings.type
|
||||
self.edges = Data()
|
||||
self.worker = None
|
||||
|
||||
columns = self.meta.get_columns(table_name=coalesce(name, alias, index))
|
||||
self._schema = Schema(coalesce(name, alias, index), columns)
|
||||
columns = self._namespace.get_snowflake(self._es.settings.alias).columns # ABSOLUTE COLUMNS
|
||||
|
||||
if typed == None:
|
||||
# SWITCH ON TYPED MODE
|
||||
|
@ -90,8 +88,19 @@ class ES52(Container):
|
|||
self.typed = typed
|
||||
|
||||
@property
|
||||
def schema(self):
|
||||
return self._schema
|
||||
def snowflake(self):
|
||||
return self._namespace.get_snowflake(self._es.settings.alias)
|
||||
|
||||
@property
|
||||
def namespace(self):
|
||||
return self._namespace
|
||||
|
||||
|
||||
def get_table(self, full_name):
|
||||
return Table(full_name, self)
|
||||
|
||||
def get_schema(self, query_path):
|
||||
return self._namespace.get_schema(query_path)
|
||||
|
||||
def __data__(self):
|
||||
settings = self.settings.copy()
|
||||
|
@ -122,10 +131,7 @@ class ES52(Container):
|
|||
|
||||
def query(self, _query):
|
||||
try:
|
||||
query = QueryOp.wrap(_query, table=self, schema=self.schema)
|
||||
|
||||
for n in self.namespaces:
|
||||
query = n.convert(query)
|
||||
query = QueryOp.wrap(_query, container=self, namespace=self.namespace)
|
||||
|
||||
for s in listwrap(query.select):
|
||||
if s.aggregate != None and not aggregates.get(s.aggregate):
|
||||
|
@ -209,7 +215,7 @@ class ES52(Container):
|
|||
scripts.append({"doc": v.doc})
|
||||
else:
|
||||
v = scrub(v)
|
||||
scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_painless(schema).script(schema)})
|
||||
scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})
|
||||
|
||||
if results.hits.hits:
|
||||
updates = []
|
||||
|
|
|
@ -14,7 +14,7 @@ from __future__ import unicode_literals
|
|||
from jx_base import EXISTS
|
||||
from jx_base.domains import SetDomain
|
||||
from jx_base.expressions import TupleOp, NULL
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
from jx_elasticsearch.es52.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder
|
||||
from jx_elasticsearch.es52.expressions import split_expression_by_depth, AndOp, Variable, NullOp
|
||||
|
@ -30,7 +30,6 @@ from mo_logs.strings import quote, expand_template
|
|||
from mo_math import Math, MAX, UNION
|
||||
from mo_times.timer import Timer
|
||||
|
||||
|
||||
COMPARE_TUPLE = """
|
||||
(a, b)->{
|
||||
int i=0;
|
||||
|
@ -79,7 +78,6 @@ MAX_OF_TUPLE = """
|
|||
|
||||
|
||||
def is_aggsop(es, query):
|
||||
es.cluster.get_metadata()
|
||||
if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate):
|
||||
return True
|
||||
return False
|
||||
|
@ -106,12 +104,12 @@ def get_decoders_by_depth(query):
|
|||
edge = edge.copy()
|
||||
vars_ = edge.value.vars()
|
||||
for v in vars_:
|
||||
if not schema.leaves(v.var, meta=True):
|
||||
if not schema.leaves(v.var):
|
||||
Log.error("{{var}} does not exist in schema", var=v)
|
||||
elif edge.range:
|
||||
vars_ = edge.range.min.vars() | edge.range.max.vars()
|
||||
for v in vars_:
|
||||
if not schema[v]:
|
||||
if not schema[v.var]:
|
||||
Log.error("{{var}} does not exist in schema", var=v)
|
||||
elif edge.domain.dimension:
|
||||
vars_ = edge.domain.dimension.fields
|
||||
|
@ -148,10 +146,8 @@ def sort_edges(query, prop):
|
|||
ordered_edges = []
|
||||
remaining_edges = getattr(query, prop)
|
||||
for s in query.sort:
|
||||
if not isinstance(s.value, Variable):
|
||||
Log.error("can only sort by terms")
|
||||
for e in remaining_edges:
|
||||
if e.value.var == s.value.var:
|
||||
if e.value == s.value:
|
||||
if isinstance(e.domain, SetDomain):
|
||||
pass # ALREADY SORTED?
|
||||
else:
|
||||
|
@ -159,6 +155,9 @@ def sort_edges(query, prop):
|
|||
ordered_edges.append(e)
|
||||
remaining_edges.remove(e)
|
||||
break
|
||||
else:
|
||||
Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value)
|
||||
|
||||
ordered_edges.extend(remaining_edges)
|
||||
return ordered_edges
|
||||
|
||||
|
@ -187,33 +186,33 @@ def es_aggsop(es, frum, query):
|
|||
|
||||
for canonical_name, many in new_select.items():
|
||||
for s in many:
|
||||
es_cols = frum.schema.values(s.value.var)
|
||||
columns = frum.schema.values(s.value.var)
|
||||
|
||||
if s.aggregate == "count":
|
||||
canonical_names = []
|
||||
for es_col in es_cols:
|
||||
cn = literal_field(es_col.es_column + "_count")
|
||||
if es_col.type == EXISTS:
|
||||
for column in columns:
|
||||
cn = literal_field(column.es_column + "_count")
|
||||
if column.jx_type == EXISTS:
|
||||
canonical_names.append(cn + ".doc_count")
|
||||
es_query.aggs[cn].filter.range = {es_col.es_column: {"gt": 0}}
|
||||
es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
|
||||
else:
|
||||
canonical_names.append(cn+ ".value")
|
||||
es_query.aggs[cn].value_count.field = es_col.es_column
|
||||
if len(es_cols) == 1:
|
||||
es_query.aggs[cn].value_count.field = column.es_column
|
||||
if len(canonical_names) == 1:
|
||||
s.pull = jx_expression_to_function(canonical_names[0])
|
||||
else:
|
||||
s.pull = jx_expression_to_function({"add": canonical_names})
|
||||
elif s.aggregate == "median":
|
||||
if len(es_cols) > 1:
|
||||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
# ES USES DIFFERENT METHOD FOR PERCENTILES
|
||||
key = literal_field(canonical_name + " percentile")
|
||||
|
||||
es_query.aggs[key].percentiles.field = es_cols[0].es_column
|
||||
es_query.aggs[key].percentiles.field = columns[0].es_column
|
||||
es_query.aggs[key].percentiles.percents += [50]
|
||||
s.pull = jx_expression_to_function(key + ".values.50\\.0")
|
||||
elif s.aggregate == "percentile":
|
||||
if len(es_cols) > 1:
|
||||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
# ES USES DIFFERENT METHOD FOR PERCENTILES
|
||||
key = literal_field(canonical_name + " percentile")
|
||||
|
@ -221,48 +220,48 @@ def es_aggsop(es, frum, query):
|
|||
Log.error("Expecting percentile to be a float from 0.0 to 1.0")
|
||||
percent = Math.round(s.percentile * 100, decimal=6)
|
||||
|
||||
es_query.aggs[key].percentiles.field = es_cols[0].es_column
|
||||
es_query.aggs[key].percentiles.field = columns[0].es_column
|
||||
es_query.aggs[key].percentiles.percents += [percent]
|
||||
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
|
||||
elif s.aggregate == "cardinality":
|
||||
canonical_names = []
|
||||
for es_col in es_cols:
|
||||
cn = literal_field(es_col.es_column + "_cardinality")
|
||||
for column in columns:
|
||||
cn = literal_field(column.es_column + "_cardinality")
|
||||
canonical_names.append(cn)
|
||||
es_query.aggs[cn].cardinality.field = es_col.es_column
|
||||
if len(es_cols) == 1:
|
||||
es_query.aggs[cn].cardinality.field = column.es_column
|
||||
if len(columns) == 1:
|
||||
s.pull = jx_expression_to_function(canonical_names[0] + ".value")
|
||||
else:
|
||||
s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
|
||||
elif s.aggregate == "stats":
|
||||
if len(es_cols) > 1:
|
||||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
# REGULAR STATS
|
||||
stats_name = literal_field(canonical_name)
|
||||
es_query.aggs[stats_name].extended_stats.field = es_cols[0].es_column
|
||||
es_query.aggs[stats_name].extended_stats.field = columns[0].es_column
|
||||
|
||||
# GET MEDIAN TOO!
|
||||
median_name = literal_field(canonical_name + "_percentile")
|
||||
es_query.aggs[median_name].percentiles.field = es_cols[0].es_column
|
||||
es_query.aggs[median_name].percentiles.field = columns[0].es_column
|
||||
es_query.aggs[median_name].percentiles.percents += [50]
|
||||
|
||||
s.pull = get_pull_stats(stats_name, median_name)
|
||||
elif s.aggregate == "union":
|
||||
pulls = []
|
||||
for es_col in es_cols:
|
||||
for column in columns:
|
||||
script = {"scripted_metric": {
|
||||
'init_script': 'params._agg.terms = new HashSet()',
|
||||
'map_script': 'for (v in doc['+quote(es_col.es_column)+'].values) params._agg.terms.add(v)',
|
||||
'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v)',
|
||||
'combine_script': 'return params._agg.terms.toArray()',
|
||||
'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
|
||||
}}
|
||||
stats_name = encode_property(es_col.es_column)
|
||||
if es_col.nested_path[0] == ".":
|
||||
stats_name = encode_property(column.es_column)
|
||||
if column.nested_path[0] == ".":
|
||||
es_query.aggs[stats_name] = script
|
||||
pulls.append(jx_expression_to_function(stats_name + ".value"))
|
||||
else:
|
||||
es_query.aggs[stats_name] = {
|
||||
"nested": {"path": es_col.nested_path[0]},
|
||||
"nested": {"path": column.nested_path[0]},
|
||||
"aggs": {"_nested": script}
|
||||
}
|
||||
pulls.append(jx_expression_to_function(stats_name + "._nested.value"))
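The scripted_metric above simulates a union aggregate: each shard collects values into a set (init/map), hands back an array (combine), and the coordinating node unions the per-shard arrays (reduce). The same phases expressed in plain Python, with a hypothetical "tags" field standing in for the real column:

```python
def map_and_combine(docs):                       # per shard: init_script + map_script + combine_script
    terms = set()
    for doc in docs:
        terms.update(doc.get("tags", []))
    return list(terms)

def reduce_phase(per_shard):                     # reduce_script: union the shard results, skipping nulls
    output = set()
    for shard_terms in per_shard:
        if shard_terms is not None:
            output.update(shard_terms)
    return output

shard1 = map_and_combine([{"tags": ["a", "b"]}, {"tags": ["b"]}])
shard2 = map_and_combine([{"tags": ["c"]}, {}])
assert reduce_phase([shard1, shard2, None]) == {"a", "b", "c"}
```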
|
||||
|
@ -274,11 +273,11 @@ def es_aggsop(es, frum, query):
|
|||
else:
|
||||
s.pull = lambda row: UNION(p(row) for p in pulls)
|
||||
else:
|
||||
if len(es_cols) > 1:
|
||||
if len(columns) > 1:
|
||||
Log.error("Do not know how to count columns with more than one type (script probably)")
|
||||
|
||||
# PULL VALUE OUT OF THE stats AGGREGATE
|
||||
es_query.aggs[literal_field(canonical_name)].extended_stats.field = es_cols[0].es_column
|
||||
es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
|
||||
s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})
|
||||
|
||||
for i, s in enumerate(formula):
|
||||
|
@ -296,8 +295,8 @@ def es_aggsop(es, frum, query):
|
|||
dir = -1
|
||||
op = 'min'
|
||||
|
||||
nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_painless(schema).expr
|
||||
selfy = s.value.partial_eval().to_painless(schema).expr
|
||||
nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr
|
||||
selfy = s.value.partial_eval().to_es_script(schema).expr
|
||||
|
||||
script = {"scripted_metric": {
|
||||
'init_script': 'params._agg.best = ' + nully + ';',
|
||||
|
@ -317,13 +316,13 @@ def es_aggsop(es, frum, query):
|
|||
else:
|
||||
Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
|
||||
elif s.aggregate == "count":
|
||||
es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_painless(schema).script(schema)
|
||||
es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
|
||||
s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
|
||||
elif s.aggregate == "median":
|
||||
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
|
||||
key = literal_field(canonical_name + " percentile")
|
||||
|
||||
es_query.aggs[key].percentiles.script = s.value.to_painless(schema).script(schema)
|
||||
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
|
||||
es_query.aggs[key].percentiles.percents += [50]
|
||||
s.pull = jx_expression_to_function(key + ".values.50\\.0")
|
||||
elif s.aggregate == "percentile":
|
||||
|
@ -331,35 +330,35 @@ def es_aggsop(es, frum, query):
|
|||
key = literal_field(canonical_name + " percentile")
|
||||
percent = Math.round(s.percentile * 100, decimal=6)
|
||||
|
||||
es_query.aggs[key].percentiles.script = s.value.to_painless(schema).script(schema)
|
||||
es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
|
||||
es_query.aggs[key].percentiles.percents += [percent]
|
||||
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
|
||||
elif s.aggregate == "cardinality":
|
||||
# ES USES DIFFERENT METHOD FOR CARDINALITY
|
||||
key = canonical_name + " cardinality"
|
||||
|
||||
es_query.aggs[key].cardinality.script = s.value.to_painless(schema).script(schema)
|
||||
es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
|
||||
s.pull = jx_expression_to_function(key + ".value")
|
||||
elif s.aggregate == "stats":
|
||||
# REGULAR STATS
|
||||
stats_name = literal_field(canonical_name)
|
||||
es_query.aggs[stats_name].extended_stats.script = s.value.to_painless(schema).script(schema)
|
||||
es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
|
||||
|
||||
# GET MEDIAN TOO!
|
||||
median_name = literal_field(canonical_name + " percentile")
|
||||
es_query.aggs[median_name].percentiles.script = s.value.to_painless(schema).script(schema)
|
||||
es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
|
||||
es_query.aggs[median_name].percentiles.percents += [50]
|
||||
|
||||
s.pull = get_pull_stats(stats_name, median_name)
|
||||
elif s.aggregate == "union":
|
||||
# USE TERMS AGGREGATE TO SIMULATE union
|
||||
stats_name = literal_field(canonical_name)
|
||||
es_query.aggs[stats_name].terms.script_field = s.value.to_painless(schema).script(schema)
|
||||
es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
|
||||
s.pull = jx_expression_to_function(stats_name + ".buckets.key")
|
||||
else:
|
||||
# PULL VALUE OUT OF THE stats AGGREGATE
|
||||
s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
|
||||
es_query.aggs[canonical_name].extended_stats.script = s.value.to_painless(schema).script(schema)
|
||||
es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)
|
||||
|
||||
decoders = get_decoders_by_depth(query)
|
||||
start = 0
|
||||
|
@ -384,11 +383,7 @@ def es_aggsop(es, frum, query):
|
|||
|
||||
es_query = wrap({
|
||||
"aggs": {"_nested": set_default(
|
||||
{
|
||||
"nested": {
|
||||
"path": schema.query_path
|
||||
}
|
||||
},
|
||||
{"nested": {"path": schema.query_path[0]}},
|
||||
es_query
|
||||
)}
|
||||
})
|
||||
|
|
|
@ -13,21 +13,21 @@ from __future__ import unicode_literals
|
|||
|
||||
from collections import Mapping
|
||||
|
||||
from mo_future import text_type, binary_type
|
||||
|
||||
from jx_base import STRING, NUMBER, BOOLEAN
|
||||
from jx_base.dimensions import Dimension
|
||||
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
|
||||
from jx_base.expressions import TupleOp, value2json
|
||||
from jx_base.expressions import TupleOp, TRUE
|
||||
from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT
|
||||
from jx_elasticsearch.es52.expressions import Variable, NotOp, InOp, Literal, OrOp, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE
|
||||
from jx_elasticsearch.es52.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE
|
||||
from jx_elasticsearch.es52.util import es_missing
|
||||
from jx_python import jx
|
||||
from mo_dots import set_default, coalesce, literal_field, Data, relative_field, unwraplist
|
||||
from mo_dots import wrap
|
||||
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import untype_path
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote, expand_template
|
||||
from mo_math import MAX, MIN
|
||||
from mo_math import Math
|
||||
from mo_math import MAX, MIN, Math
|
||||
from pyLibrary.convert import string2boolean
|
||||
|
||||
|
||||
class AggsDecoder(object):
|
||||
|
@ -144,6 +144,7 @@ class SetDecoder(AggsDecoder):
|
|||
AggsDecoder.__init__(self, edge, query, limit)
|
||||
domain = self.domain = edge.domain
|
||||
self.sorted = None
|
||||
self.pull = pull_functions[STRING]
|
||||
|
||||
# WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM
|
||||
# self.sorted = {1: "asc", -1: "desc", None: None}[getattr(edge.domain, 'sort', None)]
|
||||
|
@ -186,7 +187,7 @@ class SetDecoder(AggsDecoder):
|
|||
terms = set_default({"terms": {
|
||||
"script": {
|
||||
"lang": "painless",
|
||||
"inline": value.to_painless(self.schema).script(self.schema)
|
||||
"inline": value.to_es_script(self.schema).script(self.schema)
|
||||
},
|
||||
"size": limit
|
||||
}}, es_query)
|
||||
|
@ -213,7 +214,7 @@ class SetDecoder(AggsDecoder):
|
|||
return self.domain.getKeyByIndex(index)
|
||||
|
||||
def get_value_from_row(self, row):
|
||||
return row[self.start].get('key')
|
||||
return self.pull(row[self.start].get('key'))
|
||||
|
||||
def get_index(self, row):
|
||||
try:
|
||||
|
@ -249,7 +250,7 @@ def _range_composer(edge, domain, es_query, to_float, schema):
|
|||
if isinstance(edge.value, Variable):
|
||||
calc = {"field": schema.leaves(edge.value.var)[0].es_column}
|
||||
else:
|
||||
calc = {"script": edge.value.to_painless(schema).script(schema)}
|
||||
calc = {"script": edge.value.to_es_script(schema).script(schema)}
|
||||
|
||||
return wrap({"aggs": {
|
||||
"_match": set_default(
|
||||
|
@ -521,7 +522,7 @@ class ObjectDecoder(AggsDecoder):
|
|||
"size": self.domain.limit
|
||||
}}, es_query),
|
||||
"_missing": set_default(
|
||||
{"filter": {"bool": {"must_not": {"exists": {"field": v}}}}},
|
||||
{"filter": es_missing(v)},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
|
@ -580,58 +581,51 @@ class DefaultDecoder(SetDecoder):
|
|||
self.parts = list()
|
||||
self.key2index = {}
|
||||
self.computed_domain = False
|
||||
self.script = self.edge.value.partial_eval().to_es_script(self.schema)
|
||||
self.pull = pull_functions[self.script.data_type]
|
||||
self.missing = self.script.miss.partial_eval()
|
||||
self.exists = NotOp("not", self.missing).partial_eval()
|
||||
|
||||
# WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM
|
||||
self.sorted = None
|
||||
edge_var = edge.value.vars()
|
||||
for s in query.sort:
|
||||
if not edge_var - s.value.vars():
|
||||
self.sorted = {1: "asc", -1: "desc"}[s.sort]
|
||||
# WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
|
||||
sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
|
||||
if sort_candidates:
|
||||
self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]}
|
||||
else:
|
||||
self.es_order = None
|
||||
|
||||
def append_query(self, es_query, start):
|
||||
self.start = start
|
||||
|
||||
value = self.edge.value.partial_eval()
|
||||
script = value.to_painless(self.schema)
|
||||
exists = NotOp("not", script.miss).partial_eval()
|
||||
if not isinstance(self.edge.value, Variable):
|
||||
|
||||
if self.exists is TRUE:
|
||||
# IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
|
||||
output = wrap({"aggs": {
|
||||
"_match": {
|
||||
"filter": exists.to_esfilter(self.schema),
|
||||
"_match": set_default(
|
||||
{"terms": {
|
||||
"script": {"lang": "painless", "inline": self.script.expr},
|
||||
"size": self.domain.limit,
|
||||
"order": self.es_order
|
||||
}},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
else:
|
||||
output = wrap({"aggs": {
|
||||
"_match": { # _match AND _filter REVERSED SO _match LINES UP WITH _missing
|
||||
"filter": self.exists.to_esfilter(self.schema),
|
||||
"aggs": {
|
||||
"_filter": set_default(
|
||||
{"terms": {
|
||||
"script": {
|
||||
"lang": "painless",
|
||||
"inline": script.expr
|
||||
},
|
||||
"script": {"lang": "painless", "inline": self.script.expr},
|
||||
"size": self.domain.limit,
|
||||
"order": {"_term": self.sorted} if self.sorted else None
|
||||
"order": self.es_order
|
||||
}},
|
||||
es_query
|
||||
)
|
||||
}
|
||||
},
|
||||
"_missing": set_default(
|
||||
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
return output
|
||||
elif self.edge.value.var in [s.value.var for s in self.query.sort]:
|
||||
sort_dir = [s.sort for s in self.query.sort if s.value.var == self.edge.value.var][0]
|
||||
output = wrap({"aggs": {
|
||||
"_match": set_default(
|
||||
{"terms": {
|
||||
"field": self.schema.leaves(self.edge.value.var)[0].es_column,
|
||||
"size": self.domain.limit,
|
||||
"order": {"_term": "asc" if sort_dir == 1 else "desc"}
|
||||
}},
|
||||
es_query
|
||||
),
|
||||
"_missing": set_default(
|
||||
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
|
||||
{"filter": self.missing.to_esfilter(self.schema)},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
|
@ -641,12 +635,13 @@ class DefaultDecoder(SetDecoder):
|
|||
"_match": set_default(
|
||||
{"terms": {
|
||||
"field": self.schema.leaves(self.edge.value.var)[0].es_column,
|
||||
"size": self.domain.limit
|
||||
"size": self.domain.limit,
|
||||
"order": self.es_order
|
||||
}},
|
||||
es_query
|
||||
),
|
||||
"_missing": set_default(
|
||||
{"filter": NotOp("not", exists).to_esfilter(self.schema)},
|
||||
{"filter": self.missing.to_esfilter(self.schema)},
|
||||
es_query
|
||||
)
|
||||
}})
|
||||
|
@ -656,7 +651,7 @@ class DefaultDecoder(SetDecoder):
|
|||
part = row[self.start]
|
||||
if part['doc_count']:
|
||||
if part.get('key') != None:
|
||||
self.parts.append(part.get('key'))
|
||||
self.parts.append(self.pull(part.get('key')))
|
||||
else:
|
||||
self.edge.allowNulls = True # OK! WE WILL ALLOW NULLS
|
||||
|
||||
|
@ -671,19 +666,19 @@ class DefaultDecoder(SetDecoder):
|
|||
if self.computed_domain:
|
||||
try:
|
||||
part = row[self.start]
|
||||
return self.domain.getIndexByKey(part.get('key'))
|
||||
return self.domain.getIndexByKey(self.pull(part.get('key')))
|
||||
except Exception as e:
|
||||
Log.error("problem", cause=e)
|
||||
else:
|
||||
try:
|
||||
part = row[self.start]
|
||||
key = part.get('key')
|
||||
key = self.pull(part.get('key'))
|
||||
i = self.key2index.get(key)
|
||||
if i is None:
|
||||
i = len(self.parts)
|
||||
part = {"key": key, "dataIndex": i}
|
||||
self.parts.append({"key": key, "dataIndex": i})
|
||||
self.key2index[i] = part
|
||||
self.parts.append(part)
|
||||
self.key2index[key] = i
|
||||
return i
|
||||
except Exception as e:
|
||||
Log.error("problem", cause=e)
|
||||
|
@ -755,3 +750,8 @@ class DimFieldListDecoder(SetDecoder):
|
|||
return len(self.fields)
|
||||
|
||||
|
||||
pull_functions = {
|
||||
STRING: lambda x: x,
|
||||
NUMBER: lambda x: float(x) if x !=None else None,
|
||||
BOOLEAN: string2boolean
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_base import STRUCT, NESTED
|
||||
from jx_base import NESTED
|
||||
from jx_base.expressions import NULL
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
|
@ -49,8 +49,7 @@ def is_deepop(es, query):
|
|||
|
||||
def es_deepop(es, query):
|
||||
schema = query.frum.schema
|
||||
columns = schema.columns
|
||||
query_path = schema.query_path
|
||||
query_path = schema.query_path[0]
|
||||
|
||||
# TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
|
||||
# THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS
|
||||
|
@ -68,7 +67,7 @@ def es_deepop(es, query):
|
|||
if not wheres[1]:
|
||||
more_filter = {
|
||||
"bool": {
|
||||
"must": [AndOp("and", wheres[0]).partial_eval().to_esfilter(schema)],
|
||||
"filter": [AndOp("and", wheres[0]).partial_eval().to_esfilter(schema)],
|
||||
"must_not": {
|
||||
"nested": {
|
||||
"path": query_path,
|
||||
|
@ -103,7 +102,7 @@ def es_deepop(es, query):
|
|||
col_names = set()
|
||||
for c in leaves:
|
||||
if c.nested_path[0] == ".":
|
||||
if c.type == NESTED:
|
||||
if c.jx_type == NESTED:
|
||||
continue
|
||||
es_query.stored_fields += [c.es_column]
|
||||
c_name = untype_path(c.names[query_path])
|
||||
|
@ -134,7 +133,7 @@ def es_deepop(es, query):
|
|||
for n in net_columns:
|
||||
pull = get_pull_function(n)
|
||||
if n.nested_path[0] == ".":
|
||||
if n.type == NESTED:
|
||||
if n.jx_type == NESTED:
|
||||
continue
|
||||
es_query.stored_fields += [n.es_column]
|
||||
|
||||
|
@ -161,14 +160,14 @@ def es_deepop(es, query):
|
|||
else:
|
||||
expr = s.value
|
||||
for v in expr.vars():
|
||||
for c in schema[v]:
|
||||
for c in schema[v.var]:
|
||||
if c.nested_path[0] == ".":
|
||||
es_query.stored_fields += [c.es_column]
|
||||
# else:
|
||||
# Log.error("deep field not expected")
|
||||
|
||||
pull_name = EXPRESSION_PREFIX + s.name
|
||||
map_to_local = {untype_path(k): get_pull(cc) for k, c in schema.lookup.items() for cc in c if cc.type not in STRUCT}
|
||||
map_to_local = MapToLocal(schema)
|
||||
pull = jx_expression_to_function(pull_name)
|
||||
post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())
|
||||
|
||||
|
@ -223,3 +222,23 @@ def es_deepop(es, query):
|
|||
Log.error("problem formatting", e)
|
||||
|
||||
|
||||
class MapToLocal(object):
|
||||
"""
|
||||
MAP FROM RELATIVE/ABSOLUTE NAMESPACE TO PYTHON THAT WILL EXTRACT RESULT
|
||||
"""
|
||||
def __init__(self, map_to_columns):
|
||||
self.map_to_columns = map_to_columns
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self.get(item)
|
||||
|
||||
def get(self, item):
|
||||
cs = self.map_to_columns[item]
|
||||
if len(cs) == 0:
|
||||
return "Null"
|
||||
elif len(cs) == 1:
|
||||
return get_pull(cs[0])
|
||||
else:
|
||||
return "coalesce(" + (",".join(get_pull(c) for c in cs)) + ")"
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -11,18 +11,15 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
from mo_dots import Data, set_default, wrap, split_field, coalesce
|
||||
from mo_future import sort_using_key
|
||||
from mo_logs import Log
|
||||
from pyLibrary import convert
|
||||
|
||||
from jx_base.expressions import TupleOp
|
||||
from jx_elasticsearch.es52.aggs import count_dim, aggs_iterator, format_dispatch, drill
|
||||
from jx_python.containers.cube import Cube
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import Data, set_default, wrap, split_field, coalesce
|
||||
from mo_future import sort_using_key
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote
|
||||
from pyLibrary import convert
|
||||
|
||||
FunctionType = type(lambda: 1)
|
||||
|
||||
|
@ -191,6 +188,9 @@ def format_list_from_groupby(decoders, aggs, start, query, select):
|
|||
output[s.name] = s.pull(agg)
|
||||
yield output
|
||||
|
||||
for g in query.groupby:
|
||||
g.put.name = coalesce(g.put.name, g.name)
|
||||
|
||||
output = Data(
|
||||
meta={"format": "list"},
|
||||
data=list(data())
|
||||
|
@ -208,7 +208,7 @@ def format_list(decoders, aggs, start, query, select):
|
|||
if query.sort and not query.groupby:
|
||||
# TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
|
||||
all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
for _, coord, agg in aggs_iterator(aggs, decoders):
|
||||
missing_coord = all_coord.next()
|
||||
while coord != missing_coord:
|
||||
# INSERT THE MISSING COORDINATE INTO THE GENERATION
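When the output must be sorted, ES only returns the buckets that exist, so format_list walks the full set of expected coordinates and emits default rows for the ones ES skipped. The real code streams the expected combinations and the returned buckets in lock-step; this sketch shows the same idea with a lookup instead, using hypothetical edge domains:

```python
from itertools import product

def fill_gaps(expected, produced, default):
    produced = dict(produced)                    # coord -> aggregate value for buckets ES returned
    for coord in expected:
        yield coord, produced.get(coord, default)

expected = list(product(["linux", "win"], ["debug", "opt"]))
produced = [(("linux", "opt"), 7), (("win", "debug"), 3)]
assert list(fill_gaps(expected, produced, 0)) == [
    (("linux", "debug"), 0), (("linux", "opt"), 7),
    (("win", "debug"), 3), (("win", "opt"), 0),
]
```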
|
||||
|
@ -230,7 +230,7 @@ def format_list(decoders, aggs, start, query, select):
|
|||
output[s.name] = s.pull(agg)
|
||||
yield output
|
||||
else:
|
||||
is_sent = Matrix(dims=dims, zeros=0)
|
||||
|
||||
for row, coord, agg in aggs_iterator(aggs, decoders):
|
||||
is_sent[coord] = 1
|
||||
|
||||
|
|
|
@ -19,12 +19,11 @@ from jx_base.expressions import IDENTITY
|
|||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
from jx_elasticsearch.es52.expressions import Variable, LeavesOp
|
||||
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template
|
||||
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_not, es_script
|
||||
from jx_python.containers.cube import Cube
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field
|
||||
from mo_dots import listwrap
|
||||
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_json.typed_encoder import untype_path, unnest_path, untyped
|
||||
from mo_logs import Log
|
||||
|
@ -56,7 +55,7 @@ def is_setop(es, query):
|
|||
def es_setop(es, query):
|
||||
schema = query.frum.schema
|
||||
|
||||
es_query, filters = es_query_template(schema.query_path)
|
||||
es_query, filters = es_query_template(schema.query_path[0])
|
||||
nested_filter = None
|
||||
set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
|
||||
es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
|
||||
|
@ -75,10 +74,10 @@ def es_setop(es, query):
|
|||
# IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
|
||||
if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
|
||||
term = select.value.term
|
||||
leaves = schema.values(term.var)
|
||||
leaves = schema.leaves(term.var)
|
||||
for c in leaves:
|
||||
full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
|
||||
if c.type == NESTED:
|
||||
if c.jx_type == NESTED:
|
||||
es_query.stored_fields = ["_source"]
|
||||
new_select.append({
|
||||
"name": full_name,
|
||||
|
@ -88,7 +87,7 @@ def es_setop(es, query):
|
|||
})
|
||||
put_index += 1
|
||||
elif c.nested_path[0] != ".":
|
||||
es_query.stored_fields = ["_source"]
|
||||
pass # THE NESTED PARENT WILL CAPTURE THIS
|
||||
else:
|
||||
es_query.stored_fields += [c.es_column]
|
||||
new_select.append({
|
||||
|
@ -103,7 +102,7 @@ def es_setop(es, query):
|
|||
leaves = schema.leaves(s_column)
|
||||
nested_selects = {}
|
||||
if leaves:
|
||||
if any(c.type == NESTED for c in leaves):
|
||||
if s_column == '.' or any(c.jx_type == NESTED for c in leaves):
|
||||
# PULL WHOLE NESTED ARRAYS
|
||||
es_query.stored_fields = ["_source"]
|
||||
for c in leaves:
|
||||
|
@ -120,7 +119,7 @@ def es_setop(es, query):
|
|||
for c in leaves:
|
||||
if len(c.nested_path) == 1:
|
||||
jx_name = untype_path(c.names["."])
|
||||
if c.type == NESTED:
|
||||
if c.jx_type == NESTED:
|
||||
es_query.stored_fields = ["_source"]
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
|
@ -144,7 +143,7 @@ def es_setop(es, query):
|
|||
filters[0][k] = None
|
||||
set_default(
|
||||
filters[0],
|
||||
{"bool": {"must": [where, {"bool": {"should": nested_filter}}]}}
|
||||
es_and([where, es_or(nested_filter)])
|
||||
)
|
||||
|
||||
nested_path = c.nested_path[0]
|
||||
|
@ -156,7 +155,7 @@ def es_setop(es, query):
|
|||
where.nested.inner_hits._source = False
|
||||
where.nested.inner_hits.stored_fields += [c.es_column]
|
||||
|
||||
child = relative_field(untype_path(c.names[schema.query_path]), s_column)
|
||||
child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
|
||||
pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
|
@ -178,11 +177,8 @@ def es_setop(es, query):
|
|||
})
|
||||
put_index += 1
|
||||
else:
|
||||
painless = select.value.partial_eval().to_painless(schema)
|
||||
es_query.script_fields[literal_field(select.name)] = {"script": {
|
||||
"lang": "painless",
|
||||
"inline": painless.script(schema)
|
||||
}}
|
||||
painless = select.value.partial_eval().to_es_script(schema)
|
||||
es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"pull": jx_expression_to_function("fields." + literal_field(select.name)),
|
||||
|
@ -346,6 +342,7 @@ set_default(format_dispatch, {
|
|||
"list": (format_list, None, "application/json")
|
||||
})
|
||||
|
||||
|
||||
def get_pull(column):
|
||||
if column.nested_path[0] == ".":
|
||||
return concat_field("fields", literal_field(column.es_column))
|
||||
|
|
|
@ -11,6 +11,10 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_future import text_type
|
||||
|
||||
from mo_logs import Log
|
||||
|
||||
from jx_base import STRING, BOOLEAN, NUMBER, OBJECT
|
||||
from jx_elasticsearch.es52.expressions import Variable
|
||||
from mo_dots import wrap
|
||||
|
@ -23,18 +27,21 @@ def es_query_template(path):
|
|||
:return:
|
||||
"""
|
||||
|
||||
if not isinstance(path, text_type):
|
||||
Log.error("expecting path to be a string")
|
||||
|
||||
if path != ".":
|
||||
f0 = {}
|
||||
f1 = {}
|
||||
output = wrap({
|
||||
"query": {"bool": {"must": [
|
||||
"query": es_and([
|
||||
f0,
|
||||
{"nested": {
|
||||
"path": path,
|
||||
"query": f1,
|
||||
"inner_hits": {"size": 100000}
|
||||
}}
|
||||
]}},
|
||||
]),
|
||||
"from": 0,
|
||||
"size": 0,
|
||||
"sort": []
|
||||
|
@ -43,7 +50,7 @@ def es_query_template(path):
|
|||
else:
|
||||
f0 = {}
|
||||
output = wrap({
|
||||
"query": {"bool": {"must": [f0]}},
|
||||
"query": es_and([f0]),
|
||||
"from": 0,
|
||||
"size": 0,
|
||||
"sort": []
|
||||
|
@ -66,7 +73,7 @@ def jx_sort_to_es_sort(sort, schema):
|
|||
|
||||
for type in types:
|
||||
for c in cols:
|
||||
if c.type == type:
|
||||
if c.jx_type == type:
|
||||
if s.sort == -1:
|
||||
output.append({c.es_column: "desc"})
|
||||
else:
|
||||
|
@ -109,3 +116,22 @@ aggregates = {
|
|||
|
||||
NON_STATISTICAL_AGGS = {"none", "one"}
|
||||
|
||||
|
||||
def es_and(terms):
|
||||
return wrap({"bool": {"filter": terms}})
|
||||
|
||||
|
||||
def es_or(terms):
|
||||
return wrap({"bool": {"should": terms}})
|
||||
|
||||
|
||||
def es_not(term):
|
||||
return wrap({"bool": {"must_not": term}})
|
||||
|
||||
|
||||
def es_script(term):
|
||||
return wrap({"script": {"lang": "painless", "inline": term}})
|
||||
|
||||
|
||||
def es_missing(term):
|
||||
return {"bool": {"must_not": {"exists": {"field": term}}}}
|
||||
|
|
|
@ -12,24 +12,28 @@ from __future__ import division
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
from copy import copy
|
||||
from itertools import product
|
||||
|
||||
from jx_base import STRUCT, Table
|
||||
import jx_base
|
||||
from jx_base.namespace import Namespace
|
||||
from mo_math import MAX
|
||||
|
||||
from mo_collections.relation import Relation_usingList
|
||||
|
||||
from jx_base import STRUCT, TableDesc, BOOLEAN
|
||||
from jx_base.query import QueryOp
|
||||
from jx_base.schema import Schema
|
||||
from jx_python import jx, meta as jx_base_meta
|
||||
from jx_python.containers.list_usingPythonList import ListContainer
|
||||
from jx_python.meta import ColumnList, Column
|
||||
from mo_dots import Data, relative_field, concat_field, SELF_PATH, ROOT_PATH, coalesce, set_default, Null, split_field, join_field, wrap
|
||||
from mo_json.typed_encoder import EXISTS_TYPE
|
||||
from mo_dots import Data, relative_field, SELF_PATH, ROOT_PATH, coalesce, set_default, Null, split_field, join_field, wrap, concat_field, startswith_field, literal_field
|
||||
from mo_json.typed_encoder import EXISTS_TYPE, TYPE_PREFIX, untype_path, unnest_path
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote
|
||||
from mo_threads import Queue, THREAD_STOP, Thread, Till
|
||||
from mo_times import HOUR, MINUTE, Timer, Date
|
||||
from pyLibrary.env import elasticsearch
|
||||
from pyLibrary.env.elasticsearch import es_type_to_json_type
|
||||
from pyLibrary.env.elasticsearch import es_type_to_json_type, _get_best_type_from_mapping
|
||||
|
||||
MAX_COLUMN_METADATA_AGE = 12 * HOUR
|
||||
ENABLE_META_SCAN = False
|
||||
|
@ -39,9 +43,9 @@ OLD_METADATA = MINUTE
|
|||
TEST_TABLE_PREFIX = "testing" # USED TO TURN OFF COMPLAINING ABOUT TEST INDEXES
|
||||
|
||||
|
||||
class FromESMetadata(Schema):
|
||||
class ElasticsearchMetadata(Namespace):
|
||||
"""
|
||||
QUERY THE METADATA
|
||||
MANAGE SNOWFLAKE SCHEMAS FOR EACH OF THE ALIASES FOUND IN THE CLUSTER
|
||||
"""
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
|
@ -59,21 +63,31 @@ class FromESMetadata(Schema):
|
|||
self.too_old = TOO_OLD
|
||||
self.settings = kwargs
|
||||
self.default_name = coalesce(name, alias, index)
|
||||
self.default_es = elasticsearch.Cluster(kwargs=kwargs)
|
||||
self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)
|
||||
self.index_does_not_exist = set()
|
||||
self.todo = Queue("refresh metadata", max=100000, unique=True)
|
||||
|
||||
self.index_to_alias = Relation_usingList()
|
||||
|
||||
|
||||
self.es_metadata = Null
|
||||
self.abs_columns = set()
|
||||
# self.abs_columns = set()
|
||||
self.last_es_metadata = Date.now()-OLD_METADATA
|
||||
|
||||
self.meta = Data()
|
||||
table_columns = metadata_tables()
|
||||
column_columns = metadata_columns()
|
||||
self.meta.tables = ListContainer("meta.tables", [], wrap({c.names["."]: c for c in table_columns}))
|
||||
self.meta.columns = ColumnList()
|
||||
self.meta.columns.insert(column_columns)
|
||||
self.meta.columns.insert(table_columns)
|
||||
|
||||
self.alias_to_query_paths = {
|
||||
"meta.columns": [['.']],
|
||||
"meta.tables": [['.']]
|
||||
}
|
||||
self.alias_new_since = {
|
||||
"meta.columns": Date.now(),
|
||||
"meta.tables": Date.now()
|
||||
}
|
||||
table_columns = metadata_tables()
|
||||
self.meta.tables = ListContainer("meta.tables", [], jx_base.Schema(".", table_columns))
|
||||
self.meta.columns.extend(table_columns)
|
||||
# TODO: fix monitor so it does not bring down ES
|
||||
if ENABLE_META_SCAN:
|
||||
self.worker = Thread.run("refresh metadata", self.monitor)
|
||||
|
@ -81,79 +95,52 @@ class FromESMetadata(Schema):
|
|||
self.worker = Thread.run("refresh metadata", self.not_monitor)
|
||||
return
|
||||
|
||||
@property
|
||||
def query_path(self):
|
||||
return None
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
return self.default_es.path + "/" + self.default_name.replace(".", "/")
|
||||
return self.es_cluster.path + "/" + self.default_name.replace(".", "/")
|
||||
|
||||
def get_table(self, table_name):
|
||||
with self.meta.tables.locker:
|
||||
return wrap([t for t in self.meta.tables.data if t.name == table_name])
|
||||
def _reload_columns(self, alias=None):
|
||||
"""
|
||||
:param alias: A REAL ALIAS (OR NAME OF INDEX THAT HAS NO ALIAS)
|
||||
:return:
|
||||
"""
|
||||
# FIND ALL INDEXES OF ALIAS
|
||||
canonical_index = self.es_cluster.get_best_matching_index(alias).index
|
||||
times = self.es_cluster.index_new_since
|
||||
|
||||
def _upsert_column(self, c):
|
||||
# ASSUMING THE self.meta.columns.locker IS HAD
|
||||
existing_columns = self.meta.columns.find(c.es_index, c.names["."])
|
||||
for canonical in existing_columns:
|
||||
if canonical.type == c.type and canonical is not c:
|
||||
set_default(c.names, canonical.names)
|
||||
for key in Column.__slots__:
|
||||
canonical[key] = c[key]
|
||||
if DEBUG:
|
||||
Log.note("todo: {{table}}::{{column}}", table=canonical.es_index, column=canonical.es_column)
|
||||
self.todo.add(canonical)
|
||||
break
|
||||
else:
|
||||
self.meta.columns.add(c)
|
||||
self.todo.add(c)
|
||||
indexes = self.index_to_alias.get_domain(alias)
|
||||
update_required = not (MAX(times[i] for i in indexes) < self.es_cluster.last_metadata)
|
||||
metadata = self.es_cluster.get_metadata(force=update_required)
|
||||
|
||||
if ENABLE_META_SCAN:
|
||||
if DEBUG:
|
||||
Log.note("todo: {{table}}::{{column}}", table=c.es_index, column=c.es_column)
|
||||
# MARK meta.columns AS DIRTY TOO
|
||||
cols = self.meta.columns.find("meta.columns", None)
|
||||
for cc in cols:
|
||||
cc.partitions = cc.cardinality = None
|
||||
cc.last_updated = Date.now() - TOO_OLD
|
||||
self.todo.extend(cols)
|
||||
props = [
|
||||
# (index, type, properties) TRIPLE
|
||||
(self.es_cluster.get_index(index=i, type=t, debug=DEBUG), t, m.properties)
|
||||
for i, d in metadata.indices.items()
|
||||
if i in indexes
|
||||
for t, m in [_get_best_type_from_mapping(d.mappings)]
|
||||
]
|
||||
|
||||
def _get_columns(self, table=None):
|
||||
# TODO: HANDLE MORE THEN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
|
||||
table_path = split_field(table)
|
||||
es_index = table_path[0]
|
||||
meta = self.es_metadata.indices[es_index]
|
||||
if not meta or self.last_es_metadata < Date.now() - OLD_METADATA:
|
||||
self.es_metadata = self.default_es.get_metadata(force=True)
|
||||
meta = self.es_metadata.indices[es_index]
|
||||
# CONFIRM ALL COLUMNS ARE SAME, FIX IF NOT
|
||||
dirty = False
|
||||
all_comparisons = list(jx.pairwise(props)) + list(jx.pairwise(jx.reverse(props)))
|
||||
# NOTICE THE SAME (index, type, properties) TRIPLE FROM ABOVE
|
||||
for (i1, t1, p1), (i2, t2, p2) in all_comparisons:
|
||||
diff = elasticsearch.diff_schema(p2, p1)
|
||||
if not self.settings.read_only:
|
||||
for d in diff:
|
||||
dirty = True
|
||||
i1.add_property(*d)
|
||||
meta = self.es_cluster.get_metadata(force=dirty).indices[canonical_index]
|
||||
|
||||
for data_type, properties in meta.mappings.items():
|
||||
if data_type == "_default_":
|
||||
continue
|
||||
properties.properties["_id"] = {"type": "string", "index": "not_analyzed"}
|
||||
self._parse_properties(meta.index, properties, meta)
|
||||
data_type, mapping = _get_best_type_from_mapping(meta.mappings)
|
||||
mapping.properties["_id"] = {"type": "string", "index": "not_analyzed"}
|
||||
self._parse_properties(alias, mapping, meta)
|
||||
|
||||
def _parse_properties(self, abs_index, properties, meta):
|
||||
# IT IS IMPORTANT THAT NESTED PROPERTIES NAME ALL COLUMNS, AND
|
||||
# ALL COLUMNS ARE GIVEN NAMES FOR ALL NESTED PROPERTIES
|
||||
def add_column(c, query_path):
|
||||
c.last_updated = Date.now() - TOO_OLD
|
||||
if query_path[0] != ".":
|
||||
c.names[query_path[0]] = relative_field(c.names["."], query_path[0])
|
||||
|
||||
with self.meta.columns.locker:
|
||||
for alias in meta.aliases:
|
||||
c_ = copy(c)
|
||||
c_.es_index = alias
|
||||
self._upsert_column(c_)
|
||||
self._upsert_column(c)
|
||||
|
||||
abs_columns = elasticsearch.parse_properties(abs_index, None, properties.properties)
|
||||
self.abs_columns.update(abs_columns)
|
||||
def _parse_properties(self, alias, mapping, meta):
|
||||
abs_columns = elasticsearch.parse_properties(alias, None, mapping.properties)
|
||||
with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
|
||||
# LIST OF EVERY NESTED PATH
|
||||
query_paths = [[c.es_column] for c in abs_columns if c.type == "nested"]
|
||||
query_paths = [[c.es_column] for c in abs_columns if c.es_type == "nested"]
|
||||
for a, b in itertools.product(query_paths, query_paths):
|
||||
aa = a[0]
|
||||
bb = b[0]
|
||||
|
@ -166,15 +153,17 @@ class FromESMetadata(Schema):
|
|||
b.insert(i, aa)
|
||||
break
|
||||
for q in query_paths:
|
||||
q.append(".")
|
||||
query_paths.append(SELF_PATH)
|
||||
q.append(SELF_PATH)
|
||||
query_paths.append(ROOT_PATH)
|
||||
self.alias_to_query_paths[alias] = query_paths
|
||||
|
||||
# ADD RELATIVE COLUMNS
|
||||
# ADD RELATIVE NAMES
|
||||
for abs_column in abs_columns:
|
||||
abs_column = abs_column.__copy__()
|
||||
abs_column.type = es_type_to_json_type[abs_column.type]
|
||||
abs_column.last_updated = None
|
||||
abs_column.jx_type = es_type_to_json_type[abs_column.es_type]
|
||||
for query_path in query_paths:
|
||||
add_column(abs_column, query_path)
|
||||
abs_column.names[query_path[0]] = relative_field(abs_column.names["."], query_path[0])
|
||||
self.todo.add(self.meta.columns.add(abs_column))
|
||||
pass
|
||||
|
||||
def query(self, _query):
|
||||
|
@ -191,43 +180,62 @@ class FromESMetadata(Schema):
|
|||
RETURN METADATA COLUMNS
|
||||
"""
|
||||
table_path = split_field(table_name)
|
||||
es_index_name = table_path[0]
|
||||
query_path = join_field(table_path[1:])
|
||||
table = self.get_table(es_index_name)[0]
|
||||
abs_column_name = None if column_name == None else concat_field(query_path, column_name)
|
||||
root_table_name = table_path[0]
|
||||
|
||||
# FIND ALIAS
|
||||
if root_table_name in self.alias_new_since:
|
||||
alias = root_table_name
|
||||
else:
|
||||
alias = self.index_to_alias[root_table_name]
|
||||
|
||||
if not alias:
|
||||
self.es_cluster.get_metadata(force=True)
|
||||
# ENSURE INDEX -> ALIAS IS IN A MAPPING FOR LATER
|
||||
for a in self.es_cluster.get_aliases():
|
||||
self.alias_new_since[a.alias] = MAX([self.es_cluster.index_new_since[a.index], self.alias_new_since.get(a.alias)])
|
||||
self.index_to_alias[a.index] = coalesce(a.alias, a.index)
|
||||
|
||||
if root_table_name in self.alias_new_since:
|
||||
alias = root_table_name
|
||||
else:
|
||||
alias = self.index_to_alias[root_table_name]
|
||||
|
||||
if not alias:
|
||||
Log.error("{{table|quote}} does not exist", table=table_name)
|
||||
|
||||
now = Date.now()
|
||||
table = self.get_table(alias)[0]
|
||||
|
||||
try:
|
||||
# LAST TIME WE GOT INFO FOR THIS TABLE
|
||||
if not table:
|
||||
table = Table(
|
||||
name=es_index_name,
|
||||
table = TableDesc(
|
||||
name=alias,
|
||||
url=None,
|
||||
query_path=['.'],
|
||||
timestamp=Date.now()
|
||||
)
|
||||
with self.meta.tables.locker:
|
||||
self.meta.tables.add(table)
|
||||
self._get_columns(table=es_index_name)
|
||||
elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
|
||||
table.timestamp = Date.now()
|
||||
self._get_columns(table=es_index_name)
|
||||
self._reload_columns(alias=alias)
|
||||
elif force or table.timestamp < now - MAX_COLUMN_METADATA_AGE:
|
||||
table.timestamp = now
|
||||
self._reload_columns(alias=alias)
|
||||
|
||||
with self.meta.columns.locker:
|
||||
columns = self.meta.columns.find(es_index_name, column_name)
|
||||
if columns:
|
||||
columns = jx.sort(columns, "names.\\.")
|
||||
columns = self.meta.columns.find(alias, column_name)
|
||||
columns = jx.sort(columns, "names.\.")
|
||||
# AT LEAST WAIT FOR THE COLUMNS TO UPDATE
|
||||
while len(self.todo) and not all(columns.get("last_updated")):
|
||||
if DEBUG:
|
||||
if len(columns) > 10:
|
||||
Log.note("waiting for {{num}} columns to update", num=len([c for c in columns if not c.last_updated]))
|
||||
else:
|
||||
Log.note("waiting for columns to update {{columns|json}}", columns=[c.es_index+"."+c.es_column for c in columns if not c.last_updated])
|
||||
Till(seconds=1).wait()
|
||||
return columns
|
||||
except Exception as e:
|
||||
Log.error("Not expected", cause=e)
|
||||
|
||||
if abs_column_name:
|
||||
Log.error("no columns matching {{table}}.{{column}}", table=table_name, column=abs_column_name)
|
||||
|
||||
return []
|
||||
|
||||
def _update_cardinality(self, column):
|
||||
|
@ -237,11 +245,10 @@ class FromESMetadata(Schema):
|
|||
if column.es_index in self.index_does_not_exist:
|
||||
return
|
||||
|
||||
if column.type in STRUCT:
|
||||
if column.jx_type in STRUCT:
|
||||
Log.error("not supported")
|
||||
try:
|
||||
if column.es_index == "meta.columns":
|
||||
with self.meta.columns.locker:
|
||||
partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.columns, column.es_column) if g[column.es_column] != None])
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
|
@ -255,7 +262,6 @@ class FromESMetadata(Schema):
|
|||
})
|
||||
return
|
||||
if column.es_index == "meta.tables":
|
||||
with self.meta.columns.locker:
|
||||
partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.tables, column.es_column) if g[column.es_column] != None])
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
|
@ -271,10 +277,10 @@ class FromESMetadata(Schema):
|
|||
|
||||
es_index = column.es_index.split(".")[0]
|
||||
|
||||
is_text = [cc for cc in self.abs_columns if cc.es_column == column.es_column and cc.type == "text"]
|
||||
is_text = [cc for cc in self.meta.columns if cc.es_column == column.es_column and cc.es_type == "text"]
|
||||
if is_text:
|
||||
# text IS A MULTIVALUE STRING THAT CAN ONLY BE FILTERED
|
||||
result = self.default_es.post("/" + es_index + "/_search", data={
|
||||
result = self.es_cluster.post("/" + es_index + "/_search", data={
|
||||
"aggs": {
|
||||
"count": {"filter": {"match_all": {}}}
|
||||
},
|
||||
|
@ -284,14 +290,24 @@ class FromESMetadata(Schema):
|
|||
cardinality = 1001
|
||||
multi = 1001
|
||||
elif column.es_column == "_id":
|
||||
result = self.default_es.post("/" + es_index + "/_search", data={
|
||||
result = self.es_cluster.post("/" + es_index + "/_search", data={
|
||||
"query": {"match_all": {}},
|
||||
"size": 0
|
||||
})
|
||||
count = cardinality = result.hits.total
|
||||
multi = 1
|
||||
elif column.es_type == BOOLEAN:
|
||||
result = self.es_cluster.post("/" + es_index + "/_search", data={
|
||||
"aggs": {
|
||||
"count": _counting_query(column)
|
||||
},
|
||||
"size": 0
|
||||
})
|
||||
count = result.hits.total
|
||||
cardinality = 2
|
||||
multi = 1
|
||||
else:
|
||||
result = self.default_es.post("/" + es_index + "/_search", data={
|
||||
result = self.es_cluster.post("/" + es_index + "/_search", data={
|
||||
"aggs": {
|
||||
"count": _counting_query(column),
|
||||
"multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}
|
||||
|
@ -308,7 +324,6 @@ class FromESMetadata(Schema):
|
|||
query = Data(size=0)
|
||||
|
||||
if column.es_column == "_id":
|
||||
with self.meta.columns.locker:
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"count": cardinality,
|
||||
|
@ -323,7 +338,6 @@ class FromESMetadata(Schema):
|
|||
elif cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
|
||||
if DEBUG:
|
||||
Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
|
||||
with self.meta.columns.locker:
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"count": count,
|
||||
|
@ -335,10 +349,9 @@ class FromESMetadata(Schema):
|
|||
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
|
||||
})
|
||||
return
|
||||
elif column.type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
|
||||
elif column.es_type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
|
||||
if DEBUG:
|
||||
Log.note("{{field}} has {{num}} parts", field=column.es_index, num=cardinality)
|
||||
with self.meta.columns.locker:
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"count": count,
|
||||
|
@ -360,7 +373,7 @@ class FromESMetadata(Schema):
|
|||
else:
|
||||
query.aggs["_"] = {"terms": {"field": column.es_column, "size": cardinality}}
|
||||
|
||||
result = self.default_es.post("/" + es_index + "/_search", data=query)
|
||||
result = self.es_cluster.post("/" + es_index + "/_search", data=query)
|
||||
|
||||
aggs = result.aggregations._
|
||||
if aggs._nested:
|
||||
|
@ -368,9 +381,6 @@ class FromESMetadata(Schema):
|
|||
else:
|
||||
parts = jx.sort(aggs.buckets.key)
|
||||
|
||||
if DEBUG:
|
||||
Log.note("{{field}} has {{parts}}", field=column.names["."], parts=parts)
|
||||
with self.meta.columns.locker:
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"count": count,
|
||||
|
@ -389,7 +399,6 @@ class FromESMetadata(Schema):
|
|||
is_test_table = any(column.es_index.startswith(t) for t in [TEST_TABLE_PREFIX, TEST_TABLE])
|
||||
if is_missing_index and is_test_table:
|
||||
# WE EXPECT TEST TABLES TO DISAPPEAR
|
||||
with self.meta.columns.locker:
|
||||
self.meta.columns.update({
|
||||
"clear": ".",
|
||||
"where": {"eq": {"es_index": column.es_index}}
|
||||
|
@ -415,11 +424,10 @@ class FromESMetadata(Schema):
|
|||
while not please_stop:
|
||||
try:
|
||||
if not self.todo:
|
||||
with self.meta.columns.locker:
|
||||
old_columns = [
|
||||
c
|
||||
for c in self.meta.columns
|
||||
if (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.type not in STRUCT
|
||||
if (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.jx_type not in STRUCT
|
||||
]
|
||||
if old_columns:
|
||||
if DEBUG:
|
||||
|
@ -438,18 +446,19 @@ class FromESMetadata(Schema):
|
|||
Log.note("no more metadata to update")
|
||||
|
||||
column = self.todo.pop(Till(seconds=(10*MINUTE).seconds))
|
||||
if column:
|
||||
if column is THREAD_STOP:
|
||||
continue
|
||||
|
||||
if DEBUG:
|
||||
Log.note("update {{table}}.{{column}}", table=column.es_index, column=column.es_column)
|
||||
if column:
|
||||
if column.es_index in self.index_does_not_exist:
|
||||
with self.meta.columns.locker:
|
||||
self.meta.columns.update({
|
||||
"clear": ".",
|
||||
"where": {"eq": {"es_index": column.es_index}}
|
||||
})
|
||||
continue
|
||||
if column.type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
|
||||
with self.meta.columns.locker:
|
||||
if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
|
||||
column.last_updated = Date.now()
|
||||
continue
|
||||
elif column.last_updated >= Date.now()-TOO_OLD:
|
||||
|
@ -471,10 +480,9 @@ class FromESMetadata(Schema):
|
|||
if c == THREAD_STOP:
|
||||
break
|
||||
|
||||
if not c.last_updated or c.last_updated >= Date.now()-TOO_OLD:
|
||||
if c.last_updated >= Date.now()-TOO_OLD:
|
||||
continue
|
||||
|
||||
with self.meta.columns.locker:
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"last_updated": Date.now()
|
||||
|
@ -488,7 +496,143 @@ class FromESMetadata(Schema):
|
|||
"where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
|
||||
})
|
||||
if DEBUG:
|
||||
Log.note("Could not get {{col.es_index}}.{{col.es_column}} info", col=c)
|
||||
Log.note("Did not get {{col.es_index}}.{{col.es_column}} info", col=c)
|
||||
|
||||
def get_table(self, alias_name):
|
||||
with self.meta.tables.locker:
|
||||
return wrap([t for t in self.meta.tables.data if t.name == alias_name])
|
||||
|
||||
def get_snowflake(self, fact_table_name):
|
||||
return Snowflake(fact_table_name, self)
|
||||
|
||||
def get_schema(self, name):
|
||||
if name == "meta.columns":
|
||||
return self.meta.columns.schema
|
||||
query_path = split_field(name)
|
||||
return self.get_snowflake(query_path[0]).get_schema(join_field(query_path[1:]))
|
||||
|
||||
|
||||
class Snowflake(object):
    """
    REPRESENT ONE ALIAS, AND ITS NESTED ARRAYS
    """

    def __init__(self, alias, namespace):
        self.alias = alias
        self.namespace = namespace

    def get_schema(self, query_path):
        return Schema(query_path, self)

    @property
    def query_paths(self):
        """
        RETURN A LIST OF ALL NESTED QUERY PATHS
        """
        return self.namespace.alias_to_query_paths[self.alias]

    @property
    def columns(self):
        """
        RETURN ALL COLUMNS FROM ORIGIN OF FACT TABLE
        """
        return self.namespace.get_columns(literal_field(self.alias))
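A hedged sketch of how a Snowflake is reached in practice; `meta` (an ElasticsearchMetadata instance) and the "task" alias are assumptions, not part of the commit, and `leaves` comes from the Schema class defined just below.

# Hedged sketch (not part of this commit): `meta` is an assumed ElasticsearchMetadata
# instance and "task" an assumed alias; neither is defined in this diff.
snowflake = meta.get_snowflake("task")          # ElasticsearchMetadata.get_snowflake, defined above
print(snowflake.query_paths)                    # e.g. [["action.timings", "."], ["."]]
schema = snowflake.get_schema(".")              # Schema (defined below) for the fact table itself
for column in schema.leaves("action"):          # columns under "action", excluding deeper nested arrays
    print(column.names["."], column.es_column, column.jx_type)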
|
||||
|
||||
|
||||
class Schema(jx_base.Schema):
|
||||
"""
|
||||
REPRESENT JUST ONE TABLE IN A SNOWFLAKE
|
||||
"""
|
||||
|
||||
def __init__(self, query_path, snowflake):
|
||||
if not isinstance(snowflake.query_paths[0], list):
|
||||
Log.error("Snowflake query paths should be a list of string tuples (well, technically, a list of lists of strings)")
|
||||
self.query_path = [
|
||||
p
|
||||
for p in snowflake.query_paths
|
||||
if untype_path(p[0]) == query_path
|
||||
][0]
|
||||
self.snowflake = snowflake
|
||||
|
||||
def leaves(self, column_name):
|
||||
"""
|
||||
:param column_name:
|
||||
:return: ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
|
||||
"""
|
||||
column_name = unnest_path(column_name)
|
||||
columns = self.columns
|
||||
deep_path = self.query_path[0]
|
||||
for path in self.query_path:
|
||||
output = [
|
||||
c
|
||||
for c in columns
|
||||
if (
|
||||
(c.names['.'] != "_id" or column_name == "_id") and
|
||||
c.jx_type not in OBJECTS and
|
||||
startswith_field(unnest_path(c.names[path]), column_name)
|
||||
)
|
||||
]
|
||||
if output:
|
||||
return output
|
||||
return []
|
||||
|
||||
def values(self, column_name):
|
||||
"""
|
||||
RETURN ALL COLUMNS THAT column_name REFERS TO
|
||||
"""
|
||||
column_name = unnest_path(column_name)
|
||||
columns = self.columns
|
||||
deep_path = self.query_path[0]
|
||||
for path in self.query_path:
|
||||
output = [
|
||||
c
|
||||
for c in columns
|
||||
if (
|
||||
c.jx_type not in STRUCT and
|
||||
untype_path(c.names[path]) == column_name
|
||||
)
|
||||
]
|
||||
if output:
|
||||
return output
|
||||
return output
|
||||
|
||||
def __getitem__(self, column_name):
|
||||
return self.values(column_name)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return concat_field(self.snowflake.alias, self.query_path[0])
|
||||
|
||||
@property
|
||||
def columns(self):
|
||||
return self.snowflake.namespace.get_columns(literal_field(self.snowflake.alias))
|
||||
|
||||
def map_to_es(self):
|
||||
"""
|
||||
RETURN A MAP FROM THE NAMESPACE TO THE es_column NAME
|
||||
"""
|
||||
output = {}
|
||||
for path in self.query_path:
|
||||
set_default(
|
||||
output,
|
||||
{
|
||||
k: c.es_column
|
||||
for c in self.snowflake.columns
|
||||
if c.jx_type not in STRUCT
|
||||
for rel_name in [c.names[path]]
|
||||
for k in [rel_name, untype_path(rel_name), unnest_path(rel_name)]
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
class Table(jx_base.Table):
|
||||
|
||||
def __init__(self, full_name, container):
|
||||
jx_base.Table.__init__(self, full_name)
|
||||
self.container=container
|
||||
self.schema = container.namespace.get_schema(full_name)
|
||||
|
||||
|
||||
def _counting_query(c):
|
||||
|
@ -502,7 +646,7 @@ def _counting_query(c):
|
|||
"aggs": {
|
||||
"_nested": {"cardinality": {
|
||||
"field": c.es_column,
|
||||
"precision_threshold": 10 if c.type in elasticsearch.ES_NUMERIC_TYPES else 100
|
||||
"precision_threshold": 10 if c.es_type in elasticsearch.ES_NUMERIC_TYPES else 100
|
||||
}}
|
||||
}
|
||||
}
|
||||
|
@ -512,59 +656,6 @@ def _counting_query(c):
|
|||
}}
|
||||
|
||||
|
||||
def metadata_columns():
|
||||
return wrap(
|
||||
[
|
||||
Column(
|
||||
names={".":c},
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
type="string",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in [
|
||||
"type",
|
||||
"nested_path",
|
||||
"es_column",
|
||||
"es_index"
|
||||
]
|
||||
] + [
|
||||
Column(
|
||||
es_index="meta.columns",
|
||||
names={".":c},
|
||||
es_column=c,
|
||||
type="object",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in [
|
||||
"names",
|
||||
"domain",
|
||||
"partitions"
|
||||
]
|
||||
] + [
|
||||
Column(
|
||||
names={".": c},
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
type="long",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in [
|
||||
"count",
|
||||
"cardinality"
|
||||
]
|
||||
] + [
|
||||
Column(
|
||||
names={".": "last_updated"},
|
||||
es_index="meta.columns",
|
||||
es_column="last_updated",
|
||||
type="time",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def metadata_tables():
|
||||
return wrap(
|
||||
[
|
||||
|
@ -572,7 +663,7 @@ def metadata_tables():
|
|||
names={".": c},
|
||||
es_index="meta.tables",
|
||||
es_column=c,
|
||||
type="string",
|
||||
es_type="string",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in [
|
||||
|
@ -582,29 +673,17 @@ def metadata_tables():
|
|||
]
|
||||
]+[
|
||||
Column(
|
||||
names={".": "timestamp"},
|
||||
names={".": c},
|
||||
es_index="meta.tables",
|
||||
es_column="timestamp",
|
||||
type="integer",
|
||||
es_column=c,
|
||||
es_type="integer",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in [
|
||||
"timestamp"
|
||||
]
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def init_database(sql):
    sql.execute("""
        CREATE TABLE tables (
            table_name VARCHAR(200),
            alias CHAR
        )
    """)
|
||||
|
||||
|
||||
|
||||
OBJECTS = (jx_base.OBJECT, jx_base.EXISTS)
|
||||
|
|
|
@ -39,18 +39,18 @@ def _delayed_imports():
|
|||
MySQL = None
|
||||
|
||||
try:
|
||||
from jx_elasticsearch.meta import FromESMetadata
|
||||
from jx_elasticsearch.meta import ElasticsearchMetadata
|
||||
except Exception:
|
||||
FromESMetadata = None
|
||||
ElasticsearchSnowflake = None
|
||||
|
||||
set_default(container.type2container, {
|
||||
"mysql": MySQL,
|
||||
"memory": None,
|
||||
"meta": FromESMetadata
|
||||
"meta": ElasticsearchMetadata
|
||||
})
|
||||
|
||||
|
||||
def wrap_from(frum, schema=None):
|
||||
def find_container(frum, schema=None):
|
||||
"""
|
||||
:param frum:
|
||||
:param schema:
|
||||
|
@ -66,7 +66,6 @@ def wrap_from(frum, schema=None):
|
|||
Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")
|
||||
|
||||
type_ = None
|
||||
index = frum
|
||||
if frum.startswith("meta."):
|
||||
if frum == "meta.columns":
|
||||
return _meta.singlton.meta.columns.denormalized()
|
||||
|
@ -74,13 +73,13 @@ def wrap_from(frum, schema=None):
|
|||
return _meta.singlton.meta.tables
|
||||
else:
|
||||
Log.error("{{name}} not a recognized table", name=frum)
|
||||
else:
|
||||
|
||||
type_ = container.config.default.type
|
||||
index = split_field(frum)[0]
|
||||
fact_table_name = split_field(frum)[0]
|
||||
|
||||
settings = set_default(
|
||||
{
|
||||
"index": index,
|
||||
"index": fact_table_name,
|
||||
"name": frum,
|
||||
"exists": True,
|
||||
},
|
||||
|
@ -95,7 +94,7 @@ def wrap_from(frum, schema=None):
|
|||
return container.type2container[frum.type](frum.settings)
|
||||
elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
|
||||
from jx_base.query import QueryOp
|
||||
return QueryOp.wrap(frum, schema=schema)
|
||||
return QueryOp.wrap(frum, namespace=schema)
|
||||
elif isinstance(frum, (list, set)):
|
||||
return _ListContainer("test_list", frum)
|
||||
else:
|
||||
|
|
|
@ -14,6 +14,15 @@ from __future__ import unicode_literals
|
|||
import itertools
|
||||
from collections import Mapping
|
||||
|
||||
from mo_math import UNION
|
||||
|
||||
import jx_base
|
||||
from jx_base import Container
|
||||
from jx_base.expressions import jx_expression, Expression, Variable, TRUE
|
||||
from jx_python.expression_compiler import compile_expression
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from jx_python.lists.aggs import is_aggs, list_aggs
|
||||
from jx_python.meta import get_schema_from_list
|
||||
from mo_collections import UniqueIndex
|
||||
from mo_dots import Data, wrap, listwrap, unwraplist, unwrap, Null
|
||||
from mo_future import sort_using_key
|
||||
|
@ -21,21 +30,17 @@ from mo_logs import Log
|
|||
from mo_threads import Lock
|
||||
from pyLibrary import convert
|
||||
|
||||
from jx_base.expressions import jx_expression, Expression, TrueOp, Variable, TRUE
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from jx_base.container import Container
|
||||
from jx_python.expression_compiler import compile_expression
|
||||
from jx_python.lists.aggs import is_aggs, list_aggs
|
||||
from jx_python.meta import get_schema_from_list
|
||||
|
||||
_get = object.__getattribute__
|
||||
|
||||
|
||||
class ListContainer(Container):
|
||||
class ListContainer(Container, jx_base.Namespace, jx_base.Table):
|
||||
"""
|
||||
A CONTAINER WITH ONLY ONE TABLE
|
||||
"""
|
||||
def __init__(self, name, data, schema=None):
|
||||
# TODO: STORE THIS LIKE A CUBE FOR FASTER ACCESS AND TRANSFORMATION
|
||||
data = list(unwrap(data))
|
||||
Container.__init__(self, data, schema)
|
||||
Container.__init__(self)
|
||||
if schema == None:
|
||||
self._schema = get_schema_from_list(name, data)
|
||||
else:
|
||||
|
@ -52,6 +57,10 @@ class ListContainer(Container):
|
|||
def schema(self):
|
||||
return self._schema
|
||||
|
||||
@property
|
||||
def namespace(self):
|
||||
return self
|
||||
|
||||
def last(self):
|
||||
"""
|
||||
:return: Last element in the list, or Null
|
||||
|
@ -91,7 +100,7 @@ class ListContainer(Container):
|
|||
elif q.format == "table":
|
||||
head = [c.names['.'] for c in output.schema.columns]
|
||||
data = [
|
||||
[r[h] for h in head]
|
||||
[r if h == '.' else r[h] for h in head]
|
||||
for r in output.data
|
||||
]
|
||||
return Data(header=head, data=data, meta={"format": "table"})
|
||||
|
@ -170,6 +179,13 @@ class ListContainer(Container):
|
|||
new_schema = None
|
||||
|
||||
if isinstance(select, list):
|
||||
if all(
|
||||
isinstance(s.value, Variable) and s.name == s.value.var
|
||||
for s in select
|
||||
):
|
||||
names = set(s.value.var for s in select)
|
||||
new_schema = Schema(".", [c for c in self.schema.columns if c.names['.'] in names])
|
||||
|
||||
push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]
|
||||
def selector(d):
|
||||
output = Data()
|
||||
|
@ -250,6 +266,23 @@ class ListContainer(Container):
|
|||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
# class Namespace(jx_base.Namespace):
|
||||
|
||||
def get_snowflake(self, name):
|
||||
if self.name != name:
|
||||
Log.error("This container only has table by name of {{name}}", name=name)
|
||||
return self
|
||||
|
||||
def get_schema(self, name):
|
||||
if self.name != name:
|
||||
Log.error("This container only has table by name of {{name}}", name=name)
|
||||
return self.schema
|
||||
|
||||
def get_table(self, name):
|
||||
if self.name != name:
|
||||
Log.error("This container only has table by name of {{name}}", name=name)
|
||||
return self
|
||||
|
||||
|
||||
def _exec(code):
|
||||
try:
|
||||
|
@ -261,6 +294,7 @@ def _exec(code):
|
|||
|
||||
|
||||
|
||||
|
||||
from jx_base.schema import Schema
|
||||
from jx_python import jx
|
||||
|
||||
|
|
|
@ -60,64 +60,64 @@ def get(expr):
|
|||
return jx_expression_to_function(expr)
|
||||
|
||||
|
||||
def run(query, frum=Null):
|
||||
def run(query, container=Null):
|
||||
"""
|
||||
THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
|
||||
BUT IT ALSO PROCESSES A PLAIN list CONTAINER; THAT LOGIC SHOULD BE SEPARATED INTO A ListContainer
|
||||
"""
|
||||
if frum == None:
|
||||
frum = wrap(query)['from']
|
||||
query_op = QueryOp.wrap(query, table=frum, schema=frum.schema)
|
||||
if container == None:
|
||||
container = wrap(query)['from'].container
|
||||
query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
|
||||
else:
|
||||
query_op = QueryOp.wrap(query, frum, frum.schema)
|
||||
query_op = QueryOp.wrap(query, container, container.namespace)
|
||||
|
||||
if frum == None:
|
||||
if container == None:
|
||||
from jx_python.containers.list_usingPythonList import DUAL
|
||||
return DUAL.query(query_op)
|
||||
elif isinstance(frum, Container):
|
||||
return frum.query(query_op)
|
||||
elif isinstance(frum, (list, set) + generator_types):
|
||||
frum = wrap(list(frum))
|
||||
elif isinstance(frum, Cube):
|
||||
elif isinstance(container, Container):
|
||||
return container.query(query_op)
|
||||
elif isinstance(container, (list, set) + generator_types):
|
||||
container = wrap(list(container))
|
||||
elif isinstance(container, Cube):
|
||||
if is_aggs(query_op):
|
||||
return cube_aggs(frum, query_op)
|
||||
elif isinstance(frum, QueryOp):
|
||||
frum = run(frum)
|
||||
return cube_aggs(container, query_op)
|
||||
elif isinstance(container, QueryOp):
|
||||
container = run(container)
|
||||
else:
|
||||
Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
|
||||
Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)
|
||||
|
||||
if is_aggs(query_op):
|
||||
frum = list_aggs(frum, query_op)
|
||||
container = list_aggs(container, query_op)
|
||||
else: # SETOP
|
||||
if query_op.where is not TRUE:
|
||||
frum = filter(frum, query_op.where)
|
||||
container = filter(container, query_op.where)
|
||||
|
||||
if query_op.sort:
|
||||
frum = sort(frum, query_op.sort, already_normalized=True)
|
||||
container = sort(container, query_op.sort, already_normalized=True)
|
||||
|
||||
if query_op.select:
|
||||
frum = select(frum, query_op.select)
|
||||
container = select(container, query_op.select)
|
||||
|
||||
if query_op.window:
|
||||
if isinstance(frum, Cube):
|
||||
frum = list(frum.values())
|
||||
if isinstance(container, Cube):
|
||||
container = list(container.values())
|
||||
|
||||
for param in query_op.window:
|
||||
window(frum, param)
|
||||
window(container, param)
|
||||
|
||||
# AT THIS POINT container IS IN LIST FORMAT, NOW PACKAGE THE RESULT
|
||||
if query_op.format == "cube":
|
||||
frum = convert.list2cube(frum)
|
||||
container = convert.list2cube(container)
|
||||
elif query_op.format == "table":
|
||||
frum = convert.list2table(frum)
|
||||
frum.meta.format = "table"
|
||||
container = convert.list2table(container)
|
||||
container.meta.format = "table"
|
||||
else:
|
||||
frum = wrap({
|
||||
container = wrap({
|
||||
"meta": {"format": "list"},
|
||||
"data": frum
|
||||
"data": container
|
||||
})
|
||||
|
||||
return frum
|
||||
return container
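A hedged usage sketch (not part of the commit): run() accepts a plain Python list as the container, so a query can be issued directly against in-memory data; the exact result shape depends on the requested format.

# Hedged sketch (not from this commit): querying a plain list through run().
people = [
    {"name": "kyle", "status": "ok"},
    {"name": "alice", "status": "error"},
    {"name": "bob", "status": "ok"},
]
result = run(
    {"select": "name", "where": {"eq": {"status": "ok"}}, "format": "list"},
    container=people,
)
# result.meta.format == "list"; result.data should hold the selected values for the matching rows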
|
||||
|
||||
|
||||
groupby = group_by.groupby
|
||||
|
|
|
@ -11,19 +11,18 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
from datetime import date
|
||||
from datetime import datetime
|
||||
|
||||
from jx_base import STRUCT, Column
|
||||
from jx_base.container import Container
|
||||
import jx_base
|
||||
from jx_base import python_type_to_json_type
|
||||
from jx_base import STRUCT, Column, Table
|
||||
from jx_base.schema import Schema
|
||||
from jx_python import jx
|
||||
from mo_collections import UniqueIndex
|
||||
from mo_dots import Data, concat_field, get_attr, listwrap, unwraplist, NullType, FlatList
|
||||
from mo_dots import split_field, join_field, ROOT_PATH
|
||||
from mo_dots import wrap
|
||||
from mo_future import none_type
|
||||
from mo_future import text_type, long, PY2
|
||||
from mo_dots import Data, concat_field, get_attr, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap
|
||||
from mo_future import none_type, text_type, long, PY2
|
||||
from mo_json.typed_encoder import untype_path, unnest_path
|
||||
from mo_logs import Log
|
||||
from mo_threads import Lock
|
||||
|
@ -32,48 +31,101 @@ from mo_times.dates import Date
|
|||
singlton = None
|
||||
|
||||
|
||||
class ColumnList(Container):
|
||||
class ColumnList(Table):
|
||||
"""
|
||||
OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
Table.__init__(self, "meta.columns")
|
||||
self.data = {} # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS)
|
||||
self.locker = Lock()
|
||||
self.count = 0
|
||||
self.meta_schema = None
|
||||
self._schema = None
|
||||
self.extend(METADATA_COLUMNS)
|
||||
|
||||
def find(self, es_index, abs_column_name):
|
||||
if "." in es_index and not es_index.startswith("meta."):
|
||||
Log.error("unlikely index name")
|
||||
with self.locker:
|
||||
if es_index.startswith("meta."):
|
||||
self._update_meta()
|
||||
|
||||
if not abs_column_name:
|
||||
return [c for cs in self.data.get(es_index, {}).values() for c in cs]
|
||||
else:
|
||||
return self.data.get(es_index, {}).get(abs_column_name, [])
|
||||
|
||||
def insert(self, columns):
|
||||
def extend(self, columns):
|
||||
self.dirty = True
|
||||
with self.locker:
|
||||
for column in columns:
|
||||
self.add(column)
|
||||
self._add(column)
|
||||
|
||||
def add(self, column):
|
||||
self.dirty = True
|
||||
with self.locker:
|
||||
return self._add(column)
|
||||
|
||||
def _add(self, column):
|
||||
columns_for_table = self.data.setdefault(column.es_index, {})
|
||||
abs_cname = column.names["."]
|
||||
_columns = columns_for_table.get(abs_cname)
|
||||
if not _columns:
|
||||
_columns = columns_for_table[abs_cname] = []
|
||||
_columns.append(column)
|
||||
self.count += 1
|
||||
existing_columns = columns_for_table.setdefault(column.names["."], [])
|
||||
|
||||
for canonical in existing_columns:
|
||||
if canonical is column:
|
||||
return canonical
|
||||
if canonical.es_type == column.es_type:
|
||||
set_default(column.names, canonical.names)
|
||||
for key in Column.__slots__:
|
||||
canonical[key] = column[key]
|
||||
return canonical
|
||||
existing_columns.append(column)
|
||||
return column
|
||||
|
||||
def _update_meta(self):
|
||||
if not self.dirty:
|
||||
return
|
||||
|
||||
for mcl in self.data.get("meta.columns").values():
|
||||
for mc in mcl:
|
||||
count = 0
|
||||
values = set()
|
||||
objects = 0
|
||||
multi = 1
|
||||
for t, cs in self.data.items():
|
||||
for c, css in cs.items():
|
||||
for column in css:
|
||||
value = column[mc.names["."]]
|
||||
if value == None:
|
||||
pass
|
||||
else:
|
||||
count += 1
|
||||
if isinstance(value, list):
|
||||
multi = max(multi, len(value))
|
||||
try:
|
||||
values |= set(value)
|
||||
except Exception:
|
||||
objects += len(value)
|
||||
elif isinstance(value, Mapping):
|
||||
objects += 1
|
||||
else:
|
||||
values.add(value)
|
||||
mc.count = count
|
||||
mc.cardinality = len(values) + objects
|
||||
mc.partitions = jx.sort(values)
|
||||
mc.multi = multi
|
||||
mc.last_updated = Date.now()
|
||||
self.dirty = False
|
||||
|
||||
def __iter__(self):
|
||||
self._update_meta()
|
||||
for t, cs in self.data.items():
|
||||
for c, css in cs.items():
|
||||
for column in css:
|
||||
yield column
|
||||
|
||||
def __len__(self):
|
||||
return self.count
|
||||
return self.data['meta.columns']['es_index'].count
|
||||
|
||||
def update(self, command):
|
||||
self.dirty = True
|
||||
try:
|
||||
command = wrap(command)
|
||||
eq = command.where.eq
|
||||
|
@ -81,9 +133,11 @@ class ColumnList(Container):
|
|||
columns = self.find(eq.es_index, eq.name)
|
||||
columns = [c for c in columns if all(get_attr(c, k) == v for k, v in eq.items())]
|
||||
else:
|
||||
with self.locker:
|
||||
columns = list(self)
|
||||
columns = jx.filter(columns, command.where)
|
||||
|
||||
with self.locker:
|
||||
for col in list(columns):
|
||||
for k in command["clear"]:
|
||||
if k == ".":
|
||||
|
@ -97,23 +151,37 @@ class ColumnList(Container):
|
|||
Log.error("should not happen", cause=e)
|
||||
|
||||
def query(self, query):
|
||||
with self.locker:
|
||||
self._update_meta()
|
||||
query.frum = self.__iter__()
|
||||
output = jx.run(query)
|
||||
|
||||
return output
|
||||
|
||||
def groupby(self, keys):
|
||||
with self.locker:
|
||||
self._update_meta()
|
||||
return jx.groupby(self.__iter__(), keys)
|
||||
|
||||
@property
|
||||
def schema(self):
|
||||
return wrap({k: set(v) for k, v in self.data["meta.columns"].items()})
|
||||
if not self._schema:
|
||||
with self.locker:
|
||||
self._update_meta()
|
||||
self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs])
|
||||
return self._schema
|
||||
|
||||
@property
|
||||
def namespace(self):
|
||||
return self
|
||||
|
||||
def denormalized(self):
|
||||
"""
|
||||
THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
|
||||
THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES
|
||||
"""
|
||||
with self.locker:
|
||||
self._update_meta()
|
||||
output = [
|
||||
{
|
||||
"table": concat_field(c.es_index, untype_path(table)),
|
||||
|
@ -124,19 +192,25 @@ class ColumnList(Container):
|
|||
"last_updated": c.last_updated,
|
||||
"count": c.count,
|
||||
"nested_path": [unnest_path(n) for n in c.nested_path],
|
||||
"type": c.type
|
||||
"es_type": c.es_type,
|
||||
"type": c.jx_type
|
||||
}
|
||||
for tname, css in self.data.items()
|
||||
for cname, cs in css.items()
|
||||
for c in cs
|
||||
if c.type not in STRUCT # and c.es_column != "_id"
|
||||
if c.jx_type not in STRUCT # and c.es_column != "_id"
|
||||
for table, name in c.names.items()
|
||||
]
|
||||
if not self.meta_schema:
|
||||
self.meta_schema = get_schema_from_list("meta\\.columns", output)
|
||||
|
||||
from jx_python.containers.list_usingPythonList import ListContainer
|
||||
return ListContainer("meta\\.columns", data=output, schema=self.meta_schema)
|
||||
return ListContainer(
|
||||
self.name,
|
||||
data=output,
|
||||
schema=jx_base.Schema(
|
||||
"meta.columns",
|
||||
SIMPLE_METADATA_COLUMNS
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def get_schema_from_list(table_name, frum):
|
||||
|
@ -169,11 +243,13 @@ def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
|
|||
names={table_name: full_name},
|
||||
es_column=full_name,
|
||||
es_index=".",
|
||||
type="undefined",
|
||||
jx_type=python_type_to_json_type[d.__class__],
|
||||
es_type=row_type,
|
||||
nested_path=nested_path
|
||||
)
|
||||
columns.add(column)
|
||||
column.type = _merge_type[column.type][row_type]
|
||||
column.es_type = _merge_type[column.es_type][row_type]
|
||||
column.jx_type = _merge_type[column.jx_type][row_type]
|
||||
else:
|
||||
for name, value in d.items():
|
||||
full_name = concat_field(parent, name)
|
||||
|
@ -183,7 +259,7 @@ def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
|
|||
names={table_name: full_name},
|
||||
es_column=full_name,
|
||||
es_index=".",
|
||||
type="undefined",
|
||||
es_type="undefined",
|
||||
nested_path=nested_path
|
||||
)
|
||||
columns.add(column)
|
||||
|
@ -199,11 +275,8 @@ def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
|
|||
this_type = "nested"
|
||||
else:
|
||||
this_type = _type_to_name[value.__class__]
|
||||
|
||||
new_type = _merge_type[column.type][this_type]
|
||||
if new_type == None:
|
||||
Log.error("can not combine {{type1}} with {{type2}} for column {{column}}", type1=column.type, type2=this_type, column=full_name)
|
||||
column.type = new_type
|
||||
new_type = _merge_type[column.es_type][this_type]
|
||||
column.es_type = new_type
|
||||
|
||||
if this_type == "object":
|
||||
_get_schema_from_list([value], table_name, full_name, nested_path, columns)
|
||||
|
@ -213,6 +286,76 @@ def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
|
|||
_get_schema_from_list(value, table_name, full_name, newpath, columns)
|
||||
|
||||
|
||||
METADATA_COLUMNS = (
|
||||
[
|
||||
Column(
|
||||
names={".": c},
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="string",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in ["es_type", "jx_type", "nested_path", "es_column", "es_index"]
|
||||
] + [
|
||||
Column(
|
||||
es_index="meta.columns",
|
||||
names={".": c},
|
||||
es_column=c,
|
||||
es_type="object",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in ["names", "partitions"]
|
||||
] + [
|
||||
Column(
|
||||
names={".": c},
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="long",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in ["count", "cardinality", "multi"]
|
||||
] + [
|
||||
Column(
|
||||
names={".": "last_updated"},
|
||||
es_index="meta.columns",
|
||||
es_column="last_updated",
|
||||
es_type="time",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
SIMPLE_METADATA_COLUMNS = (
|
||||
[
|
||||
Column(
|
||||
names={".": c},
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="string",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in ["table", "name", "type", "nested_path"]
|
||||
] + [
|
||||
Column(
|
||||
names={".": c},
|
||||
es_index="meta.columns",
|
||||
es_column=c,
|
||||
es_type="long",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
for c in ["count", "cardinality", "multi"]
|
||||
] + [
|
||||
Column(
|
||||
names={".": "last_updated"},
|
||||
es_index="meta.columns",
|
||||
es_column="last_updated",
|
||||
es_type="time",
|
||||
nested_path=ROOT_PATH
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
_type_to_name = {
|
||||
none_type: "undefined",
|
||||
NullType: "undefined",
|
||||
|
@ -242,6 +385,7 @@ _merge_type = {
|
|||
"long": "long",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": "object",
|
||||
"nested": "nested"
|
||||
|
@ -253,6 +397,7 @@ _merge_type = {
|
|||
"long": "long",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
|
@ -264,6 +409,7 @@ _merge_type = {
|
|||
"long": "long",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
|
@ -275,6 +421,7 @@ _merge_type = {
|
|||
"long": "long",
|
||||
"float": "double",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
|
@ -286,6 +433,7 @@ _merge_type = {
|
|||
"long": "double",
|
||||
"float": "float",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
|
@ -297,6 +445,19 @@ _merge_type = {
|
|||
"long": "double",
|
||||
"float": "double",
|
||||
"double": "double",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
},
|
||||
"number": {
|
||||
"undefined": "number",
|
||||
"boolean": "number",
|
||||
"integer": "number",
|
||||
"long": "number",
|
||||
"float": "number",
|
||||
"double": "number",
|
||||
"number": "number",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
|
@ -308,6 +469,7 @@ _merge_type = {
|
|||
"long": "string",
|
||||
"float": "string",
|
||||
"double": "string",
|
||||
"number": "string",
|
||||
"string": "string",
|
||||
"object": None,
|
||||
"nested": None
|
||||
|
@ -319,6 +481,7 @@ _merge_type = {
|
|||
"long": None,
|
||||
"float": None,
|
||||
"double": None,
|
||||
"number": None,
|
||||
"string": None,
|
||||
"object": "object",
|
||||
"nested": "nested"
|
||||
|
@ -330,9 +493,9 @@ _merge_type = {
|
|||
"long": None,
|
||||
"float": None,
|
||||
"double": None,
|
||||
"number": None,
|
||||
"string": None,
|
||||
"object": "nested",
|
||||
"nested": "nested"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,59 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
from mo_dots import set_default, Data
|
||||
from jx_base.query import QueryOp
|
||||
|
||||
|
||||
class Namespace(object):
|
||||
|
||||
def convert(self, expr):
|
||||
raise NotImplementedError()
|
||||
|
||||
def _convert_query(self, query):
|
||||
output = QueryOp("from", None)
|
||||
output.select = self._convert_clause(query.select)
|
||||
output.where = self.convert(query.where)
|
||||
output["from"] = self._convert_from(query["from"])
|
||||
output.edges = self._convert_clause(query.edges)
|
||||
output.having = convert_list(self._convert_having, query.having)
|
||||
output.window = convert_list(self._convert_window, query.window)
|
||||
output.sort = self._convert_clause(query.sort)
|
||||
output.format = query.format
|
||||
|
||||
return output
|
||||
|
||||
def _convert_from(self, frum):
|
||||
raise NotImplementedError()
|
||||
|
||||
def _convert_clause(self, clause):
|
||||
raise NotImplementedError()
|
||||
|
||||
def _convert_having(self, clause):
|
||||
raise NotImplementedError()
|
||||
|
||||
def _convert_window(self, clause):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def convert_list(operator, operand):
|
||||
if operand==None:
|
||||
return None
|
||||
elif isinstance(operand, Mapping):
|
||||
return operator(operand)
|
||||
else:
|
||||
return map(operator, operand)
|
||||
|
||||
|
|
@ -10,10 +10,12 @@
|
|||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
|
||||
import jx_base
|
||||
from mo_dots import Data
|
||||
|
||||
|
||||
class Table(object):
|
||||
class Table(jx_base.Table):
|
||||
|
||||
__slots__ = ['header', 'data', 'meta']
|
||||
|
||||
|
|
|
@ -12,6 +12,8 @@ from __future__ import unicode_literals
|
|||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
class Relation_usingList(object):
|
||||
def __init__(self):
|
||||
|
@ -20,32 +22,51 @@ class Relation_usingList(object):
|
|||
def len(self):
|
||||
return len(self.all)
|
||||
|
||||
def add(self, key, value):
|
||||
test = (key, value)
|
||||
if test not in self.all:
|
||||
self.all.add(test)
|
||||
|
||||
def testAndAdd(self, key, value):
|
||||
"""
|
||||
RETURN TRUE IF THIS RELATION IS NET-NEW
|
||||
"""
|
||||
test = (key, value)
|
||||
if test not in self.all:
|
||||
output = test not in self.all
|
||||
self.all.add(test)
|
||||
return True
|
||||
return False
|
||||
return output
|
||||
|
||||
def extend(self, key, values):
|
||||
for v in values:
|
||||
self.add(key, v)
|
||||
self[key] = v
|
||||
|
||||
    def __getitem__(self, key):
        """
        USE THIS IF YOU ARE CONFIDENT THIS IS A MANY-TO-ONE MAPPING
        RETURN THE SINGLE CO-DOMAIN OBJECT THIS key MAPS TO
        """
        output = [v for k, v in self.all if k == key]
        if not output:
            return None
        elif len(output) == 1:
            return output[0]
        else:
            Log.error("Not allowed")

    def __setitem__(self, key, value):
        self.all.add((key, value))

    def get_domain(self, value):
        """
        RETURN domain FOR GIVEN CODOMAIN
        :param value:
        :return:
        """
        return [k for k, v in self.all if v == value]

    def get_codomain(self, key):
        """
        RETURN AN ARRAY OF OBJECTS THAT key MAPS TO
        """
        return [v for k, v in self.all if k == key]
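A hedged sketch of the intended use, mirroring the index-to-alias mapping seen in the metadata code above; the index and alias names are invented.

# Hedged sketch (not part of this commit); index/alias names are invented.
rel = Relation_usingList()
rel.add("testing20180101", "testing")       # index -> alias
rel.add("testing20180102", "testing")
rel["unitlogs20180101"] = "unitlogs"        # __setitem__ is equivalent to add()
rel.get_domain("testing")                   # -> ["testing20180101", "testing20180102"] (order not guaranteed; backed by a set)
rel["testing20180101"]                      # -> "testing", safe because each index maps to a single alias
rel.get_codomain("testing20180101")         # -> ["testing"]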
|
||||
|
||||
|
||||
|
||||
class Relation(object):
|
||||
def __init__(self):
|
||||
self.map = dict()
|
||||
|
@ -96,5 +117,3 @@ class Relation(object):
|
|||
|
||||
def domain(self):
|
||||
return self.map.keys()
|
||||
|
||||
|
||||
|
|
|
@ -411,6 +411,12 @@ def lower_match(value, candidates):
|
|||
|
||||
|
||||
def wrap(v):
|
||||
"""
|
||||
WRAP AS Data OBJECT FOR DATA PROCESSING: https://github.com/klahnakoski/mo-dots/tree/dev/docs
|
||||
:param v: THE VALUE TO WRAP
|
||||
:return: Data INSTANCE
|
||||
"""
|
||||
|
||||
type_ = _get(v, "__class__")
|
||||
|
||||
if type_ is dict:
|
||||
|
@ -422,7 +428,7 @@ def wrap(v):
|
|||
elif type_ is list:
|
||||
return FlatList(v)
|
||||
elif type_ in generator_types:
|
||||
return FlatList(list(v))
|
||||
return FlatList(list(unwrap(vv) for vv in v))
|
||||
else:
|
||||
return v
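A hedged sketch of wrap() on the three main input kinds; the assertions reflect standard mo_dots behaviour rather than anything introduced by this commit.

# Hedged sketch (not part of this commit): wrap() on the three main input kinds.
from mo_dots import wrap

d = wrap({"a": {"b": 1}})
assert d.a.b == 1                       # dict -> Data, with dot-path attribute access
assert d.a.missing == None              # absent paths are Null, which compares equal to None

rows = wrap([{"x": 1}, {"x": 2}])
assert list(rows.get("x")) == [1, 2]    # list -> FlatList; get() is the "simple select" over members

assert wrap(42) == 42                   # everything else is returned unchanged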
|
||||
|
||||
|
|
|
@ -19,12 +19,20 @@ from mo_dots.nones import Null
|
|||
_get = object.__getattribute__
|
||||
_set = object.__setattr__
|
||||
_emit_slice_warning = True
|
||||
|
||||
_datawrap = None
|
||||
Log = None
|
||||
|
||||
|
||||
def _late_import():
|
||||
global _datawrap
|
||||
global Log
|
||||
|
||||
from mo_dots.objects import datawrap as _datawrap
|
||||
try:
|
||||
from mo_logs import Log
|
||||
except Exception:
|
||||
from mo_dots.utils import PoorLogger as Log
|
||||
|
||||
_ = _datawrap
|
||||
|
||||
|
@ -33,6 +41,7 @@ class FlatList(list):
|
|||
"""
|
||||
ENCAPSULATES HANDING OF Nulls BY wrapING ALL MEMBERS AS NEEDED
|
||||
ENCAPSULATES FLAT SLICES ([::]) FOR USE IN WINDOW FUNCTIONS
|
||||
https://github.com/klahnakoski/mo-dots/tree/dev/docs#flatlist-is-flat
|
||||
"""
|
||||
EMPTY = None
|
||||
|
||||
|
@ -50,7 +59,8 @@ class FlatList(list):
|
|||
if isinstance(index, slice):
|
||||
# IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None)
|
||||
if index.step is not None:
|
||||
Log = _late_import()
|
||||
if not Log:
|
||||
_late_import()
|
||||
Log.error("slice step must be None, do not know how to deal with values")
|
||||
length = len(_get(self, "list"))
|
||||
|
||||
|
@ -78,7 +88,8 @@ class FlatList(list):
|
|||
_list.append(None)
|
||||
_list[i] = unwrap(y)
|
||||
except Exception as e:
|
||||
Log = _late_import()
|
||||
if not Log:
|
||||
_late_import()
|
||||
Log.error("problem", cause=e)
|
||||
|
||||
def __getattribute__(self, key):
|
||||
|
@ -95,20 +106,22 @@ class FlatList(list):
|
|||
"""
|
||||
simple `select`
|
||||
"""
|
||||
if not _datawrap:
|
||||
if not Log:
|
||||
_late_import()
|
||||
|
||||
return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get(self, "list")])
|
||||
|
||||
def select(self, key):
|
||||
Log = _late_import()
|
||||
if not Log:
|
||||
_late_import()
|
||||
Log.error("Not supported. Use `get()`")
|
||||
|
||||
def filter(self, _filter):
|
||||
return FlatList(vals=[unwrap(u) for u in (wrap(v) for v in _get(self, "list")) if _filter(u)])
|
||||
|
||||
def __delslice__(self, i, j):
|
||||
Log = _late_import()
|
||||
if not Log:
|
||||
_late_import()
|
||||
Log.error("Can not perform del on slice: modulo arithmetic was performed on the parameters. You can try using clear()")
|
||||
|
||||
def __clear__(self):
|
||||
|
@ -135,7 +148,8 @@ class FlatList(list):
|
|||
|
||||
if _emit_slice_warning:
|
||||
_emit_slice_warning = False
|
||||
Log = _late_import()
|
||||
if not Log:
|
||||
_late_import()
|
||||
Log.warning("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step] (see https://github.com/klahnakoski/pyLibrary/blob/master/pyLibrary/dot/README.md#the-slice-operator-in-python27-is-inconsistent)")
|
||||
return self[i:j:]
|
||||
|
||||
|
|
|
@ -408,6 +408,10 @@ class File(object):
|
|||
|
||||
|
||||
class TempDirectory(File):
|
||||
"""
|
||||
A CONTEXT MANAGER FOR AN ALLOCATED, BUT UNOPENED TEMPORARY DIRECTORY
|
||||
WILL BE DELETED WHEN EXITED
|
||||
"""
|
||||
def __new__(cls):
|
||||
return File.__new__(cls, None)
|
||||
|
||||
|
@ -418,10 +422,14 @@ class TempDirectory(File):
|
|||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
Thread.run("delete "+self.name, delete_daemon, file=self)
|
||||
Thread.run("delete dir "+self.name, delete_daemon, file=self)
|
||||
|
||||
|
||||
class TempFile(File):
|
||||
"""
|
||||
A CONTEXT MANAGER FOR AN ALLOCATED, BUT UNOPENED TEMPORARY FILE
|
||||
WILL BE DELETED WHEN EXITED
|
||||
"""
|
||||
def __new__(cls, *args, **kwargs):
|
||||
return object.__new__(cls)
|
||||
|
||||
|
@ -434,7 +442,7 @@ class TempFile(File):
|
|||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
Thread.run("delete "+self.name, delete_daemon, file=self)
|
||||
Thread.run("delete file "+self.name, delete_daemon, file=self)
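A hedged usage sketch; the `mo_files` module path and the File read/write helpers are assumptions based on the surrounding class, not something this commit adds.

# Hedged sketch (not part of this commit); module path and File helpers are assumed.
from mo_files import TempFile, TempDirectory

with TempFile() as temp_file:
    temp_file.write("scratch content")     # File.write / File.read assumed from the File base class
    print(temp_file.read())
# on exit, a "delete file ..." thread removes it

with TempDirectory() as temp_dir:
    print(temp_dir.abspath)                # usable like any other File while the block is open
# on exit, a "delete dir ..." thread removes the directory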
|
||||
|
||||
|
||||
def _copy(from_, to_):
|
||||
|
|
|
@ -37,8 +37,15 @@ if PY3:
|
|||
unichr = chr
|
||||
|
||||
xrange = range
|
||||
filter_type = type(filter(lambda x: True, []))
|
||||
generator_types = (collections.Iterable, filter_type)
|
||||
def _gen():
|
||||
yield
|
||||
|
||||
generator_types = (
|
||||
type(_gen()),
|
||||
type(filter(lambda x: True, [])),
|
||||
type({}.items()),
|
||||
type({}.values())
|
||||
)
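For orientation, a hedged sketch of what the widened tuple now accepts; these checks only illustrate the definition above.

# Hedged sketch (not part of this commit): the widened tuple recognizes generators,
# filter objects and dict views, all of which wrap() materializes into a FlatList.
assert isinstance((x for x in []), generator_types)
assert isinstance(filter(None, []), generator_types)
assert isinstance({}.items(), generator_types)
assert isinstance({}.values(), generator_types)
assert not isinstance([], generator_types)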
|
||||
unichr = chr
|
||||
|
||||
round = round
|
||||
|
|
|
@ -56,6 +56,8 @@ def expand(doc, doc_url="param://", params=None):
|
|||
ASSUMING YOU ALREADY PULLED THE doc FROM doc_url, YOU CAN STILL USE THE
|
||||
EXPANDING FEATURE
|
||||
|
||||
USE mo_json_config.expand({}) TO ASSUME CURRENT WORKING DIRECTORY
|
||||
|
||||
:param doc: THE DATA STRUCTURE FROM JSON SOURCE
|
||||
:param doc_url: THE URL THIS doc CAME FROM (DEFAULT USES params AS A DOCUMENT SOURCE)
|
||||
:param params: EXTRA PARAMETERS NOT FOUND IN THE doc_url PARAMETERS (WILL SUPERSEDE PARAMETERS FROM doc_url)
|
||||
|
|
|
@ -97,9 +97,10 @@ def override(func):
|
|||
if e.message.startswith(func_name) and "takes at least" in e:
|
||||
missing = [p for p in params if str(p) not in packed]
|
||||
get_logger().error(
|
||||
"Problem calling {{func_name}}: Expecting parameter {{missing}}",
|
||||
"Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
|
||||
func_name=func_name,
|
||||
missing=missing,
|
||||
given=packed.keys(),
|
||||
stack_depth=1
|
||||
)
|
||||
get_logger().error("Error dispatching call", e)
|
||||
|
|
|
@ -103,7 +103,13 @@ class Log(object):
|
|||
|
||||
@classmethod
|
||||
def stop(cls):
|
||||
from mo_logs import profiles
|
||||
"""
|
||||
DECONSTRUCTS ANY LOGGING, AND RETURNS TO DIRECT-TO-stdout LOGGING
|
||||
EXECUTING MULTIPLE TIMES IN A ROW IS SAFE; IT HAS NO NET EFFECT, AND IT STILL LOGS TO stdout
|
||||
:return: NOTHING
|
||||
"""
|
||||
|
||||
from mo_threads import profiles
|
||||
|
||||
if cls.cprofiler and hasattr(cls, "settings"):
|
||||
if cls.cprofiler == None:
|
||||
|
@@ -429,7 +435,6 @@ class Log(object):
trace = exceptions.extract_stack(stack_depth + 1)

e = Except(exceptions.ERROR, template, params, cause, trace)
str_e = text_type(e)

error_mode = cls.error_mode
with suppress_exception:

@@ -443,7 +448,7 @@ class Log(object):
)
cls.error_mode = error_mode

sys.stderr.write(str_e.encode('utf8'))
sys.stderr.write(str(e))


def write(self):
@@ -475,6 +480,10 @@ def write_profile(profile_settings, stats):
stats_file.write(convert.list2tab(stats))


def _same_frame(frameA, frameB):
return (frameA.line, frameA.file) == (frameB.line, frameB.file)


# GET THE MACHINE METADATA
machine_metadata = wrap({
"pid": os.getpid(),
@@ -55,6 +55,13 @@ class Except(Exception):

@classmethod
def wrap(cls, e, stack_depth=0):
"""
ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED, PLUS ADD FEATURES OF Except

:param e: AN EXCEPTION OF ANY TYPE
:param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
:return: A Except OBJECT OF THE SAME
"""
if e == None:
return Null
elif isinstance(e, (list, Except)):
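Except.wrap(), documented above, is used throughout these libraries to normalize whatever was raised into an Except that carries its stack trace and causal chain. The pattern below mirrors calls that appear elsewhere in this diff (risky_operation is a hypothetical callable):

    from mo_logs import Log
    from mo_logs.exceptions import Except

    try:
        risky_operation()
    except Exception as e:
        e = Except.wrap(e)              # capture trace + cause; returns Null if e is None
        Log.warning("risky_operation failed", cause=e)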
@@ -37,6 +37,10 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
"""
settings ARE FOR THE ELASTICSEARCH INDEX
"""
kwargs.timeout = Duration(coalesce(self.es.settings.timeout, "30second")).seconds
kwargs.retry.times = coalesce(self.es.settings.retry.times, 3)
kwargs.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE)).seconds

self.es = Cluster(kwargs).get_or_create_index(
schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
limit_replicas=True,

@@ -46,8 +50,7 @@ class StructuredLogger_usingElasticSearch(StructuredLogger):
self.batch_size = batch_size
self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
self.queue = Queue("debug logs to es", max=max_size, silent=True)
self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))

Thread.run("add debug logs to es", self._insert_loop)

def write(self, template, params):
@@ -22,8 +22,9 @@ from mo_logs.strings import expand_template

class StructuredLogger_usingStream(StructuredLogger):
def __init__(self, stream):
self.locker = allocate_lock()
try:
self.locker = allocate_lock()
self.flush = stream.flush
if stream in (sys.stdout, sys.stderr):
if PY3:
self.writer = stream.write

@@ -33,8 +34,8 @@ class StructuredLogger_usingStream(StructuredLogger):
self.writer = _UTF8Encoder(stream).write
else:
self.writer = stream.write
except Exception as e:
sys.stderr("can not handle")
except Exception as _:
sys.stderr.write("can not handle")

def write(self, template, params):
value = expand_template(template, params)

@@ -45,7 +46,7 @@ class StructuredLogger_usingStream(StructuredLogger):
self.locker.release()

def stop(self):
pass
self.flush()


class _UTF8Encoder(object):

@@ -56,5 +57,5 @@ class _UTF8Encoder(object):
def write(self, v):
try:
self.stream.write(v.encode('utf8'))
except Exception as e:
sys.stderr("can not handle")
except Exception as _:
sys.stderr.write("can not handle")
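The stream logger above now flushes on stop() and routes writes through _UTF8Encoder on Python 2 so unicode text is encoded before it reaches a byte stream. A standalone sketch of that wrapper idea (not the library class itself):

    import sys

    class UTF8Encoder(object):
        # wrap a byte-oriented stream so callers can write unicode text
        def __init__(self, stream):
            self.stream = stream

        def write(self, v):
            try:
                self.stream.write(v.encode('utf8'))
            except Exception:
                sys.stderr.write("can not handle\n")

    # usage: UTF8Encoder(open("log.txt", "wb")).write(u"message")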
@@ -93,7 +93,9 @@ def time_delta_pusher(please_stop, appender, queue, interval):
next_run = time() + interval

while not please_stop:
Thread.current().cprofiler.disable()
(Till(till=next_run) | please_stop).wait()
Thread.current().cprofiler.enable()
next_run = time() + interval
logs = queue.pop_all()
if not logs:

@@ -116,7 +118,7 @@ def time_delta_pusher(please_stop, appender, queue, interval):
appender(u"\n".join(lines) + u"\n")
except Exception as e:

sys.stderr.write(b"Trouble with appender: " + str(e.__class__.__name__) + b"\n")
sys.stderr.write(str("Trouble with appender: ") + str(e.__class__.__name__) + str("\n"))
# SWALLOW ERROR, MUST KEEP RUNNING
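time_delta_pusher, shown above, wakes on an interval (or on please_stop), drains the queue, and hands the whole batch to an appender, with the profiler disabled while the thread is only sleeping. A simplified sketch of that loop using only the standard library (names and locking here are illustrative):

    def delta_pusher(appender, queue, lock, interval, please_stop):
        # please_stop: threading.Event; queue: plain list guarded by lock
        while not please_stop.is_set():
            please_stop.wait(interval)      # sleep, but wake early on stop
            with lock:                      # the real Queue does its own locking
                logs, queue[:] = list(queue), []
            if logs:
                appender(u"\n".join(logs) + u"\n")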
@ -20,7 +20,7 @@ import tempfile
|
|||
import mo_json_config
|
||||
from mo_files import File
|
||||
from mo_logs import Log
|
||||
from mo_dots import listwrap, wrap, unwrap
|
||||
from mo_dots import listwrap, wrap, unwrap, coalesce
|
||||
|
||||
|
||||
# PARAMETERS MATCH argparse.ArgumentParser.add_argument()
|
||||
|
@ -58,39 +58,32 @@ def argparse(defs):
|
|||
return wrap(output)
|
||||
|
||||
|
||||
def read_settings(filename=None, defs=None, env_filename=None):
|
||||
def read_settings(filename=None, defs=None):
|
||||
"""
|
||||
:param filename: Force load a file
|
||||
:param defs: arguments you want to accept
|
||||
:param env_filename: A config file from an environment variable (a fallback config file, if no other provided)
|
||||
:param default_filename: A config file from an environment variable (a fallback config file, if no other provided)
|
||||
:return:
|
||||
"""
|
||||
# READ SETTINGS
|
||||
if filename:
|
||||
settings_file = File(filename)
|
||||
if not settings_file.exists:
|
||||
Log.error("Can not file settings file {{filename}}", {
|
||||
"filename": settings_file.abspath
|
||||
})
|
||||
settings = mo_json_config.get("file:///" + settings_file.abspath)
|
||||
if defs:
|
||||
settings.args = argparse(defs)
|
||||
return settings
|
||||
else:
|
||||
defs = listwrap(defs)
|
||||
defs.append({
|
||||
"name": ["--config", "--settings", "--settings-file", "--settings_file"],
|
||||
"help": "path to JSON file with settings",
|
||||
"type": str,
|
||||
"dest": "filename",
|
||||
"default": "config.json",
|
||||
"default": None,
|
||||
"required": False
|
||||
})
|
||||
args = argparse(defs)
|
||||
|
||||
if env_filename:
|
||||
args.filename = env_filename
|
||||
settings = mo_json_config.get("file://" + args.filename.replace(os.sep, "/"))
|
||||
args.filename = coalesce(filename, args.filename, "./config.json")
|
||||
settings_file = File(args.filename)
|
||||
if not settings_file.exists:
|
||||
Log.error("Can not read configuration file {{filename}}", {
|
||||
"filename": settings_file.abspath
|
||||
})
|
||||
settings = mo_json_config.get("file:///" + settings_file.abspath)
|
||||
settings.args = args
|
||||
return settings
|
||||
|
||||
|
|
|
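The rewritten read_settings above folds the explicit filename, the --config command-line argument, and a ./config.json fallback into one coalesce() call before loading the file. A hedged sketch of calling it, assuming the module is mo_logs.startup as in earlier releases (the extra argument definition is hypothetical):

    from mo_logs import startup

    # picks up --config/--settings from sys.argv, falls back to ./config.json
    settings = startup.read_settings(defs=[
        {"name": "--id", "help": "record id to process", "type": str, "required": False}
    ])
    print(settings.args.id)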
@ -297,6 +297,12 @@ def MIN(values, *others):
|
|||
|
||||
|
||||
def MAX(values, *others):
|
||||
"""
|
||||
DECISIVE MAX
|
||||
:param values:
|
||||
:param others:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if others:
|
||||
from mo_logs import Log
|
||||
|
|
|
@ -86,6 +86,8 @@ def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=
|
|||
return
|
||||
elif test is expected:
|
||||
return
|
||||
elif isinstance(expected, text_type):
|
||||
assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
|
||||
elif isinstance(test, UniqueIndex):
|
||||
if test ^ expected:
|
||||
Log.error("Sets do not match")
|
||||
|
@ -196,7 +198,6 @@ def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, d
|
|||
if diff < Math.ceiling(Math.log10(abs(test)))-places:
|
||||
return
|
||||
|
||||
|
||||
standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals())
|
||||
|
||||
raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
|
||||
|
|
|
@ -15,15 +15,21 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_future import get_function_name
|
||||
|
||||
from mo_logs import Log
|
||||
from mo_threads.lock import Lock
|
||||
from mo_threads.signal import Signal
|
||||
from mo_threads.till import Till
|
||||
from mo_threads.threads import Thread, THREAD_STOP, THREAD_TIMEOUT
|
||||
from mo_threads.queues import Queue
|
||||
from mo_threads.queues import ThreadedQueue
|
||||
from mo_threads.multiprocess import Process
|
||||
from mo_threads.queues import Queue, ThreadedQueue
|
||||
from mo_threads.signal import Signal
|
||||
from mo_threads.threads import Thread, THREAD_STOP, THREAD_TIMEOUT, MainThread, stop_main_thread, MAIN_THREAD
|
||||
from mo_threads.till import Till
|
||||
|
||||
Log.cprofiler_stats = Queue("cprofiler stats") # ACCUMULATION OF STATS FROM ALL THREADS
|
||||
|
||||
MAIN_THREAD.timers = Thread.run("timers daemon", till.daemon)
|
||||
MAIN_THREAD.children.remove(threads.MAIN_THREAD.timers)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# from threading import Thread as _threading_Thread
|
||||
|
@ -78,3 +84,4 @@ from mo_threads.multiprocess import Process
|
|||
# _threading_Thread.setDaemon = _setDaemon
|
||||
#
|
||||
#
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ def _late_import():
|
|||
|
||||
class Lock(object):
|
||||
"""
|
||||
A NON-RE-ENTRANT LOCK WITH wait() AND
|
||||
A NON-RE-ENTRANT LOCK WITH wait()
|
||||
"""
|
||||
__slots__ = ["name", "lock", "waiting"]
|
||||
|
||||
|
@ -77,7 +77,7 @@ class Lock(object):
|
|||
"""
|
||||
THE ASSUMPTION IS wait() WILL ALWAYS RETURN WITH THE LOCK ACQUIRED
|
||||
:param till: WHEN TO GIVE UP WAITING FOR ANOTHER THREAD TO SIGNAL
|
||||
:return: True IF SIGNALED TO GO, False IF TIMEOUT HAPPENED
|
||||
:return: True IF SIGNALED TO GO, False IF till WAS SIGNALED
|
||||
"""
|
||||
waiter = Signal()
|
||||
if self.waiting:
|
||||
|
|
|
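The revised docstring above pins down the contract: wait() always returns holding the lock, and the return value only says whether you were signalled or the till expired. A usage sketch in that spirit, assuming mo_threads' Lock and Till (the lock name and shared list are illustrative):

    from mo_threads import Lock, Till

    lock = Lock("consumer lock")
    jobs = []

    with lock:
        while not jobs:
            # True when signalled, False when the 10s timer fired;
            # either way the lock is held again when wait() returns
            if not lock.wait(till=Till(seconds=10)):
                break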
@ -16,26 +16,12 @@ import pstats
|
|||
from datetime import datetime
|
||||
from time import clock
|
||||
|
||||
from mo_dots import Data
|
||||
from mo_dots import wrap
|
||||
|
||||
from mo_dots import Data, wrap, Null
|
||||
from mo_logs import Log
|
||||
|
||||
ON = False
|
||||
profiles = {}
|
||||
|
||||
_Log = None
|
||||
|
||||
|
||||
def _late_import():
|
||||
global _Log
|
||||
|
||||
from mo_logs import Log as _Log
|
||||
from mo_threads import Queue
|
||||
|
||||
if _Log.cprofiler_stats == None:
|
||||
_Log.cprofiler_stats = Queue("cprofiler stats") # ACCUMULATION OF STATS FROM ALL THREADS
|
||||
|
||||
|
||||
class Profiler(object):
|
||||
"""
|
||||
VERY SIMPLE PROFILER FOR USE IN with STATEMENTS
|
||||
|
@ -48,13 +34,12 @@ class Profiler(object):
|
|||
output = profiles.get(args[0])
|
||||
if output:
|
||||
return output
|
||||
output = object.__new__(cls, *args)
|
||||
output = object.__new__(cls)
|
||||
return output
|
||||
|
||||
def __init__(self, description):
|
||||
from jx_python.windows import Stats
|
||||
|
||||
if ON and not hasattr(self, "description"):
|
||||
from jx_python.windows import Stats
|
||||
self.description = description
|
||||
self.samples = []
|
||||
self.stats = Stats()()
|
||||
|
@ -127,20 +112,25 @@ class CProfiler(object):
|
|||
"""
|
||||
|
||||
def __init__(self):
|
||||
if not _Log:
|
||||
_late_import()
|
||||
self.cprofiler = None
|
||||
|
||||
def __enter__(self):
|
||||
if _Log.cprofiler:
|
||||
_Log.note("starting cprofile")
|
||||
if Log.cprofiler:
|
||||
Log.note("starting cprofile")
|
||||
self.cprofiler = cProfile.Profile()
|
||||
self.cprofiler.enable()
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
if self.cprofiler:
|
||||
if self.cprofiler is not None:
|
||||
self.cprofiler.disable()
|
||||
_Log.cprofiler_stats.add(pstats.Stats(self.cprofiler))
|
||||
Log.cprofiler_stats.add(pstats.Stats(self.cprofiler))
|
||||
del self.cprofiler
|
||||
_Log.note("done cprofile")
|
||||
Log.note("done cprofile")
|
||||
|
||||
def enable(self):
|
||||
if self.cprofiler is not None:
|
||||
return self.cprofiler.enable()
|
||||
|
||||
def disable(self):
|
||||
if self.cprofiler is not None:
|
||||
return self.cprofiler.disable()
|
|
@ -22,14 +22,12 @@ from datetime import datetime
|
|||
from time import time
|
||||
|
||||
from mo_dots import coalesce, Null
|
||||
from mo_threads import Lock, Signal, Thread, THREAD_STOP, THREAD_TIMEOUT, Till
|
||||
|
||||
from mo_logs import Log
|
||||
from mo_threads.lock import Lock
|
||||
from mo_threads.signal import Signal
|
||||
from mo_threads.threads import THREAD_STOP, THREAD_TIMEOUT, Thread
|
||||
from mo_threads.till import Till
|
||||
|
||||
_convert = None
|
||||
_Except = None
|
||||
_CProfiler = None
|
||||
_Log = None
|
||||
DEBUG = False
|
||||
|
||||
# MAX_DATETIME = datetime(2286, 11, 20, 17, 46, 39)
|
||||
|
@ -37,23 +35,6 @@ DEFAULT_WAIT_TIME = 10 * 60 # SECONDS
|
|||
|
||||
datetime.strptime('2012-01-01', '%Y-%m-%d') # http://bugs.python.org/issue7980
|
||||
|
||||
|
||||
def _late_import():
|
||||
global _convert
|
||||
global _Except
|
||||
global _CProfiler
|
||||
global _Log
|
||||
|
||||
from mo_logs.exceptions import Except as _Except
|
||||
from mo_logs.profiles import CProfiler as _CProfiler
|
||||
from mo_logs import Log as _Log
|
||||
|
||||
_ = _convert
|
||||
_ = _Except
|
||||
_ = _CProfiler
|
||||
_ = _Log
|
||||
|
||||
|
||||
class Queue(object):
|
||||
"""
|
||||
SIMPLE MESSAGE QUEUE, multiprocessing.Queue REQUIRES SERIALIZATION, WHICH
|
||||
|
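The Queue described above is a plain in-process message queue (no serialization, unlike multiprocessing.Queue). A producer/consumer sketch based on the add/pop/THREAD_STOP calls visible elsewhere in this diff (handle() is a hypothetical callable):

    from mo_threads import Queue, Thread, THREAD_STOP

    queue = Queue("work items", max=1000)

    def worker(please_stop):
        while not please_stop:
            item = queue.pop(till=please_stop)   # THREAD_STOP when the queue is stopped
            if item == THREAD_STOP:
                break
            handle(item)

    Thread.run("worker", worker)
    queue.add({"task": "example"})
    queue.add(THREAD_STOP)                       # ask the worker to drain and exit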
@ -66,9 +47,6 @@ class Queue(object):
|
|||
silent - COMPLAIN IF THE READERS ARE TOO SLOW
|
||||
unique - SET True IF YOU WANT ONLY ONE INSTANCE IN THE QUEUE AT A TIME
|
||||
"""
|
||||
if not _Log:
|
||||
_late_import()
|
||||
|
||||
self.name = name
|
||||
self.max = coalesce(max, 2 ** 10)
|
||||
self.silent = silent
|
||||
|
@ -88,10 +66,10 @@ class Queue(object):
|
|||
if value is not None:
|
||||
yield value
|
||||
except Exception as e:
|
||||
_Log.warning("Tell me about what happened here", e)
|
||||
Log.warning("Tell me about what happened here", e)
|
||||
|
||||
if not self.silent:
|
||||
_Log.note("queue iterator is done")
|
||||
Log.note("queue iterator is done")
|
||||
|
||||
def add(self, value, timeout=None):
|
||||
with self.lock:
|
||||
|
@ -103,7 +81,7 @@ class Queue(object):
|
|||
|
||||
self._wait_for_queue_space(timeout=timeout)
|
||||
if self.please_stop and not self.allow_add_after_close:
|
||||
_Log.error("Do not add to closed queue")
|
||||
Log.error("Do not add to closed queue")
|
||||
else:
|
||||
if self.unique:
|
||||
if value not in self.queue:
|
||||
|
@ -117,7 +95,7 @@ class Queue(object):
|
|||
SNEAK value TO FRONT OF THE QUEUE
|
||||
"""
|
||||
if self.please_stop and not self.allow_add_after_close:
|
||||
_Log.error("Do not push to closed queue")
|
||||
Log.error("Do not push to closed queue")
|
||||
|
||||
with self.lock:
|
||||
self._wait_for_queue_space()
|
||||
|
@ -132,12 +110,12 @@ class Queue(object):
|
|||
"""
|
||||
|
||||
if till is not None and not isinstance(till, Signal):
|
||||
_Log.error("Expecting a signal")
|
||||
Log.error("Expecting a signal")
|
||||
return Null, self.pop(till=till)
|
||||
|
||||
def extend(self, values):
|
||||
if self.please_stop and not self.allow_add_after_close:
|
||||
_Log.error("Do not push to closed queue")
|
||||
Log.error("Do not push to closed queue")
|
||||
|
||||
with self.lock:
|
||||
# ONCE THE queue IS BELOW LIMIT, ALLOW ADDING MORE
|
||||
|
@ -171,16 +149,16 @@ class Queue(object):
|
|||
if timeout != None:
|
||||
time_to_stop_waiting = now + timeout
|
||||
else:
|
||||
time_to_stop_waiting = Null
|
||||
time_to_stop_waiting = None
|
||||
|
||||
if self.next_warning < now:
|
||||
self.next_warning = now + wait_time
|
||||
|
||||
while not self.please_stop and len(self.queue) >= self.max:
|
||||
if now > time_to_stop_waiting:
|
||||
if not _Log:
|
||||
if not Log:
|
||||
_late_import()
|
||||
_Log.error(THREAD_TIMEOUT)
|
||||
Log.error(THREAD_TIMEOUT)
|
||||
|
||||
if self.silent:
|
||||
self.lock.wait(Till(till=time_to_stop_waiting))
|
||||
|
@ -190,7 +168,7 @@ class Queue(object):
|
|||
now = time()
|
||||
if self.next_warning < now:
|
||||
self.next_warning = now + wait_time
|
||||
_Log.alert(
|
||||
Log.alert(
|
||||
"Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
|
||||
name=self.name,
|
||||
num=len(self.queue),
|
||||
|
@ -215,7 +193,7 @@ class Queue(object):
|
|||
:return: A value, or a THREAD_STOP or None
|
||||
"""
|
||||
if till is not None and not isinstance(till, Signal):
|
||||
_Log.error("expecting a signal")
|
||||
Log.error("expecting a signal")
|
||||
|
||||
with self.lock:
|
||||
while True:
|
||||
|
@ -229,7 +207,7 @@ class Queue(object):
|
|||
break
|
||||
return None
|
||||
if DEBUG or not self.silent:
|
||||
_Log.note(self.name + " queue stopped")
|
||||
Log.note(self.name + " queue stopped")
|
||||
return THREAD_STOP
|
||||
|
||||
def pop_all(self):
|
||||
|
@ -289,13 +267,13 @@ class ThreadedQueue(Queue):
|
|||
# BE CAREFUL! THE THREAD MAKING THE CALL WILL NOT BE YOUR OWN!
|
||||
# DEFAULT BEHAVIOUR: THIS WILL KEEP RETRYING WITH WARNINGS
|
||||
):
|
||||
if not _Log:
|
||||
if not Log:
|
||||
_late_import()
|
||||
|
||||
if period !=None and not isinstance(period, (int, float, long)):
|
||||
if not _Log:
|
||||
if not Log:
|
||||
_late_import()
|
||||
_Log.error("Expecting a float for the period")
|
||||
Log.error("Expecting a float for the period")
|
||||
|
||||
batch_size = coalesce(batch_size, int(max_size / 2) if max_size else None, 900)
|
||||
max_size = coalesce(max_size, batch_size * 2) # REASONABLE DEFAULT
|
||||
|
@ -328,7 +306,7 @@ class ThreadedQueue(Queue):
|
|||
item = self.pop()
|
||||
now = time()
|
||||
if now > last_push + period:
|
||||
# _Log.note("delay next push")
|
||||
# Log.note("delay next push")
|
||||
next_push = Till(till=now + period)
|
||||
else:
|
||||
item = self.pop(till=next_push)
|
||||
|
@ -349,13 +327,13 @@ class ThreadedQueue(Queue):
|
|||
try:
|
||||
error_target(e, _buffer)
|
||||
except Exception as f:
|
||||
_Log.warning(
|
||||
Log.warning(
|
||||
"`error_target` should not throw, just deal",
|
||||
name=name,
|
||||
cause=f
|
||||
)
|
||||
else:
|
||||
_Log.warning(
|
||||
Log.warning(
|
||||
"Unexpected problem",
|
||||
name=name,
|
||||
cause=e
|
||||
|
@ -374,13 +352,13 @@ class ThreadedQueue(Queue):
|
|||
try:
|
||||
error_target(e, _buffer)
|
||||
except Exception as f:
|
||||
_Log.warning(
|
||||
Log.warning(
|
||||
"`error_target` should not throw, just deal",
|
||||
name=name,
|
||||
cause=f
|
||||
)
|
||||
else:
|
||||
_Log.warning(
|
||||
Log.warning(
|
||||
"Problem with {{name}} pushing {{num}} items to data sink",
|
||||
name=name,
|
||||
num=len(_buffer),
|
||||
|
@ -405,8 +383,8 @@ class ThreadedQueue(Queue):
|
|||
# from jx_python import jx
|
||||
#
|
||||
# biggest = jx.sort(sizes, "size").last().id
|
||||
# _Log.note("Big record {{id}}", id=biggest)
|
||||
# _Log.note("{{name}} has {{num}} items with json size of {{size|comma}}", name=self.name, num=len(self.queue), size=size)
|
||||
# Log.note("Big record {{id}}", id=biggest)
|
||||
# Log.note("{{name}} has {{num}} items with json size of {{size|comma}}", name=self.name, num=len(self.queue), size=size)
|
||||
return self
|
||||
|
||||
def extend(self, values):
|
||||
|
@ -415,7 +393,7 @@ class ThreadedQueue(Queue):
|
|||
self._wait_for_queue_space()
|
||||
if not self.please_stop:
|
||||
self.queue.extend(values)
|
||||
_Log.note("{{name}} has {{num}} items", name=self.name, num=len(self.queue))
|
||||
Log.note("{{name}} has {{num}} items", name=self.name, num=len(self.queue))
|
||||
return self
|
||||
|
||||
def __enter__(self):
|
||||
|
@ -430,3 +408,5 @@ class ThreadedQueue(Queue):
|
|||
def stop(self):
|
||||
self.add(THREAD_STOP)
|
||||
self.thread.join()
|
||||
|
||||
|
||||
|
|
|
@ -15,20 +15,19 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import signal as _signal
|
||||
import sys
|
||||
|
||||
from copy import copy
|
||||
from datetime import datetime, timedelta
|
||||
from time import sleep
|
||||
|
||||
from mo_future import get_ident, start_new_thread, interrupt_main
|
||||
|
||||
from mo_dots import Data, unwraplist, Null
|
||||
from mo_future import get_ident, start_new_thread, interrupt_main, get_function_name, text_type
|
||||
from mo_logs import Log, Except
|
||||
from mo_logs.profiles import CProfiler
|
||||
from mo_threads import Till, Lock, Signal, till
|
||||
|
||||
from mo_threads.signal import AndSignals
|
||||
from mo_threads.lock import Lock
|
||||
from mo_threads.profiles import CProfiler
|
||||
from mo_threads.signal import AndSignals, Signal
|
||||
from mo_threads.till import Till
|
||||
|
||||
DEBUG = False
|
||||
|
||||
|
@ -81,8 +80,11 @@ class MainThread(object):
|
|||
def __init__(self):
|
||||
self.name = "Main Thread"
|
||||
self.id = get_ident()
|
||||
self.please_stop = Signal()
|
||||
self.children = []
|
||||
self.stop_logging = Log.stop
|
||||
self.timers = None
|
||||
self.cprofiler = Null
|
||||
|
||||
def add_child(self, child):
|
||||
self.children.append(child)
|
||||
|
@ -96,9 +98,15 @@ class MainThread(object):
|
|||
def stop(self):
|
||||
"""
|
||||
BLOCKS UNTIL ALL THREADS HAVE STOPPED
|
||||
THEN RUNS sys.exit(0)
|
||||
"""
|
||||
join_errors = []
|
||||
self.please_stop.go()
|
||||
|
||||
self_thread = Thread.current()
|
||||
if self_thread != MAIN_THREAD or self_thread != self:
|
||||
Log.error("Only the main thread can call stop() on main thread")
|
||||
|
||||
join_errors = []
|
||||
children = copy(self.children)
|
||||
for c in reversed(children):
|
||||
if DEBUG and c.name:
|
||||
|
@ -122,11 +130,57 @@ class MainThread(object):
|
|||
if join_errors:
|
||||
Log.error("Problem while stopping {{name|quote}}", name=self.name, cause=unwraplist(join_errors))
|
||||
|
||||
self.stop_logging()
|
||||
self.timers.stop()
|
||||
self.timers.join()
|
||||
|
||||
if DEBUG:
|
||||
Log.note("Thread {{name|quote}} now stopped", name=self.name)
|
||||
sys.exit(0)
|
||||
|
||||
def wait_for_shutdown_signal(
|
||||
self,
|
||||
please_stop=False, # ASSIGN SIGNAL TO STOP EARLY
|
||||
allow_exit=False, # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
|
||||
wait_forever=True # IGNORE CHILD THREADS, NEVER EXIT. False => IF NO CHILD THREADS LEFT, THEN EXIT
|
||||
):
|
||||
"""
|
||||
FOR USE BY PROCESSES THAT NEVER DIE UNLESS EXTERNAL SHUTDOWN IS REQUESTED
|
||||
|
||||
CALLING THREAD WILL SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"
|
||||
|
||||
:param please_stop:
|
||||
:param allow_exit:
|
||||
:param wait_forever:: Assume all needed threads have been launched. When done
|
||||
:return:
|
||||
"""
|
||||
self_thread = Thread.current()
|
||||
if self_thread != MAIN_THREAD or self_thread != self:
|
||||
Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)")
|
||||
|
||||
if isinstance(please_stop, Signal):
|
||||
self.please_stop.on_go(please_stop.go)
|
||||
else:
|
||||
please_stop = self.please_stop
|
||||
|
||||
if not wait_forever:
|
||||
# TRIGGER SIGNAL WHEN ALL CHILDREN THREADS ARE DONE
|
||||
pending = copy(self_thread.children)
|
||||
all = AndSignals(please_stop, len(pending))
|
||||
for p in pending:
|
||||
p.stopped.on_go(all.done)
|
||||
|
||||
try:
|
||||
if allow_exit:
|
||||
_wait_for_exit(please_stop)
|
||||
else:
|
||||
_wait_for_interrupt(please_stop)
|
||||
except KeyboardInterrupt as _:
|
||||
Log.alert("SIGINT Detected! Stopping...")
|
||||
except SystemExit as _:
|
||||
Log.alert("SIGTERM Detected! Stopping...")
|
||||
finally:
|
||||
self.stop()
|
||||
|
||||
|
||||
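wait_for_shutdown_signal(), now a MainThread method as shown above, is what a long-running service calls from its main thread after launching its workers. A hedged sketch of that shape (the worker body is illustrative):

    from mo_threads import Thread, MAIN_THREAD, Till

    def worker(please_stop):
        while not please_stop:
            (Till(seconds=5) | please_stop).wait()   # periodic work, wake early on stop

    Thread.run("worker", worker)
    # block here until Ctrl+C, SIGTERM, or "exit" typed on the console
    MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True)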
class Thread(object):
|
||||
|
@ -152,7 +206,7 @@ class Thread(object):
|
|||
|
||||
self.thread = None
|
||||
self.stopped = Signal("stopped signal for " + self.name)
|
||||
self.cprofiler = None
|
||||
self.cprofiler = Null
|
||||
self.children = []
|
||||
|
||||
if "parent_thread" in kwargs:
|
||||
|
@ -162,7 +216,6 @@ class Thread(object):
|
|||
self.parent = Thread.current()
|
||||
self.parent.add_child(self)
|
||||
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
|
@ -210,7 +263,8 @@ class Thread(object):
|
|||
try:
|
||||
if self.target is not None:
|
||||
a, k, self.args, self.kwargs = self.args, self.kwargs, None, None
|
||||
with CProfiler(): # PROFILE IN HERE SO THAT __exit__() IS RUN BEFORE THREAD MARKED AS stopped
|
||||
self.cprofiler = CProfiler()
|
||||
with self.cprofiler: # PROFILE IN HERE SO THAT __exit__() IS RUN BEFORE THREAD MARKED AS stopped
|
||||
response = self.target(*a, **k)
|
||||
with self.synch_lock:
|
||||
self.end_of_thread = Data(response=response)
|
||||
|
@ -226,7 +280,7 @@ class Thread(object):
|
|||
try:
|
||||
Log.fatal("Problem in thread {{name|quote}}", name=self.name, cause=e)
|
||||
except Exception:
|
||||
sys.stderr.write(b"ERROR in thread: " + str(self.name) + b" " + str(e) + b"\n")
|
||||
sys.stderr.write(str("ERROR in thread: " + self.name + " " + text_type(e) + "\n"))
|
||||
finally:
|
||||
try:
|
||||
children = copy(self.children)
|
||||
|
@ -260,9 +314,9 @@ class Thread(object):
|
|||
if DEBUG:
|
||||
Log.warning("problem with thread {{name|quote}}", cause=e, name=self.name)
|
||||
finally:
|
||||
self.stopped.go()
|
||||
if DEBUG:
|
||||
Log.note("thread {{name|quote}} is done", name=self.name)
|
||||
self.stopped.go()
|
||||
|
||||
def is_alive(self):
|
||||
return not self.stopped
|
||||
|
@ -293,7 +347,9 @@ class Thread(object):
|
|||
@staticmethod
|
||||
def run(name, target, *args, **kwargs):
|
||||
# ENSURE target HAS please_stop ARGUMENT
|
||||
if "please_stop" not in target.__code__.co_varnames:
|
||||
if get_function_name(target) == 'wrapper':
|
||||
pass # GIVE THE override DECORATOR A PASS
|
||||
elif "please_stop" not in target.__code__.co_varnames:
|
||||
Log.error("function must have please_stop argument for signalling emergency shutdown")
|
||||
|
||||
Thread.num_threads += 1
|
||||
|
@ -302,48 +358,6 @@ class Thread(object):
|
|||
output.start()
|
||||
return output
|
||||
|
||||
@staticmethod
|
||||
def wait_for_shutdown_signal(
|
||||
please_stop=False, # ASSIGN SIGNAL TO STOP EARLY
|
||||
allow_exit=False, # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
|
||||
wait_forever=True # IGNORE CHILD THREADS, NEVER EXIT. False -> IF NO CHILD THREADS LEFT, THEN EXIT
|
||||
):
|
||||
"""
|
||||
FOR USE BY PROCESSES NOT EXPECTED TO EVER COMPLETE UNTIL EXTERNAL
|
||||
SHUTDOWN IS REQUESTED
|
||||
|
||||
SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"
|
||||
|
||||
:param please_stop:
|
||||
:param allow_exit:
|
||||
:param wait_forever:: Assume all needed threads have been launched. When done
|
||||
:return:
|
||||
"""
|
||||
if not isinstance(please_stop, Signal):
|
||||
please_stop = Signal()
|
||||
|
||||
please_stop.on_go(lambda: start_new_thread(_stop_main_thread, ()))
|
||||
|
||||
self_thread = Thread.current()
|
||||
if self_thread != MAIN_THREAD:
|
||||
Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)")
|
||||
|
||||
if not wait_forever:
|
||||
# TRIGGER SIGNAL WHEN ALL EXITING THREADS ARE DONE
|
||||
pending = copy(self_thread.children)
|
||||
all = AndSignals(please_stop, len(pending))
|
||||
for p in pending:
|
||||
p.stopped.on_go(all.done)
|
||||
|
||||
try:
|
||||
if allow_exit:
|
||||
_wait_for_exit(please_stop)
|
||||
else:
|
||||
_wait_for_interrupt(please_stop)
|
||||
except (KeyboardInterrupt, SystemExit) as _:
|
||||
Log.alert("SIGINT Detected! Stopping...")
|
||||
finally:
|
||||
please_stop.go()
|
||||
|
||||
@staticmethod
|
||||
def current():
|
||||
|
@ -355,15 +369,26 @@ class Thread(object):
|
|||
return MAIN_THREAD
|
||||
|
||||
|
||||
def _stop_main_thread():
|
||||
def stop_main_thread(*args):
|
||||
global DEBUG
|
||||
|
||||
DEBUG = True
|
||||
try:
|
||||
if len(args):
|
||||
Log.warning("exit with {{value}}", value=_describe_exit_codes.get(args[0], args[0]))
|
||||
except Exception as _:
|
||||
pass
|
||||
finally:
|
||||
MAIN_THREAD.stop()
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
Log.warning("Problem with threads", cause=e)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
_describe_exit_codes = {
|
||||
_signal.SIGTERM: "SIGTERM",
|
||||
_signal.SIGINT: "SIGINT"
|
||||
}
|
||||
|
||||
_signal.signal(_signal.SIGTERM, stop_main_thread)
|
||||
_signal.signal(_signal.SIGINT, stop_main_thread)
|
||||
|
||||
|
||||
def _wait_for_exit(please_stop):
|
||||
|
@ -416,11 +441,10 @@ def _interrupt_main_safely():
|
|||
# WE COULD BE INTERRUPTING SELF
|
||||
pass
|
||||
|
||||
|
||||
MAIN_THREAD = MainThread()
|
||||
|
||||
ALL_LOCK = Lock("threads ALL_LOCK")
|
||||
ALL = dict()
|
||||
ALL[get_ident()] = MAIN_THREAD
|
||||
|
||||
MAIN_THREAD.timers = Thread.run("timers daemon", till.daemon)
|
||||
MAIN_THREAD.children.remove(MAIN_THREAD.timers)
|
||||
|
|
|
@ -15,9 +15,10 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_future import allocate_lock as _allocate_lock
|
||||
from time import sleep, time
|
||||
from weakref import ref
|
||||
|
||||
from mo_future import allocate_lock as _allocate_lock
|
||||
from mo_future import text_type
|
||||
|
||||
from mo_threads.signal import Signal
|
||||
|
@ -40,7 +41,7 @@ class Till(Signal):
|
|||
if not Till.enabled:
|
||||
return Till.done
|
||||
elif till == None and timeout == None and seconds == None:
|
||||
return Till.done
|
||||
return None
|
||||
else:
|
||||
return object.__new__(cls)
|
||||
|
||||
|
@ -70,7 +71,7 @@ class Till(Signal):
|
|||
with Till.locker:
|
||||
if timeout != None:
|
||||
Till.next_ping = min(Till.next_ping, timeout)
|
||||
Till.new_timers.append((timeout, self))
|
||||
Till.new_timers.append((timeout, ref(self)))
|
||||
|
||||
|
||||
Till.done.go()
|
||||
|
@ -108,13 +109,17 @@ def daemon(please_stop):
|
|||
new_timers, Till.new_timers = Till.new_timers, []
|
||||
|
||||
if DEBUG and new_timers:
|
||||
Log.note("new timers: {{timers}}", timers=[t for t, s in new_timers])
|
||||
if len(new_timers) > 5:
|
||||
Log.note("{{num}} new timers", num=len(new_timers))
|
||||
else:
|
||||
Log.note("new timers: {{timers}}", timers=[t for t, _ in new_timers])
|
||||
|
||||
sorted_timers.extend(new_timers)
|
||||
|
||||
if sorted_timers:
|
||||
sorted_timers.sort(key=lambda r: r[0])
|
||||
for i, (t, s) in enumerate(sorted_timers):
|
||||
sorted_timers.sort(key=actual_time)
|
||||
for i, rec in enumerate(sorted_timers):
|
||||
t = actual_time(rec)
|
||||
if now < t:
|
||||
work, sorted_timers = sorted_timers[:i], sorted_timers[i:]
|
||||
Till.next_ping = min(Till.next_ping, sorted_timers[0][0])
|
||||
|
@ -126,15 +131,17 @@ def daemon(please_stop):
|
|||
if DEBUG:
|
||||
Log.note(
|
||||
"done: {{timers}}. Remaining {{pending}}",
|
||||
timers=[t for t, s in work],
|
||||
pending=[t for t, s in sorted_timers]
|
||||
timers=[t for t, s in work] if len(work) <= 5 else len(work),
|
||||
pending=[t for t, s in sorted_timers] if len(sorted_timers) <= 5 else len(sorted_timers)
|
||||
)
|
||||
|
||||
for t, s in work:
|
||||
for t, r in work:
|
||||
s = r()
|
||||
if s is not None:
|
||||
s.go()
|
||||
|
||||
except Exception as e:
|
||||
Log.warning("timer shutdown", cause=e)
|
||||
Log.warning("unexpected timer shutdown", cause=e)
|
||||
finally:
|
||||
if DEBUG:
|
||||
Log.alert("TIMER SHUTDOWN")
|
||||
|
@ -145,4 +152,5 @@ def daemon(please_stop):
|
|||
for t, s in new_work + sorted_timers:
|
||||
s.go()
|
||||
|
||||
|
||||
def actual_time(rec):
|
||||
return 0 if rec[1]() is None else rec[0]
|
||||
|
|
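The timer daemon above now keeps weak references to pending Till signals; actual_time() treats a dead reference as time 0 so it sorts to the front and is discarded on the next sweep. A minimal standalone illustration of the weakref behaviour (plain Python, not the library code):

    import weakref

    class Signal(object):
        pass

    s = Signal()
    r = weakref.ref(s)
    print(r() is s)     # True: referent still alive, timer must fire

    del s
    print(r() is None)  # True: nobody is waiting anymore, entry can be dropped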
|
@ -71,7 +71,7 @@ class Date(object):
|
|||
|
||||
def format(self, format="%Y-%m-%d %H:%M:%S"):
|
||||
try:
|
||||
return unix2datetime(self.unix).strftime(format)
|
||||
return text_type(unix2datetime(self.unix).strftime(format))
|
||||
except Exception as e:
|
||||
from mo_logs import Log
|
||||
|
||||
|
@ -160,11 +160,15 @@ class Date(object):
|
|||
return self.add(-other)
|
||||
|
||||
def __lt__(self, other):
|
||||
try:
|
||||
other = Date(other)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
return self.unix < other.unix
|
||||
|
||||
def __eq__(self, other):
|
||||
if other == None:
|
||||
if other == None or other == '':
|
||||
return Null
|
||||
|
||||
try:
|
||||
|
|
|
@ -37,6 +37,7 @@ class Timer(object):
|
|||
self.param = wrap(coalesce(param, {}))
|
||||
self.debug = debug
|
||||
self.silent = silent
|
||||
self.agg = 0
|
||||
self.start = 0
|
||||
self.end = 0
|
||||
self.interval = None
|
||||
|
@ -51,6 +52,7 @@ class Timer(object):
|
|||
def __exit__(self, type, value, traceback):
|
||||
self.end = time()
|
||||
self.interval = self.end - self.start
|
||||
self.agg += self.interval
|
||||
|
||||
if self.debug:
|
||||
param = wrap(self.param)
|
||||
|
@ -60,7 +62,15 @@ class Timer(object):
|
|||
|
||||
@property
|
||||
def duration(self):
|
||||
end = time()
|
||||
if not self.end:
|
||||
return Duration(time() - self.start)
|
||||
return Duration(end - self.start)
|
||||
|
||||
return Duration(self.interval)
|
||||
|
||||
@property
|
||||
def total(self):
|
||||
if not self.end:
|
||||
Log.error("please ask for total time outside the context of measuring")
|
||||
|
||||
return Duration(self.agg)
|
||||
|
|
|
@ -11,17 +11,16 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import StringIO
|
||||
import gzip
|
||||
import zipfile
|
||||
from tempfile import TemporaryFile
|
||||
|
||||
import boto
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from boto.s3.connection import Location
|
||||
from mo_future import text_type
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from mo_dots import wrap, Null, coalesce, unwrap, Data
|
||||
from mo_future import text_type, StringIO
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log, Except
|
||||
from mo_logs.strings import utf82unicode, unicode2utf8
|
||||
|
@ -472,7 +471,7 @@ def strip_extension(key):
|
|||
|
||||
|
||||
def _unzip(compressed):
|
||||
buff = StringIO.StringIO(compressed)
|
||||
buff = StringIO(compressed)
|
||||
archive = zipfile.ZipFile(buff, mode='r')
|
||||
return archive.read(archive.namelist()[0])
|
||||
|
||||
|
|
|
@ -46,6 +46,15 @@ def string2datetime(value, format=None):
|
|||
return unix2datetime(Date(value, format).unix)
|
||||
|
||||
|
||||
def string2boolean(value):
|
||||
if value in ["true", "T"]:
|
||||
return True
|
||||
elif value in ["false", "F"]:
|
||||
return False
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def str2datetime(value, format=None):
|
||||
return unix2datetime(Date(value, format).unix)
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ from mo_logs.strings import utf82unicode, unicode2utf8
|
|||
from mo_math import Math
|
||||
from mo_math.randoms import Random
|
||||
from mo_threads import Lock, ThreadedQueue, Till
|
||||
from mo_times import Date, Timer
|
||||
from mo_times import Date, Timer, MINUTE
|
||||
from pyLibrary import convert
|
||||
from pyLibrary.env import http
|
||||
|
||||
|
@ -38,6 +38,8 @@ ES_NUMERIC_TYPES = ["long", "integer", "double", "float"]
|
|||
ES_PRIMITIVE_TYPES = ["string", "boolean", "integer", "date", "long", "double"]
|
||||
INDEX_DATE_FORMAT = "%Y%m%d_%H%M%S"
|
||||
|
||||
STALE_METADATA = 10 * MINUTE
|
||||
|
||||
DATA_KEY = text_type("data")
|
||||
|
||||
|
||||
|
@ -85,7 +87,7 @@ class Index(Features):
|
|||
self.cluster = cluster or Cluster(kwargs)
|
||||
|
||||
try:
|
||||
full_index = self.get_index(index)
|
||||
full_index = self.cluster.get_canonical_index(index)
|
||||
if full_index and alias==None:
|
||||
kwargs.alias = kwargs.index
|
||||
kwargs.index = full_index
|
||||
|
@ -93,41 +95,40 @@ class Index(Features):
|
|||
Log.error("not allowed")
|
||||
if type == None:
|
||||
# NO type PROVIDED, MAYBE THERE IS A SUITABLE DEFAULT?
|
||||
index_ = self.cluster.get_metadata().indices[self.settings.index]
|
||||
if not index_:
|
||||
Log.error("can not find index {{index}}", index=self.settings.index)
|
||||
|
||||
candidate_types = list(index_.mappings.keys())
|
||||
if len(candidate_types) != 1:
|
||||
Log.error("Expecting `type` parameter")
|
||||
self.settings.type = type = candidate_types[0]
|
||||
except Exception as e:
|
||||
# EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
|
||||
Log.error("not expected", cause=e)
|
||||
|
||||
about = self.cluster.get_metadata().indices[self.settings.index]
|
||||
type = self.settings.type = _get_best_type_from_mapping(about.mappings)[0]
|
||||
if type == "_default_":
|
||||
Log.error("not allowed")
|
||||
if not type:
|
||||
Log.error("not allowed")
|
||||
|
||||
self.path = "/" + full_index + "/" + type
|
||||
except Exception as e:
|
||||
# EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
|
||||
Log.error("not expected", cause=e)
|
||||
|
||||
if self.debug:
|
||||
Log.alert("elasticsearch debugging for {{url}} is on", url=self.url)
|
||||
|
||||
props = self.get_properties()
|
||||
if not props:
|
||||
tjson = coalesce(kwargs.tjson, True) # TYPED JSON IS DEFAULT
|
||||
elif props[EXISTS_TYPE]:
|
||||
if tjson is False:
|
||||
Log.error("expecting tjson parameter to match properties of {{index}}", index=index)
|
||||
elif tjson == None:
|
||||
tjson = kwargs.tjson = True
|
||||
else:
|
||||
if tjson is True:
|
||||
Log.error("expecting tjson parameter to match properties of {{index}}", index=index)
|
||||
elif tjson == None:
|
||||
tjson = kwargs.tjson = False
|
||||
|
||||
if not read_only:
|
||||
if tjson:
|
||||
from pyLibrary.env.typed_inserter import TypedInserter
|
||||
|
||||
self.encode = TypedInserter(self, id_column).typed_encode
|
||||
else:
|
||||
if tjson == None and not read_only:
|
||||
props = self.get_properties()
|
||||
if props[EXISTS_TYPE]:
|
||||
kwargs.tjson=True
|
||||
from pyLibrary.env.typed_inserter import TypedInserter
|
||||
self.encode = TypedInserter(self, id_column).typed_encode
|
||||
else:
|
||||
kwargs.tjson = False
|
||||
Log.warning("{{index}} is not typed tjson={{tjson}}", index=self.settings.index, tjson=self.settings.tjson)
|
||||
self.encode = get_encoder(id_column)
|
||||
else:
|
||||
self.encode = get_encoder(id_column)
|
||||
|
||||
|
@ -145,12 +146,12 @@ class Index(Features):
|
|||
self.cluster.info = None
|
||||
return self.get_properties(retry=False)
|
||||
|
||||
if not index.mappings[self.settings.type]:
|
||||
if not index.mappings[self.settings.type] and (index.mappings.keys()-{"_default_"}):
|
||||
Log.warning(
|
||||
"ElasticSearch index {{index|quote}} does not have type {{type|quote}} in {{metadata|json}}",
|
||||
index=self.settings.index,
|
||||
type=self.settings.type,
|
||||
metadata=jx.sort(metadata.indices.keys())
|
||||
metadata=jx.sort(index.mappings.keys())
|
||||
)
|
||||
return Null
|
||||
return index.mappings[self.settings.type].properties
|
||||
|
@ -195,35 +196,12 @@ class Index(Features):
|
|||
|
||||
# WAIT FOR ALIAS TO APPEAR
|
||||
while True:
|
||||
response = self.cluster.get("/_cluster/state", retry={"times": 5}, timeout=3)
|
||||
if alias in response.metadata.indices[self.settings.index].aliases:
|
||||
metadata = self.cluster.get_metadata(force=True)
|
||||
if alias in metadata.indices[self.settings.index].aliases:
|
||||
return
|
||||
Log.note("Waiting for alias {{alias}} to appear", alias=alias)
|
||||
Till(seconds=1).wait()
|
||||
|
||||
|
||||
|
||||
def get_index(self, alias):
|
||||
"""
|
||||
RETURN THE INDEX USED BY THIS alias
|
||||
"""
|
||||
alias_list = self.cluster.get_aliases()
|
||||
output = jx.sort(set([
|
||||
a.index
|
||||
for a in alias_list
|
||||
if a.alias == alias or
|
||||
a.index == alias or
|
||||
(re.match(re.escape(alias) + "\\d{8}_\\d{6}", a.index) and a.index != alias)
|
||||
]))
|
||||
|
||||
if len(output) > 1:
|
||||
Log.error("only one index with given alias==\"{{alias}}\" expected", alias= alias)
|
||||
|
||||
if not output:
|
||||
return Null
|
||||
|
||||
return output.last()
|
||||
|
||||
def is_proto(self, index):
|
||||
"""
|
||||
RETURN True IF THIS INDEX HAS NOT BEEN ASSIGNED ITS ALIAS
|
||||
|
@ -306,8 +284,6 @@ class Index(Features):
|
|||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
|
||||
def extend(self, records):
|
||||
"""
|
||||
records - MUST HAVE FORM OF
|
||||
|
@ -407,6 +383,22 @@ class Index(Features):
|
|||
Log.error("add() has changed to only accept one record, no lists")
|
||||
self.extend([record])
|
||||
|
||||
def add_property(self, name, details):
|
||||
if self.debug:
|
||||
Log.note("Adding property {{prop}} to {{index}}", prop=name, index=self.settings.index)
|
||||
for n in jx.reverse(split_field(name)):
|
||||
if n == NESTED_TYPE:
|
||||
details = {"properties": {n: set_default(details, {"type": "nested", "dynamic": True})}}
|
||||
elif n.startswith(TYPE_PREFIX):
|
||||
details = {"properties": {n: details}}
|
||||
else:
|
||||
details = {"properties": {n: set_default(details, {"type": "object", "dynamic": True})}}
|
||||
|
||||
self.cluster.put(
|
||||
"/" + self.settings.index + "/_mapping/" + self.settings.type,
|
||||
data=details
|
||||
)
|
||||
|
||||
def refresh(self):
|
||||
self.cluster.post("/" + self.settings.index + "/_refresh")
|
||||
|
||||
|
@ -436,7 +428,7 @@ class Index(Features):
|
|||
elif self.cluster.version.startswith(("1.4.", "1.5.", "1.6.", "1.7.", "5.", "6.")):
|
||||
result = self.cluster.put(
|
||||
"/" + self.settings.index + "/_settings",
|
||||
data='{"index":{"refresh_interval":' + value2json(interval) + '}}',
|
||||
data={"index": {"refresh_interval": interval}},
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
@ -532,7 +524,7 @@ class Cluster(object):
|
|||
return cluster
|
||||
|
||||
@override
|
||||
def __init__(self, host, port=9200, explore_metadata=True, kwargs=None):
|
||||
def __init__(self, host, port=9200, explore_metadata=True, debug=False, kwargs=None):
|
||||
"""
|
||||
settings.explore_metadata == True - IF PROBING THE CLUSTER FOR METADATA IS ALLOWED
|
||||
settings.timeout == NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
|
||||
|
@ -542,12 +534,13 @@ class Cluster(object):
|
|||
|
||||
self.settings = kwargs
|
||||
self.info = None
|
||||
self._metadata = None
|
||||
self._metadata = Null
|
||||
self.index_new_since = {} # MAP FROM INDEX NAME TO TIME THE INDEX METADATA HAS CHANGED
|
||||
self.metadata_locker = Lock()
|
||||
self.debug = kwargs.debug
|
||||
self.version = None
|
||||
self.last_metadata = Date.now()
|
||||
self.debug = debug
|
||||
self._version = None
|
||||
self.path = kwargs.host + ":" + text_type(kwargs.port)
|
||||
self.get_metadata()
|
||||
|
||||
@override
|
||||
def get_or_create_index(
|
||||
|
@ -560,7 +553,7 @@ class Cluster(object):
|
|||
tjson=None,
|
||||
kwargs=None
|
||||
):
|
||||
best = self._get_best(kwargs)
|
||||
best = self.get_best_matching_index(index, alias)
|
||||
if not best:
|
||||
output = self.create_index(kwargs=kwargs, schema=schema, limit_replicas=limit_replicas)
|
||||
return output
|
||||
|
@ -573,39 +566,29 @@ class Cluster(object):
|
|||
|
||||
index = kwargs.index
|
||||
meta = self.get_metadata()
|
||||
columns = parse_properties(index, ".", meta.indices[index].mappings.values()[0].properties)
|
||||
type, about = _get_best_type_from_mapping(meta.indices[index].mappings)
|
||||
|
||||
tjson = kwargs.tjson
|
||||
if len(columns) != 0:
|
||||
kwargs.tjson = tjson or any(
|
||||
if tjson == None:
|
||||
tjson = True
|
||||
columns = parse_properties(index, ".", about.properties)
|
||||
if len(columns) > 0:
|
||||
tjson = any(
|
||||
c.names["."].startswith(TYPE_PREFIX) or
|
||||
c.names["."].find("." + TYPE_PREFIX) != -1
|
||||
for c in columns
|
||||
)
|
||||
if tjson is None and not kwargs.tjson:
|
||||
Log.warning("Not typed index, columns are:\n{{columns|json}}", columns=columns)
|
||||
kwargs.tjson = tjson
|
||||
|
||||
return Index(kwargs=kwargs, cluster=self)
|
||||
|
||||
def _get_best(self, settings):
|
||||
aliases = self.get_aliases()
|
||||
indexes = jx.sort([
|
||||
a
|
||||
for a in aliases
|
||||
if (a.alias == settings.index and settings.alias == None) or
|
||||
(re.match(re.escape(settings.index) + r'\d{8}_\d{6}', a.index) and settings.alias == None) or
|
||||
(a.index == settings.index and (settings.alias == None or a.alias == None or a.alias == settings.alias))
|
||||
], "index")
|
||||
return indexes.last()
|
||||
|
||||
@override
|
||||
def get_index(self, index, type=None, alias=None, tjson=None, read_only=True, kwargs=None):
|
||||
def get_index(self, index, type, alias=None, tjson=None, read_only=True, kwargs=None):
|
||||
"""
|
||||
TESTS THAT THE INDEX EXISTS BEFORE RETURNING A HANDLE
|
||||
"""
|
||||
if read_only:
|
||||
# GET EXACT MATCH, OR ALIAS
|
||||
aliases = self.get_aliases()
|
||||
aliases = wrap(self.get_aliases())
|
||||
if index in aliases.index:
|
||||
pass
|
||||
elif index in aliases.alias:
|
||||
|
@ -617,7 +600,7 @@ class Cluster(object):
|
|||
return Index(kwargs=kwargs, cluster=self)
|
||||
else:
|
||||
# GET BEST MATCH, INCLUDING PROTOTYPE
|
||||
best = self._get_best(kwargs)
|
||||
best = self.get_best_matching_index(index, alias)
|
||||
if not best:
|
||||
Log.error("Can not find index {{index_name}}", index_name=kwargs.index)
|
||||
|
||||
|
@ -643,6 +626,42 @@ class Cluster(object):
|
|||
return Index(read_only=True, kwargs=settings, cluster=self)
|
||||
Log.error("Can not find any index with alias {{alias_name}}", alias_name= alias)
|
||||
|
||||
def get_canonical_index(self, alias):
|
||||
"""
|
||||
RETURN THE INDEX USED BY THIS alias
|
||||
THIS IS ACCORDING TO THE STRICT LIFECYCLE RULES:
|
||||
THERE IS ONLY ONE INDEX WITH AN ALIAS
|
||||
"""
|
||||
output = jx.sort(set(
|
||||
i
|
||||
for ai in self.get_aliases()
|
||||
for a, i in [(ai.alias, ai.index)]
|
||||
if a == alias or i == alias or (re.match(re.escape(alias) + "\\d{8}_\\d{6}", i) and i != alias)
|
||||
))
|
||||
|
||||
if len(output) > 1:
|
||||
Log.error("only one index with given alias==\"{{alias}}\" expected", alias=alias)
|
||||
|
||||
if not output:
|
||||
return Null
|
||||
|
||||
return output.last()
|
||||
|
||||
def get_best_matching_index(self, index, alias=None):
|
||||
indexes = jx.sort(
|
||||
[
|
||||
ai_pair
|
||||
for pattern in [re.escape(index) + r'\d{8}_\d{6}']
|
||||
for ai_pair in self.get_aliases()
|
||||
for a, i in [(ai_pair.alias, ai_pair.index)]
|
||||
if (a == index and alias == None) or
|
||||
(re.match(pattern, i) and alias == None) or
|
||||
(i == index and (alias == None or a == None or a == alias))
|
||||
],
|
||||
"index"
|
||||
)
|
||||
return indexes.last()
|
||||
|
||||
def get_prototype(self, alias):
|
||||
"""
|
||||
RETURN ALL INDEXES THAT ARE INTENDED TO BE GIVEN alias, BUT HAVE NO
|
||||
|
@ -698,11 +717,13 @@ class Cluster(object):
|
|||
Log.error("Expecting a JSON schema")
|
||||
|
||||
for k, m in list(schema.mappings.items()):
|
||||
m.date_detection = False # DISABLE DATE DETECTION
|
||||
|
||||
if tjson:
|
||||
schema.mappings[k] = add_typed_annotations(m)
|
||||
m = schema.mappings[k] = wrap(add_typed_annotations(m))
|
||||
m = wrap(schema.mappings[k])
|
||||
|
||||
schema.mappings[k].date_detection = False # DISABLE DATE DETECTION
|
||||
m.date_detection = False # DISABLE DATE DETECTION
|
||||
m.dynamic_templates = (
|
||||
DEFAULT_DYNAMIC_TEMPLATES +
|
||||
m.dynamic_templates
|
||||
|
@ -737,11 +758,10 @@ class Cluster(object):
|
|||
)
|
||||
|
||||
# CONFIRM INDEX EXISTS
|
||||
while True:
|
||||
while not Till(seconds=30):
|
||||
try:
|
||||
state = self.get("/_cluster/state", retry={"times": 5}, timeout=3, stream=False)
|
||||
if index in state.metadata.indices:
|
||||
self._metadata = None
|
||||
metadata = self.get_metadata(force=True)
|
||||
if index in metadata.indices:
|
||||
break
|
||||
Log.note("Waiting for index {{index}} to appear", index=index)
|
||||
except Exception as e:
|
||||
|
@ -784,36 +804,49 @@ class Cluster(object):
|
|||
RETURN LIST OF {"alias":a, "index":i} PAIRS
|
||||
ALL INDEXES INCLUDED, EVEN IF NO ALIAS {"alias":Null}
|
||||
"""
|
||||
data = self.get("/_aliases", retry={"times": 5}, timeout=3, stream=False)
|
||||
output = []
|
||||
for index, desc in data.items():
|
||||
for index, desc in self.get_metadata().indices.items():
|
||||
if not desc["aliases"]:
|
||||
output.append({"index": index, "alias": None})
|
||||
yield wrap({"index": index})
|
||||
elif desc['aliases'][0] == index:
|
||||
Log.error("should not happen")
|
||||
else:
|
||||
for a in desc["aliases"]:
|
||||
output.append({"index": index, "alias": a})
|
||||
return wrap(output)
|
||||
yield wrap({"index": index, "alias": a})
|
||||
|
||||
def get_metadata(self, force=False):
|
||||
if not self.settings.explore_metadata:
|
||||
Log.error("Metadata exploration has been disabled")
|
||||
if not force and self._metadata and Date.now() < self.last_metadata + STALE_METADATA:
|
||||
return self._metadata
|
||||
|
||||
if not self._metadata or force:
|
||||
old_indices = self._metadata.indices
|
||||
response = self.get("/_cluster/state", retry={"times": 3}, timeout=30, stream=False)
|
||||
now = self.last_metadata = Date.now()
|
||||
with self.metadata_locker:
|
||||
self._metadata = wrap(response.metadata)
|
||||
# REPLICATE MAPPING OVER ALL ALIASES
|
||||
indices = self._metadata.indices
|
||||
for i, m in jx.sort(indices.items(), {"value": {"offset": 0}, "sort": -1}):
|
||||
m.index = i
|
||||
for a in m.aliases:
|
||||
if not indices[a]:
|
||||
indices[a] = m
|
||||
for new_index_name, new_meta in self._metadata.indices.items():
|
||||
old_index = old_indices[new_index_name]
|
||||
if not old_index:
|
||||
self.index_new_since[new_index_name] = now
|
||||
else:
|
||||
for type_name, new_about in new_meta.mappings.items():
|
||||
old_about = old_index.mappings[type_name]
|
||||
diff = diff_schema(new_about.properties, old_about.properties)
|
||||
if diff:
|
||||
self.index_new_since[new_index_name] = now
|
||||
for old_index_name, old_meta in old_indices.items():
|
||||
new_index = self._metadata.indices[old_index_name]
|
||||
if not new_index:
|
||||
self.index_new_since[old_index_name] = now
|
||||
self.info = wrap(self.get("/", stream=False))
|
||||
self.version = self.info.version.number
|
||||
self._version = self.info.version.number
|
||||
return self._metadata
|
||||
|
||||
return self._metadata
|
||||
@property
|
||||
def version(self):
|
||||
if self._version is None:
|
||||
self.get_metadata()
|
||||
return self._version
|
||||
|
||||
def post(self, path, **kwargs):
|
||||
url = self.settings.host + ":" + text_type(self.settings.port) + path
|
||||
|
@ -841,7 +874,7 @@ class Cluster(object):
|
|||
Log.note("POST {{url}}", url=url)
|
||||
response = http.post(url, **kwargs)
|
||||
if response.status_code not in [200, 201]:
|
||||
Log.error(response.reason.decode("latin1") + ": " + strings.limit(response.content.decode("latin1"), 100 if self.debug else 10000))
|
||||
Log.error(text_type(response.reason) + ": " + strings.limit(response.content.decode("latin1"), 100 if self.debug else 10000))
|
||||
if self.debug:
|
||||
Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
|
||||
details = json2value(utf82unicode(response.content))
|
||||
|
@ -1058,16 +1091,7 @@ class Alias(Features):
|
|||
mappings = self.cluster.get("/"+self.settings.index+"/_mapping")[self.settings.index]
|
||||
|
||||
# FIND MAPPING WITH MOST PROPERTIES (AND ASSUME THAT IS THE CANONICAL TYPE)
|
||||
max_prop = -1
|
||||
for _type, mapping in mappings.mappings.items():
|
||||
if _type == "_default_":
|
||||
continue
|
||||
num_prop = len(mapping.properties.keys())
|
||||
if max_prop < num_prop:
|
||||
max_prop = num_prop
|
||||
self.settings.type = _type
|
||||
type = _type
|
||||
|
||||
type, props = _get_best_type_from_mapping(mappings.mappings)
|
||||
if type == None:
|
||||
Log.error("Can not find schema type for index {{index}}", index=coalesce(self.settings.alias, self.settings.index))
|
||||
|
||||
|
@ -1077,7 +1101,7 @@ class Alias(Features):
|
|||
def url(self):
|
||||
return self.cluster.path.rstrip("/") + "/" + self.path.lstrip("/")
|
||||
|
||||
def get_schema(self, retry=True):
|
||||
def get_snowflake(self, retry=True):
|
||||
if self.settings.explore_metadata:
|
||||
indices = self.cluster.get_metadata().indices
|
||||
if not self.settings.alias or self.settings.alias==self.settings.index:
|
||||
|
@ -1186,6 +1210,7 @@ class Alias(Features):
|
|||
cause=e
|
||||
)
|
||||
|
||||
|
||||
def parse_properties(parent_index_name, parent_name, esProperties):
|
||||
"""
|
||||
RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
|
||||
|
@ -1195,8 +1220,6 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
index_name = parent_index_name
|
||||
column_name = concat_field(parent_name, name)
|
||||
jx_name = column_name
|
||||
if split_field(column_name)[-1] == EXISTS_TYPE:
|
||||
property.type = "exists"
|
||||
|
||||
if property.type == "nested" and property.properties:
|
||||
# NESTED TYPE IS A NEW TYPE DEFINITION
|
||||
|
@ -1209,7 +1232,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_index=index_name,
|
||||
es_column=column_name,
|
||||
names={".": jx_name},
|
||||
type="nested",
|
||||
es_type="nested",
|
||||
nested_path=ROOT_PATH
|
||||
))
|
||||
|
||||
|
@ -1223,7 +1246,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_index=index_name,
|
||||
es_column=column_name,
|
||||
nested_path=ROOT_PATH,
|
||||
type="source" if property.enabled == False else "object"
|
||||
es_type="source" if property.enabled == False else "object"
|
||||
))
|
||||
|
||||
if property.dynamic:
|
||||
|
@ -1240,7 +1263,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_column=column_name,
|
||||
names={".": jx_name},
|
||||
nested_path=ROOT_PATH,
|
||||
type=property.type
|
||||
es_type=property.type
|
||||
))
|
||||
if property.index_name and name != property.index_name:
|
||||
columns.append(Column(
|
||||
|
@ -1248,7 +1271,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_column=column_name,
|
||||
names={".": jx_name},
|
||||
nested_path=ROOT_PATH,
|
||||
type=property.type
|
||||
es_type=property.type
|
||||
))
|
||||
elif property.enabled == None or property.enabled == False:
|
||||
columns.append(Column(
|
||||
|
@ -1256,7 +1279,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_column=column_name,
|
||||
names={".": jx_name},
|
||||
nested_path=ROOT_PATH,
|
||||
type="source" if property.enabled == False else "object"
|
||||
es_type="source" if property.enabled == False else "object"
|
||||
))
|
||||
else:
|
||||
Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=query_path)
|
||||
|
@ -1264,6 +1287,25 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
return columns
|
||||
|
||||
|
||||
def _get_best_type_from_mapping(mapping):
|
||||
"""
|
||||
THERE ARE MULTIPLE TYPES IN AN INDEX, PICK THE BEST
|
||||
:param mapping: THE ES MAPPING DOCUMENT
|
||||
:return: (type_name, mapping) PAIR (mapping.properties WILL HAVE PROPERTIES
|
||||
"""
|
||||
best_type_name = None
|
||||
best_mapping = None
|
||||
for k, m in mapping.items():
|
||||
if k == "_default_":
|
||||
continue
|
||||
if best_type_name is None or len(m.properties) > len(best_mapping.properties):
|
||||
best_type_name = k
|
||||
best_mapping = m
|
||||
if best_type_name == None:
|
||||
return "_default_", mapping["_default_"]
|
||||
return best_type_name, best_mapping
|
||||
|
||||
|
||||
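_get_best_type_from_mapping(), added above, picks the non-default type with the most properties as the canonical one. A quick sketch of what it returns for a made-up two-type mapping, calling it as if from the same module (wrap() gives the dict the attribute access the function expects):

    from mo_dots import wrap

    mapping = wrap({
        "_default_": {"properties": {}},
        "unittest": {"properties": {"build": {}, "run": {}, "result": {}}},
        "other": {"properties": {"name": {}}},
    })

    best_type, best = _get_best_type_from_mapping(mapping)
    # best_type == "unittest", the type with the most properties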
def get_encoder(id_expression="_id"):
|
||||
get_id = jx_expression_to_function(id_expression)
|
||||
|
||||
|
@ -1416,6 +1458,32 @@ def add_typed_annotations(meta):
|
|||
return output
|
||||
|
||||
|
||||
def diff_schema(A, B):
|
||||
"""
|
||||
RETURN PROPERTIES IN A, BUT NOT IN B
|
||||
:param A: elasticsearch properties
|
||||
:param B: elasticsearch properties
|
||||
:return: (name, properties) PAIRS WHERE name IS DOT-DELIMITED PATH
|
||||
"""
|
||||
output =[]
|
||||
def _diff_schema(path, A, B):
|
||||
for k, av in A.items():
|
||||
bv = B[k]
|
||||
if bv == None:
|
||||
output.append((concat_field(path, k), av))
|
||||
elif av.type == bv.type:
|
||||
pass # OK
|
||||
elif (av.type == None and bv.type == 'object') or (av.type == 'object' and bv.type == None):
|
||||
pass # OK
|
||||
else:
|
||||
Log.warning("inconsistent types: {{typeA}} vs {{typeB}}", typeA=av.type, typeB=bv.type)
|
||||
_diff_schema(concat_field(path, k), av.properties, bv.properties)
|
||||
|
||||
# what to do with conflicts?
|
||||
_diff_schema(".", A, B)
|
||||
return output
|
||||
|
||||
|
||||
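diff_schema(), added above, walks two ES property trees and returns the dotted paths present in A but missing from B; get_metadata() uses it to decide whether an index's schema changed since the last poll. An illustrative call with made-up properties, again as if from the same module:

    from mo_dots import wrap

    old = wrap({"build": {"type": "object", "properties": {"id": {"type": "keyword"}}}})
    new = wrap({
        "build": {"type": "object", "properties": {
            "id": {"type": "keyword"},
            "date": {"type": "long"},
        }},
        "result": {"type": "boolean"},
    })

    # properties in `new` that `old` does not have yet
    print(diff_schema(new, old))   # roughly [("build.date", ...), ("result", ...)]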
DEFAULT_DYNAMIC_TEMPLATES = wrap([
|
||||
{
|
||||
"default_typed_boolean": {
|
||||
|
@ -1446,6 +1514,12 @@ DEFAULT_DYNAMIC_TEMPLATES = wrap([
|
|||
"mapping": {"type": "nested", "store": True},
|
||||
"match": NESTED_TYPE
|
||||
}
|
||||
},
|
||||
{
|
||||
"default_string": {
|
||||
"mapping": {"type": "keyword", "store": True},
|
||||
"match_mapping_type": "string"
|
||||
}
|
||||
}
|
||||
])
|
||||
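For readers unfamiliar with Elasticsearch dynamic templates (standard ES behavior, nothing specific to this commit): each entry tells ES how to map a field it has not seen before. A sketch of what the `default_string` entry above implies, with an invented field name:

```python
# a document introduces a brand-new string field ...
doc = {"revision": "9f4c1a"}          # field name invented for illustration

# ... "match_mapping_type": "string" matches it, so ES maps the new field
# using the template's "mapping" block instead of the default analyzed text:
resulting_mapping = {"revision": {"type": "keyword", "store": True}}
```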
|
||||
|
@ -1547,4 +1621,3 @@ _merge_type = {
|
|||
"nested": "nested"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ from __future__ import division
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import flask
|
||||
from flask import Response
|
||||
from mo_dots import coalesce
|
||||
|
||||
from mo_future import binary_type
|
||||
|
@ -28,10 +29,8 @@ def gzip_wrapper(func, compress_lower_limit=None):
|
|||
if 'gzip' not in accept_encoding.lower():
|
||||
return response
|
||||
|
||||
resp = response.data
|
||||
if isinstance(resp, binary_type) and len(resp) > compress_lower_limit:
|
||||
response.headers['Content-Encoding'] = 'gzip'
|
||||
response.set_data(b''.join(ibytes2icompressed([resp])))
|
||||
response.response = ibytes2icompressed(response.response)
|
||||
|
||||
return response
|
||||
|
||||
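A rough standard-library equivalent of the compression decision above (a sketch only; the real wrapper streams through `ibytes2icompressed` instead of buffering the whole body):

```python
import gzip

def maybe_gzip(body, accept_encoding, lower_limit=1000):
    # compress only when the client accepts gzip and the payload is big enough
    if 'gzip' not in (accept_encoding or '').lower():
        return body, {}
    if not isinstance(body, bytes) or len(body) <= lower_limit:
        return body, {}
    return gzip.compress(body), {'Content-Encoding': 'gzip'}

body, headers = maybe_gzip(b'{"ok": true} ' * 200, 'gzip, deflate')
```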
|
|
|
@ -31,7 +31,7 @@ from jx_python import jx
|
|||
from mo_dots import Data, coalesce, wrap, set_default, unwrap, Null
|
||||
from mo_future import text_type, PY2
|
||||
from mo_json import value2json, json2value
|
||||
from mo_logs import Log
|
||||
from mo_logs import Log, strings
|
||||
from mo_logs.strings import utf82unicode, unicode2utf8
|
||||
from mo_logs.exceptions import Except
|
||||
from mo_math import Math
|
||||
|
@ -157,7 +157,7 @@ def request(method, url, zip=None, retry=None, **kwargs):
|
|||
|
||||
try:
|
||||
if DEBUG:
|
||||
Log.note(u"http {{method}} to {{url}}", method=method, url=url)
|
||||
Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
|
||||
request_count += 1
|
||||
|
||||
del kwargs['retry']
|
||||
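The logging change above runs the method name through a `|upper` formatter and forces the URL to text. Assuming the formatter simply upper-cases (the template syntax is `mo_logs.strings.expand_template`; the values below are invented), the rendered line looks like:

```python
from mo_logs.strings import expand_template

line = expand_template(
    u"http {{method|upper}} to {{url}}",
    {"method": "get", "url": u"http://example.com/query"},
)
# -> u"http GET to http://example.com/query"
```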
|
@ -221,11 +221,6 @@ def post(url, **kwargs):
|
|||
return HttpResponse(request('post', url, **kwargs))
|
||||
|
||||
|
||||
def delete(url, **kwargs):
|
||||
kwargs.setdefault('stream', False)
|
||||
return HttpResponse(request('delete', url, **kwargs))
|
||||
|
||||
|
||||
def post_json(url, **kwargs):
|
||||
"""
|
||||
ASSUME RESPONSE IS IN JSON
|
||||
|
@ -238,15 +233,10 @@ def post_json(url, **kwargs):
|
|||
Log.error(u"Expecting `json` parameter")
|
||||
|
||||
response = post(url, **kwargs)
|
||||
c = response.content
|
||||
try:
|
||||
details = json2value(utf82unicode(c))
|
||||
except Exception as e:
|
||||
Log.error(u"Unexpected return value {{content}}", content=c, cause=e)
|
||||
|
||||
details = json2value(utf82unicode(response.content))
|
||||
if response.status_code not in [200, 201]:
|
||||
Log.error(u"Bad response", cause=Except.wrap(details))
|
||||
|
||||
Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
|
||||
else:
|
||||
return details
|
||||
|
||||
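Typical call to `post_json` after this change (endpoint and payload invented): the `json` keyword is required, the body is decoded as JSON, and failures now report the HTTP status code:

```python
result = post_json(
    "http://localhost:5000/query",            # hypothetical service
    json={"from": "unittest", "limit": 10},
)
# HTTP 200/201 -> `result` is the decoded JSON body
# anything else -> Log.error("Bad response code {{code}}", ...) raises
```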
|
||||
|
|
|
@ -21,7 +21,7 @@ from jx_base import python_type_to_json_type, INTEGER, NUMBER, EXISTS, NESTED, S
|
|||
from jx_python.expressions import jx_expression_to_function
|
||||
from jx_python.meta import Column
|
||||
from mo_dots import Data, FlatList, NullType, unwrap
|
||||
from mo_future import text_type, binary_type, utf8_json_encoder, long
|
||||
from mo_future import text_type, binary_type, utf8_json_encoder, long, sort_using_key
|
||||
from mo_json import ESCAPE_DCT, float2json, json2value
|
||||
from mo_json.encoder import problem_serializing, UnicodeBuilder, COMMA, COLON
|
||||
from mo_json.typed_encoder import encode_property, BOOLEAN_TYPE, NESTED_TYPE, EXISTS_TYPE, STRING_TYPE, NUMBER_TYPE
|
||||
|
@ -60,7 +60,7 @@ class TypedInserter(object):
|
|||
if es:
|
||||
_schema = Data()
|
||||
for c in parse_properties(es.settings.alias, ".", es.get_properties()):
|
||||
if c.type not in (OBJECT, NESTED):
|
||||
if c.es_type not in (OBJECT, NESTED):
|
||||
_schema[c.names["."]] = c
|
||||
self.schema = unwrap(_schema)
|
||||
else:
|
||||
|
@ -127,7 +127,7 @@ class TypedInserter(object):
|
|||
try:
|
||||
if isinstance(sub_schema, Column):
|
||||
value_json_type = python_type_to_json_type[value.__class__]
|
||||
column_json_type = es_type_to_json_type[sub_schema.type]
|
||||
column_json_type = es_type_to_json_type[sub_schema.es_type]
|
||||
|
||||
if value_json_type == column_json_type:
|
||||
pass # ok
|
||||
|
@ -283,9 +283,6 @@ class TypedInserter(object):
|
|||
append(_buffer, '}')
|
||||
elif _type is NullType:
|
||||
append(_buffer, 'null')
|
||||
elif hasattr(value, '__json__'):
|
||||
from mo_logs import Log
|
||||
Log.error("do not know how to handle")
|
||||
elif hasattr(value, '__data__'):
|
||||
self._typed_encode(value.__data__(), sub_schema, path, net_new_properties, _buffer)
|
||||
elif hasattr(value, '__iter__'):
|
||||
|
@ -338,11 +335,11 @@ class TypedInserter(object):
|
|||
sep = COMMA
|
||||
self._typed_encode(v, sub_schema, path, net_new_properties, _buffer)
|
||||
count += 1
|
||||
append(_buffer, ']'+COMMA+QUOTED_EXISTS_TYPE+COLON+ + text_type(count))
|
||||
append(_buffer, ']' + COMMA + QUOTED_EXISTS_TYPE + COLON + text_type(count))
|
||||
|
||||
def _dict2json(self, value, sub_schema, path, net_new_properties, _buffer):
|
||||
prefix = '{'
|
||||
for k, v in ((kk, value[kk]) for kk in sorted(value.keys())):
|
||||
for k, v in sort_using_key(value.items(), lambda r: r[0]):
|
||||
if v == None or v == '':
|
||||
continue
|
||||
append(_buffer, prefix)
|
||||
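The `_dict2json` change replaces a sorted-keys generator with `sort_using_key` from `mo_future`; assuming the helper sorts an iterable by the given key function (its apparent purpose), the two forms visit the items in the same order:

```python
value = {"b": 2, "a": 1, "c": 3}

old_order = [(k, value[k]) for k in sorted(value.keys())]
new_order = sorted(value.items(), key=lambda r: r[0])   # what sort_using_key is assumed to do

assert old_order == new_order    # [('a', 1), ('b', 2), ('c', 3)]
```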
|
|
|
@ -1,458 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
import mo_json
|
||||
from jx_base.expressions import jx_expression
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import coalesce
|
||||
from mo_dots import wrap, listwrap, unwrap
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_future import text_type
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from mo_logs.exceptions import suppress_exception
|
||||
from mo_logs.strings import indent, expand_template
|
||||
from pyLibrary import convert
|
||||
from pyLibrary.sql import SQL, SQL_IS_NULL, SQL_AND, SQL_IS_NOT_NULL, SQL_ORDERBY, SQL_LIMIT, sql_iso, sql_list, SQL_TRUE, sql_alias, SQL_OR, SQL_WHERE, SQL_NOT
|
||||
from pyLibrary.sql.mysql import int_list_packer
|
||||
|
||||
|
||||
class MySQL(object):
|
||||
"""
|
||||
jx to MySQL DATABASE QUERIES
|
||||
"""
|
||||
|
||||
@override
|
||||
def __init__(
|
||||
self,
|
||||
host,
|
||||
port,
|
||||
username,
|
||||
password,
|
||||
debug=False,
|
||||
schema=None,
|
||||
preamble=None,
|
||||
readonly=False,
|
||||
kwargs=None
|
||||
):
|
||||
from pyLibrary.sql.mysql import MySQL
|
||||
|
||||
self.settings = kwargs
|
||||
self._db = MySQL(kwargs)
|
||||
|
||||
def __data__(self):
|
||||
settings = self.settings.copy()
|
||||
settings.settings = None
|
||||
return unwrap(settings)
|
||||
|
||||
def query(self, query, stacked=False):
|
||||
"""
|
||||
TRANSLATE JSON QUERY EXPRESSION ON SINGLE TABLE TO SQL QUERY
|
||||
"""
|
||||
from jx_base.query import QueryOp
|
||||
|
||||
query = QueryOp.wrap(query)
|
||||
|
||||
sql, post = self._subquery(query, isolate=False, stacked=stacked)
|
||||
query.data = post(sql)
|
||||
return query.data
|
||||
|
||||
def update(self, query):
|
||||
self.db.execute("""
|
||||
UPDATE {{table_name}}
|
||||
SET {{assignment}}
|
||||
{{where}}
|
||||
""", {
|
||||
"table_name": query["from"],
|
||||
"assignment": ",".join(self.db.quote_column(k) + "=" + self.db.quote_value(v) for k, v in query.set),
|
||||
"where": self._where2sql(query.where)
|
||||
})
|
||||
|
||||
def _subquery(self, query, isolate=True, stacked=False):
|
||||
if isinstance(query, text_type):
|
||||
return self.db.quote_column(query), None
|
||||
if query.name: # IT WOULD BE SAFER TO WRAP TABLE REFERENCES IN A TYPED OBJECT (Cube, MAYBE?)
|
||||
return self.db.quote_column(query.name), None
|
||||
|
||||
if query.edges:
|
||||
# RETURN A CUBE
|
||||
sql, post = self._grouped(query, stacked)
|
||||
else:
|
||||
select = listwrap(query.select)
|
||||
if select[0].aggregate != "none":
|
||||
sql, post = self._aggop(query)
|
||||
else:
|
||||
sql, post = self._setop(query)
|
||||
|
||||
if isolate:
|
||||
return "(\n" + sql + "\n) a\n", post
|
||||
else:
|
||||
return sql, post
|
||||
|
||||
def _grouped(self, query, stacked=False):
|
||||
select = listwrap(query.select)
|
||||
|
||||
# RETURN SINGLE OBJECT WITH AGGREGATES
|
||||
for s in select:
|
||||
if s.aggregate not in aggregates:
|
||||
Log.error("Expecting all columns to have an aggregate: {{select}}", select=s)
|
||||
|
||||
selects = FlatList()
|
||||
groups = FlatList()
|
||||
edges = query.edges
|
||||
for e in edges:
|
||||
if e.domain.type != "default":
|
||||
Log.error("domain of type {{type}} not supported, yet", type=e.domain.type)
|
||||
groups.append(e.value)
|
||||
selects.append(sql_alias(e.value, self.db.quote_column(e.name)))
|
||||
|
||||
for s in select:
|
||||
selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value), self.db.quote_column(s.name)))
|
||||
|
||||
sql = expand_template("""
|
||||
SELECT
|
||||
{{selects}}
|
||||
FROM
|
||||
{{table}}
|
||||
{{where}}
|
||||
GROUP BY
|
||||
{{groups}}
|
||||
""", {
|
||||
"selects": SQL(",\n".join(selects)),
|
||||
"groups": SQL(",\n".join(groups)),
|
||||
"table": self._subquery(query["from"])[0],
|
||||
"where": self._where2sql(query.where)
|
||||
})
|
||||
|
||||
def post_stacked(sql):
|
||||
# RETURN IN THE USUAL DATABASE RESULT SET FORMAT
|
||||
return self.db.query(sql)
|
||||
|
||||
def post(sql):
|
||||
# FIND OUT THE default DOMAIN SIZES
|
||||
result = self.db.column_query(sql)
|
||||
num_edges = len(edges)
|
||||
for e, edge in enumerate(edges):
|
||||
domain = edge.domain
|
||||
if domain.type == "default":
|
||||
domain.type = "set"
|
||||
parts = set(result[e])
|
||||
domain.partitions = [{"index": i, "value": p} for i, p in enumerate(parts)]
|
||||
domain.map = {p: i for i, p in enumerate(parts)}
|
||||
else:
|
||||
Log.error("Do not know what to do here, yet")
|
||||
|
||||
# FILL THE DATA CUBE
|
||||
maps = [(unwrap(e.domain.map), result[i]) for i, e in enumerate(edges)]
|
||||
cubes = FlatList()
|
||||
for c, s in enumerate(select):
|
||||
data = Matrix(*[len(e.domain.partitions) + (1 if e.allow_nulls else 0) for e in edges])
|
||||
for rownum, value in enumerate(result[c + num_edges]):
|
||||
coord = [m[r[rownum]] for m, r in maps]
|
||||
data[coord] = value
|
||||
cubes.append(data)
|
||||
|
||||
if isinstance(query.select, list):
|
||||
return cubes
|
||||
else:
|
||||
return cubes[0]
|
||||
|
||||
return sql, post if not stacked else post_stacked
|
||||
|
||||
def _aggop(self, query):
|
||||
"""
|
||||
SINGLE ROW RETURNED WITH AGGREGATES
|
||||
"""
|
||||
if isinstance(query.select, list):
|
||||
# RETURN SINGLE OBJECT WITH AGGREGATES
|
||||
for s in query.select:
|
||||
if s.aggregate not in aggregates:
|
||||
Log.error("Expecting all columns to have an aggregate: {{select}}", select=s)
|
||||
|
||||
selects = FlatList()
|
||||
for s in query.select:
|
||||
selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value),self.db.quote_column(s.name)))
|
||||
|
||||
sql = expand_template("""
|
||||
SELECT
|
||||
{{selects}}
|
||||
FROM
|
||||
{{table}}
|
||||
{{where}}
|
||||
""", {
|
||||
"selects": SQL(",\n".join(selects)),
|
||||
"table": self._subquery(query["from"])[0],
|
||||
"where": self._where2sql(query.filter)
|
||||
})
|
||||
|
||||
return sql, lambda sql: self.db.column(sql)[0] # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
|
||||
else:
|
||||
# RETURN SINGLE VALUE
|
||||
s0 = query.select
|
||||
if s0.aggregate not in aggregates:
|
||||
Log.error("Expecting all columns to have an aggregate: {{select}}", select=s0)
|
||||
|
||||
select = sql_alias(aggregates[s0.aggregate].replace("{{code}}", s0.value) , self.db.quote_column(s0.name))
|
||||
|
||||
sql = expand_template("""
|
||||
SELECT
|
||||
{{selects}}
|
||||
FROM
|
||||
{{table}}
|
||||
{{where}}
|
||||
""", {
|
||||
"selects": SQL(select),
|
||||
"table": self._subquery(query["from"])[0],
|
||||
"where": self._where2sql(query.where)
|
||||
})
|
||||
|
||||
def post(sql):
|
||||
result = self.db.column_query(sql)
|
||||
return result[0][0]
|
||||
|
||||
return sql, post # RETURN SINGLE VALUE
|
||||
|
||||
def _setop(self, query):
|
||||
"""
|
||||
NO AGGREGATION, SIMPLE LIST COMPREHENSION
|
||||
"""
|
||||
if isinstance(query.select, list):
|
||||
# RETURN BORING RESULT SET
|
||||
selects = FlatList()
|
||||
for s in listwrap(query.select):
|
||||
if isinstance(s.value, Mapping):
|
||||
for k, v in s.value.items():
|
||||
selects.append(sql_alias(v, self.db.quote_column(s.name + "." + k)))
|
||||
if isinstance(s.value, list):
|
||||
for i, ss in enumerate(s.value):
|
||||
selects.append(sql_alias(s.value, self.db.quote_column(s.name + "," + str(i))))
|
||||
else:
|
||||
selects.append(sql_alias(s.value, self.db.quote_column(s.name)))
|
||||
|
||||
sql = expand_template("""
|
||||
SELECT
|
||||
{{selects}}
|
||||
FROM
|
||||
{{table}}
|
||||
{{where}}
|
||||
{{sort}}
|
||||
{{limit}}
|
||||
""", {
|
||||
"selects": SQL(",\n".join(selects)),
|
||||
"table": self._subquery(query["from"])[0],
|
||||
"where": self._where2sql(query.where),
|
||||
"limit": self._limit2sql(query.limit),
|
||||
"sort": self._sort2sql(query.sort)
|
||||
})
|
||||
|
||||
def post_process(sql):
|
||||
result = self.db.query(sql)
|
||||
for s in listwrap(query.select):
|
||||
if isinstance(s.value, Mapping):
|
||||
for r in result:
|
||||
r[s.name] = {}
|
||||
for k, v in s.value.items():
|
||||
r[s.name][k] = r[s.name + "." + k]
|
||||
r[s.name + "." + k] = None
|
||||
|
||||
if isinstance(s.value, list):
|
||||
# REWRITE AS TUPLE
|
||||
for r in result:
|
||||
r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
|
||||
for i, ss in enumerate(s.value):
|
||||
r[s.name + "," + str(i)] = None
|
||||
|
||||
expand_json(result)
|
||||
return result
|
||||
|
||||
return sql, post_process # RETURN BORING RESULT SET
|
||||
else:
|
||||
# RETURN LIST OF VALUES
|
||||
if query.select.value == ".":
|
||||
select = "*"
|
||||
else:
|
||||
name = query.select.name
|
||||
select = sql_alias(query.select.value, self.db.quote_column(name))
|
||||
|
||||
sql = expand_template("""
|
||||
SELECT
|
||||
{{selects}}
|
||||
FROM
|
||||
{{table}}
|
||||
{{where}}
|
||||
{{sort}}
|
||||
{{limit}}
|
||||
""", {
|
||||
"selects": SQL(select),
|
||||
"table": self._subquery(query["from"])[0],
|
||||
"where": self._where2sql(query.where),
|
||||
"limit": self._limit2sql(query.limit),
|
||||
"sort": self._sort2sql(query.sort)
|
||||
})
|
||||
|
||||
if query.select.value == ".":
|
||||
def post(sql):
|
||||
result = self.db.query(sql)
|
||||
expand_json(result)
|
||||
return result
|
||||
|
||||
return sql, post
|
||||
else:
|
||||
return sql, lambda sql: [r[name] for r in self.db.query(sql)] # RETURNING LIST OF VALUES
|
||||
|
||||
def _sort2sql(self, sort):
|
||||
"""
|
||||
RETURN ORDER BY CLAUSE
|
||||
"""
|
||||
if not sort:
|
||||
return ""
|
||||
return SQL_ORDERBY + sql_list([self.db.quote_column(o.field) + (" DESC" if o.sort == -1 else "") for o in sort])
|
||||
|
||||
def _limit2sql(self, limit):
|
||||
return SQL("" if not limit else SQL_LIMIT + str(limit))
|
||||
|
||||
def _where2sql(self, where):
|
||||
if where == None:
|
||||
return ""
|
||||
return SQL_WHERE + _esfilter2sqlwhere(self.db, where)
|
||||
|
||||
|
||||
def esfilter2sqlwhere(db, esfilter):
|
||||
return _esfilter2sqlwhere(db, esfilter)
|
||||
|
||||
|
||||
def _esfilter2sqlwhere(db, esfilter):
|
||||
"""
|
||||
CONVERT ElasticSearch FILTER TO SQL FILTER
|
||||
db - REQUIRED TO PROPERLY QUOTE VALUES AND COLUMN NAMES
|
||||
"""
|
||||
esfilter = wrap(esfilter)
|
||||
|
||||
if esfilter is True:
|
||||
return SQL_TRUE
|
||||
elif esfilter["and"]:
|
||||
return sql_iso(SQL_AND.join([esfilter2sqlwhere(db, a) for a in esfilter["and"]]))
|
||||
elif esfilter["or"]:
|
||||
return sql_iso(SQL_OR.join([esfilter2sqlwhere(db, a) for a in esfilter["or"]]))
|
||||
elif esfilter["not"]:
|
||||
return SQL_NOT + sql_iso(esfilter2sqlwhere(db, esfilter["not"]))
|
||||
elif esfilter.term:
|
||||
return sql_iso(SQL_AND.join([
|
||||
db.quote_column(col) + SQL("=") + db.quote_value(val)
|
||||
for col, val in esfilter.term.items()
|
||||
]))
|
||||
elif esfilter.terms:
|
||||
for col, v in esfilter.terms.items():
|
||||
if len(v) == 0:
|
||||
return "FALSE"
|
||||
|
||||
with suppress_exception:
|
||||
int_list = convert.value2intlist(v)
|
||||
has_null = False
|
||||
for vv in v:
|
||||
if vv == None:
|
||||
has_null = True
|
||||
break
|
||||
if int_list:
|
||||
filter = int_list_packer(col, int_list)
|
||||
if has_null:
|
||||
return esfilter2sqlwhere(db, {"or": [{"missing": col}, filter]})
|
||||
else:
|
||||
return esfilter2sqlwhere(db, filter)
|
||||
else:
|
||||
if has_null:
|
||||
return esfilter2sqlwhere(db, {"missing": col})
|
||||
else:
|
||||
return "false"
|
||||
return db.quote_column(col) + " in " + sql_iso(sql_list([db.quote_value(val) for val in v]))
|
||||
elif esfilter.script:
|
||||
return sql_iso(esfilter.script)
|
||||
elif esfilter.range:
|
||||
name2sign = {
|
||||
"gt": SQL(">"),
|
||||
"gte": SQL(">="),
|
||||
"lte": SQL("<="),
|
||||
"lt": SQL("<")
|
||||
}
|
||||
|
||||
def single(col, r):
|
||||
min = coalesce(r["gte"], r[">="])
|
||||
max = coalesce(r["lte"], r["<="])
|
||||
if min != None and max != None:
|
||||
# SPECIAL CASE (BETWEEN)
|
||||
sql = db.quote_column(col) + SQL(" BETWEEN ") + db.quote_value(min) + SQL_AND + db.quote_value(max)
|
||||
else:
|
||||
sql = SQL_AND.join(
|
||||
db.quote_column(col) + name2sign[sign] + db.quote_value(value)
|
||||
for sign, value in r.items()
|
||||
)
|
||||
return sql
|
||||
|
||||
output = sql_iso(SQL_AND.join([single(col, ranges) for col, ranges in esfilter.range.items()]))
|
||||
return output
|
||||
elif esfilter.missing:
|
||||
if isinstance(esfilter.missing, text_type):
|
||||
return sql_iso(db.quote_column(esfilter.missing) + SQL_IS_NULL)
|
||||
else:
|
||||
return sql_iso(db.quote_column(esfilter.missing.field) + SQL_IS_NULL)
|
||||
elif esfilter.exists:
|
||||
if isinstance(esfilter.exists, text_type):
|
||||
return sql_iso(db.quote_column(esfilter.exists) + SQL_IS_NOT_NULL)
|
||||
else:
|
||||
return sql_iso(db.quote_column(esfilter.exists.field) + SQL_IS_NOT_NULL)
|
||||
elif esfilter.match_all:
|
||||
return SQL_TRUE
|
||||
elif esfilter.instr:
|
||||
return sql_iso(SQL_AND.join(["instr" + sql_iso(db.quote_column(col) + ", " + db.quote_value(val)) + ">0" for col, val in esfilter.instr.items()]))
|
||||
else:
|
||||
Log.error("Can not convert esfilter to SQL: {{esfilter}}", esfilter=esfilter)
|
||||
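Although this module is being deleted, the translation above is easiest to follow with a concrete trace (filter values invented; backtick quoting assumed from the MySQL `quote_column`/`quote_value` helpers):

```python
esfilter = {"and": [
    {"term": {"platform": "linux64"}},
    {"missing": "suite"},
]}

# _esfilter2sqlwhere(db, esfilter) yields, roughly:
#   ((`platform`='linux64') AND (`suite` IS NULL))
```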
|
||||
|
||||
def expand_json(rows):
|
||||
# CONVERT JSON TO VALUES
|
||||
for r in rows:
|
||||
for k, json in list(r.items()):
|
||||
if isinstance(json, text_type) and json[0:1] in ("[", "{"):
|
||||
with suppress_exception:
|
||||
value = mo_json.json2value(json)
|
||||
r[k] = value
|
||||
|
||||
|
||||
# MAP NAME TO SQL FUNCTION
|
||||
aggregates = {
|
||||
"one": "COUNT({{code}})",
|
||||
"sum": "SUM({{code}})",
|
||||
"add": "SUM({{code}})",
|
||||
"count": "COUNT({{code}})",
|
||||
"maximum": "MAX({{code}})",
|
||||
"minimum": "MIN({{code}})",
|
||||
"max": "MAX({{code}})",
|
||||
"min": "MIN({{code}})",
|
||||
"mean": "AVG({{code}})",
|
||||
"average": "AVG({{code}})",
|
||||
"avg": "AVG({{code}})",
|
||||
"N": "COUNT({{code}})",
|
||||
"X0": "COUNT({{code}})",
|
||||
"X1": "SUM({{code}})",
|
||||
"X2": "SUM(POWER({{code}}, 2))",
|
||||
"std": "STDDEV({{code}})",
|
||||
"stddev": "STDDEV({{code}})",
|
||||
"var": "POWER(STDDEV({{code}}), 2)",
|
||||
"variance": "POWER(STDDEV({{code}}), 2)"
|
||||
}
|
||||
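The map above is applied by `_aggop` and `_grouped` through plain template substitution, for example (column name invented):

```python
sql_fragment = aggregates["average"].replace("{{code}}", "duration")
# -> "AVG(duration)"
```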
|
||||
from jx_base.container import type2container
|
||||
|
||||
type2container["mysql"] = MySQL
|
|
@ -16,13 +16,10 @@ import subprocess
|
|||
from collections import Mapping
|
||||
from datetime import datetime
|
||||
|
||||
from pymysql import connect, InterfaceError, cursors
|
||||
|
||||
import mo_json
|
||||
from jx_python import jx
|
||||
from mo_dots import coalesce, wrap, listwrap, unwrap
|
||||
from mo_files import File
|
||||
from mo_future import text_type, utf8_json_encoder, binary_type
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from mo_logs.exceptions import Except, suppress_exception
|
||||
|
@ -31,7 +28,10 @@ from mo_logs.strings import indent
|
|||
from mo_logs.strings import outdent
|
||||
from mo_math import Math
|
||||
from mo_times import Date
|
||||
from pyLibrary.sql import SQL, SQL_NULL, SQL_SELECT, SQL_LIMIT, SQL_WHERE, SQL_LEFT_JOIN, SQL_COMMA, SQL_FROM, SQL_AND, sql_list, sql_iso, SQL_ASC, SQL_TRUE, SQL_ONE, SQL_DESC, SQL_IS_NULL, sql_alias
|
||||
from pymysql import connect, InterfaceError, cursors
|
||||
|
||||
from mo_future import text_type, utf8_json_encoder
|
||||
from pyLibrary.sql import SQL, SQL_NULL, SQL_SELECT, SQL_LIMIT, SQL_WHERE, SQL_LEFT_JOIN, SQL_FROM, SQL_AND, sql_list, sql_iso, SQL_ASC, SQL_TRUE, SQL_ONE, SQL_DESC, SQL_IS_NULL, sql_alias
|
||||
from pyLibrary.sql.sqlite import join_column
|
||||
|
||||
DEBUG = False
|
||||
|
|
|
@ -17,6 +17,8 @@ import re
|
|||
import sys
|
||||
from collections import Mapping
|
||||
|
||||
from mo_kwargs import override
|
||||
|
||||
from mo_future import allocate_lock as _allocate_lock, text_type, zip_longest
|
||||
from mo_dots import Data, coalesce
|
||||
from mo_files import File
|
||||
|
@ -81,7 +83,8 @@ class Sqlite(DB):
|
|||
|
||||
canonical = None
|
||||
|
||||
def __init__(self, filename=None, db=None, upgrade=True):
|
||||
@override
|
||||
def __init__(self, filename=None, db=None, upgrade=True, load_functions=False, kwargs=None):
|
||||
"""
|
||||
:param db: Optional, wrap a sqlite db in a thread
|
||||
:return: Multithread-safe database
|
||||
|
@ -89,6 +92,7 @@ class Sqlite(DB):
|
|||
if upgrade and not _upgraded:
|
||||
_upgrade()
|
||||
|
||||
self.settings = kwargs
|
||||
self.filename = File(filename).abspath
|
||||
self.db = db
|
||||
self.queue = Queue("sql commands") # HOLD (command, result, signal) PAIRS
|
||||
|
@ -96,6 +100,8 @@ class Sqlite(DB):
|
|||
self.get_trace = TRACE
|
||||
self.upgrade = upgrade
|
||||
self.closed = False
|
||||
if DEBUG:
|
||||
Log.note("Sqlite version {{version}}", version=self.query("select sqlite_version()").data[0][0])
|
||||
|
||||
def _enhancements(self):
|
||||
def regex(pattern, value):
|
||||
|
@ -196,28 +202,16 @@ class Sqlite(DB):
|
|||
try:
|
||||
if DEBUG:
|
||||
Log.note("Sqlite version {{version}}", version=sqlite3.sqlite_version)
|
||||
try:
|
||||
if Sqlite.canonical:
|
||||
self.db = Sqlite.canonical
|
||||
else:
|
||||
self.db = sqlite3.connect(coalesce(self.filename, ':memory:'), check_same_thread=False)
|
||||
|
||||
library_loc = File.new_instance(sys.modules[__name__].__file__, "../..")
|
||||
full_path = File.new_instance(library_loc, "vendor/sqlite/libsqlitefunctions.so").abspath
|
||||
try:
|
||||
trace = extract_stack(0)[0]
|
||||
if self.upgrade:
|
||||
if os.name == 'nt':
|
||||
file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions.so")
|
||||
else:
|
||||
file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions")
|
||||
|
||||
full_path = file.abspath
|
||||
self.db.enable_load_extension(True)
|
||||
self.db.execute(SQL_SELECT + "load_extension" + sql_iso(self.quote_value(full_path)))
|
||||
except Exception as e:
|
||||
if not _load_extension_warning_sent:
|
||||
_load_extension_warning_sent = True
|
||||
Log.warning("Could not load {{file}}}, doing without. (no SQRT for you!)", file=full_path, cause=e)
|
||||
Log.error("could not open file {{filename}}", filename=self.filename)
|
||||
|
||||
if self.settings.load_functions:
|
||||
self._load_functions()
|
||||
|
||||
while not please_stop:
|
||||
quad = self.queue.pop(till=please_stop)
|
||||
|
@ -283,11 +277,25 @@ class Sqlite(DB):
|
|||
Log.note("Database is closed")
|
||||
self.db.close()
|
||||
|
||||
def quote_column(self, column_name, table=None):
|
||||
return quote_column(column_name, table)
|
||||
def _load_functions(self):
|
||||
global _load_extension_warning_sent
|
||||
library_loc = File.new_instance(sys.modules[__name__].__file__, "../..")
|
||||
full_path = File.new_instance(library_loc, "vendor/sqlite/libsqlitefunctions.so").abspath
|
||||
try:
|
||||
trace = extract_stack(0)[0]
|
||||
if self.upgrade:
|
||||
if os.name == 'nt':
|
||||
file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions.so")
|
||||
else:
|
||||
file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions")
|
||||
|
||||
def quote_value(self, value):
|
||||
return quote_value(value)
|
||||
full_path = file.abspath
|
||||
self.db.enable_load_extension(True)
|
||||
self.db.execute(SQL_SELECT + "load_extension" + sql_iso(quote_value(full_path)))
|
||||
except Exception as e:
|
||||
if not _load_extension_warning_sent:
|
||||
_load_extension_warning_sent = True
|
||||
Log.warning("Could not load {{file}}, doing without. (no SQRT for you!)", file=full_path, cause=e)
|
||||
|
||||
def create_new_functions(self):
|
||||
|
||||
|
@ -297,6 +305,7 @@ class Sqlite(DB):
|
|||
|
||||
self.db.create_function("REGEXP", 2, regexp)
|
||||
|
||||
|
||||
_no_need_to_quote = re.compile(r"^\w+$", re.UNICODE)
|
||||
|
||||
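With this change the SQLite math extension is only loaded on request: the constructor gains a `load_functions` flag (default `False`) and the loading logic lives in `_load_functions`. A minimal sketch of the new call (file name and query are illustrative):

```python
from pyLibrary.sql.sqlite import Sqlite

db = Sqlite(filename="metadata.sqlite", load_functions=True)   # opt in to libsqlitefunctions
version = db.query("SELECT sqlite_version()")
```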
|
||||
|
|