lib updates
This commit is contained in:
Parent 4040b0254c
Commit 0ddc8ea11a
@@ -11,50 +11,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from collections import Mapping
from uuid import uuid4

from mo_dots import NullType, Data, FlatList, wrap, coalesce, listwrap
from mo_future import text_type, none_type, PY2
from mo_dots import wrap, coalesce, listwrap
from mo_future import text_type
from mo_json import value2json
from mo_logs import Log
from mo_logs.strings import expand_template, quote
from mo_times import Date

IS_NULL = '0'
BOOLEAN = 'boolean'
INTEGER = 'integer'
NUMBER = 'number'
STRING = 'string'
OBJECT = 'object'
NESTED = "nested"
EXISTS = "exists"

JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
STRUCT = [EXISTS, OBJECT, NESTED]


python_type_to_json_type = {
    int: NUMBER,
    text_type: STRING,
    float: NUMBER,
    None: OBJECT,
    bool: BOOLEAN,
    NullType: OBJECT,
    none_type: OBJECT,
    Data: OBJECT,
    dict: OBJECT,
    object: OBJECT,
    Mapping: OBJECT,
    list: NESTED,
    FlatList: NESTED,
    Date: NUMBER
}

if PY2:
    python_type_to_json_type[str]=STRING
    python_type_to_json_type[long]=NUMBER


def generateGuid():
    """Gets a random GUID.

@@ -65,9 +29,8 @@ def generateGuid():

    a=GenerateGuid()
    import uuid
    print a
    print uuid.UUID(a).hex
    print(a)
    print(uuid.UUID(a).hex)
    """
    return text_type(uuid4())


@@ -234,11 +197,10 @@ class TableDesc(DataClass(
)):
    @property
    def columns(self):
        Log.error("not implemented")
        raise NotImplementedError()
        # return singlton.get_columns(table_name=self.name)


Column = DataClass(
    "Column",
    [
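Note: the hunks above consolidate the JSON type constants and the python_type_to_json_type map in jx_base. As a minimal sketch of how such a map is typically consulted (the MRO walk and the OBJECT fallback are assumptions for illustration, not part of this diff):

# Sketch only: assumes python_type_to_json_type and OBJECT as defined above.
def json_type_of(value):
    for cls in type(value).__mro__:          # so subclasses of Mapping, etc. resolve too
        if cls in python_type_to_json_type:
            return python_type_to_json_type[cls]
    return OBJECT                            # assumed fallback for unmapped classes

# json_type_of(3.14) -> "number"; json_type_of("hi") -> "string"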
@@ -52,7 +52,6 @@ class Container(object):
    METADATA FOR A Container IS CALL A Namespace
    """

    __slots__ = ["data", "namespaces"]

    @classmethod
    def new_instance(type, frum, schema=None):

@@ -100,40 +99,40 @@ class Container(object):
    def query(self, query):
        if query.frum != self:
            Log.error("not expected")
        Log.error("Not implemented")
        raise NotImplementedError()

    def filter(self, where):
        return self.where(where)

    def where(self, where):
        _ = where
        Log.error("not implemented")
        raise NotImplementedError()

    def sort(self, sort):
        _ = sort
        Log.error("not implemented")
        raise NotImplementedError()

    def select(self, select):
        _ = select
        Log.error("not implemented")
        raise NotImplementedError()

    def window(self, window):
        Log.error("not implemented")
        raise NotImplementedError()

    def having(self, having):
        _ = having
        Log.error("not implemented")
        raise NotImplementedError()

    def format(self, format):
        _ = format
        Log.error("not implemented")
        raise NotImplementedError()

    def get_columns(self, table_name):
        """
        USE THE frum TO DETERMINE THE COLUMNS
        """
        Log.error("Not implemented")
        raise NotImplementedError()

    @property
    def schema(self):
        Log.error("Not implemented")
        raise NotImplementedError()
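Note: throughout this commit, placeholder method bodies that previously logged Log.error("not implemented") now raise NotImplementedError(). A small, self-contained illustration of the pattern (class and method names here are illustrative, not taken from the diff):

class AbstractContainer(object):
    def where(self, where):
        # A standard, catchable exception with a clean traceback; the base class
        # no longer needs the logging library just to signal "override me".
        raise NotImplementedError()

try:
    AbstractContainer().where({"eq": {"a": 1}})
except NotImplementedError:
    pass  # callers and linters can rely on the built-in exception type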
@@ -14,6 +14,7 @@ from __future__ import unicode_literals
from collections import Mapping

import mo_dots as dot
from jx_base.domains import Domain, ALGEBRAIC, KNOWN
from mo_dots import Null, coalesce, join_field, split_field, Data
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList

@@ -21,8 +22,6 @@ from mo_logs import Log
from mo_math import SUM
from mo_times.timer import Timer

from jx_base.domains import Domain, ALGEBRAIC, KNOWN

DEFAULT_QUERY_LIMIT = 20


@@ -138,7 +137,7 @@ class Dimension(object):
    def edges2value(*values):
        if isinstance(fields, Mapping):
            output = Data()
            for e, v in zip(edges, values):
            for e, v in transpose(edges, values):
                output[e.name] = v
            return output
        else:

@@ -93,7 +93,7 @@ class Domain(object):
        return output

    def getDomain(self):
        Log.error("Not implemented")
        raise NotImplementedError()

    def verify_attributes_not_null(self, attribute_names):
        for name in attribute_names:
@@ -16,11 +16,11 @@ from collections import Mapping
from decimal import Decimal

import mo_json
from jx_base import OBJECT, python_type_to_json_type, BOOLEAN, NUMBER, INTEGER, STRING, IS_NULL
from jx_base.queries import is_variable_name, get_property_name
from mo_dots import coalesce, wrap, Null, split_field
from mo_future import text_type, utf8_json_encoder, get_function_name, zip_longest
from mo_json import scrub
from mo_json.typed_encoder import IS_NULL, OBJECT, BOOLEAN, python_type_to_json_type, NUMBER, INTEGER, STRING
from mo_logs import Log, Except
from mo_math import Math, MAX, MIN, UNION
from mo_times.dates import Date, unicode2Date

@@ -80,7 +80,7 @@ def _jx_expression(expr):
    elif isinstance(expr, text_type):
        return Variable(expr)
    elif isinstance(expr, (list, tuple)):
        return TupleOp("tuple", map(jx_expression, expr)) # FORMALIZE
        return TupleOp("tuple", list(map(jx_expression, expr))) # FORMALIZE

    expr = wrap(expr)
    try:

@@ -874,6 +874,11 @@ class InequalityOp(Expression):
        else:
            return {self.op: [self.lhs.__data__(), self.rhs.__data__()]}

    def __eq__(self, other):
        if not isinstance(other, InequalityOp):
            return False
        return self.op == other.op and self.lhs == other.lhs and self.rhs == other.rhs

    def vars(self):
        return self.lhs.vars() | self.rhs.vars()

@@ -2558,7 +2563,9 @@ class InOp(Expression):
    def partial_eval(self):
        value = self.value.partial_eval()
        superset = self.superset.partial_eval()
        if isinstance(value, Literal) and isinstance(superset, Literal):
            if superset is NULL:
                return FALSE
        elif isinstance(value, Literal) and isinstance(superset, Literal):
            return Literal(None, self())
        else:
            return self
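Note: the InOp.partial_eval hunk above adds constant folding: when both the value and the superset are literals, the membership test is answered at planning time, and an IN against a null superset becomes FALSE outright. A plain-Python illustration of the intent (the real code returns Literal, NULL and FALSE expression nodes, not bare Python values):

def fold_in(value, superset):
    # Both operands are already constants, so compute the answer now.
    if superset is None:
        return False            # "x IN <null>" can never match
    return value in superset    # replaces the whole InOp with a literal result

# fold_in("b", ["a", "b"]) -> True; fold_in("b", None) -> False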
@@ -28,7 +28,6 @@ class Namespace(object):
    def get_schema(self, name):
        raise NotImplementedError()


    def convert(self, expr):
        raise NotImplementedError()


@@ -15,7 +15,6 @@ from collections import Mapping
from copy import copy

import jx_base
from jx_base import STRUCT
from jx_base.dimensions import Dimension
from jx_base.domains import Domain, SetDomain, DefaultDomain
from jx_base.expressions import jx_expression, Expression, Variable, LeavesOp, ScriptOp, OffsetOp, TRUE, FALSE

@@ -25,7 +24,7 @@ from mo_dots import coalesce, Null, set_default, unwraplist, literal_field
from mo_dots import wrap, unwrap, listwrap
from mo_dots.lists import FlatList
from mo_future import text_type
from mo_json.typed_encoder import untype_path
from mo_json.typed_encoder import untype_path, STRUCT
from mo_logs import Log
from mo_math import AND, UNION, Math


@@ -13,9 +13,8 @@ from __future__ import unicode_literals

from copy import copy

from jx_base import STRUCT, NESTED, OBJECT, EXISTS
from mo_dots import Null, startswith_field, set_default, wrap
from mo_json.typed_encoder import unnest_path, untype_path
from mo_json.typed_encoder import unnest_path, untype_path, STRUCT, EXISTS, OBJECT, NESTED
from mo_logs import Log


@@ -11,15 +11,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from mo_future import text_type

from jx_base.container import type2container
from mo_files.url import URL
from mo_kwargs import override
from mo_logs import Log
from mo_logs.url import URL
from pyLibrary.env import http


DEBUG = False

known_hosts = {}

@@ -30,7 +27,6 @@ def new_instance(
    host,
    index,
    type=None,
    alias=None,
    name=None,
    port=9200,
    read_only=True,

@@ -46,7 +42,7 @@ def new_instance(

    url = URL(host)
    url.port = port
    status = http.get_json(text_type(url), stream=False)
    status = http.get_json(url, stream=False)
    version = status.version.number
    if version.startswith("1."):
        from jx_elasticsearch.es14 import ES14
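Note: new_instance() probes the cluster (status = http.get_json(url, ...)) and dispatches on the reported version string. Only the "1." branch is visible in this hunk; the sketch below fills in the shape of the dispatch as an assumption, based on the ES14 and ES52 containers touched elsewhere in this commit (the es52 import path and the exact version prefixes are not confirmed by this diff):

def pick_container(version):
    # version is status.version.number, e.g. "1.7.5" or "5.2.2"
    if version.startswith("1."):
        from jx_elasticsearch.es14 import ES14   # shown in the hunk above
        return ES14
    elif version.startswith(("5.", "6.")):
        from jx_elasticsearch.es52 import ES52   # assumed module path
        return ES52
    else:
        Log.error("Do not know how to handle ES version {{version}}", version=version)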
@@ -172,9 +172,9 @@ class _MVEL(object):
        if len(split_field(self.fromData.name)) == 1 and fields:
            if isinstance(fields, Mapping):
                # CONVERT UNORDERED FIELD DEFS
                jx_fields, es_fields = zip(*[(k, fields[k]) for k in sorted(fields.keys())])
                jx_fields, es_fields = transpose(*[(k, fields[k]) for k in sorted(fields.keys())])
            else:
                jx_fields, es_fields = zip(*[(i, e) for i, e in enumerate(fields)])
                jx_fields, es_fields = transpose(*[(i, e) for i, e in enumerate(fields)])

            # NO LOOPS BECAUSE QUERY IS SHALLOW
            # DOMAIN IS FROM A DIMENSION, USE IT'S FIELD DEFS TO PULL

@@ -175,7 +175,7 @@ def es_setop(es, mvel, query):
    if not data_list:
        cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
    else:
        output = zip(*data_list)
        output = transpose(*data_list)
        cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    return Data(

@@ -221,7 +221,7 @@ def es_deepop(es, mvel, query):
    data = es_post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    terms = zip(*rows)
    terms = transpose(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
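Note: many hunks in this commit swap zip(*rows) for transpose(*rows) (and zip(a, b) for transpose(a, b)). The body of mo_future.transpose is not part of this diff; the likely motivation is that Python 3's zip() returns a lazy iterator, so results like output[i] above would fail, whereas an eager helper can be indexed and reused. A minimal sketch under that assumption:

def transpose(*args):
    # Assumed behaviour: an eager zip that returns a list of tuples.
    return list(zip(*args))

rows = [("a", 1), ("b", 2)]
cols = transpose(*rows)   # [("a", "b"), (1, 2)] -- indexable, unlike zip() in Py3
names, values = cols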
|
@ -52,7 +52,6 @@ class ES14(Container):
|
|||
host,
|
||||
index,
|
||||
type=None,
|
||||
alias=None,
|
||||
name=None,
|
||||
port=9200,
|
||||
read_only=True,
|
||||
|
@ -161,7 +160,7 @@ class ES14(Container):
|
|||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
if "Data too large, data for" in e:
|
||||
http.post(self.es.cluster.path+"/_cache/clear")
|
||||
http.post(self.es.cluster.url / "_cache/clear")
|
||||
Log.error("Problem (Tried to clear Elasticsearch cache)", e)
|
||||
Log.error("problem", e)
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_base import EXISTS
|
||||
from jx_base.domains import SetDomain
|
||||
from jx_base.expressions import TupleOp, NULL
|
||||
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
|
||||
|
@ -24,6 +23,7 @@ from jx_python import jx
|
|||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import EXISTS
|
||||
from mo_json.typed_encoder import encode_property
|
||||
from mo_logs import Log
|
||||
from mo_math import Math, MAX, UNION
|
||||
|
@ -175,6 +175,7 @@ def es_aggsop(es, frum, query):
|
|||
|
||||
es_query.aggs[key].percentiles.field = columns[0].es_column
|
||||
es_query.aggs[key].percentiles.percents += [percent]
|
||||
es_query.aggs[key].percentiles.compression = 2
|
||||
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
|
||||
elif s.aggregate == "cardinality":
|
||||
canonical_names = []
|
||||
|
|
|
@ -13,7 +13,6 @@ from __future__ import unicode_literals
|
|||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import STRING, NUMBER, BOOLEAN
|
||||
from jx_base.dimensions import Dimension
|
||||
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
|
||||
from jx_base.expressions import TupleOp, TRUE
|
||||
|
@ -22,6 +21,7 @@ from jx_elasticsearch.es14.expressions import Variable, NotOp, InOp, Literal, An
|
|||
from jx_python import jx
|
||||
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import STRING, NUMBER, BOOLEAN
|
||||
from mo_json.typed_encoder import untype_path
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote, expand_template
|
||||
|
@ -123,13 +123,13 @@ class AggsDecoder(object):
|
|||
pass
|
||||
|
||||
def get_value_from_row(self, row):
|
||||
Log.error("Not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_value(self, index):
|
||||
Log.error("Not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_index(self, row):
|
||||
Log.error("Not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def num_columns(self):
|
||||
|
@ -159,7 +159,7 @@ class SetDecoder(AggsDecoder):
|
|||
domain = self.domain
|
||||
|
||||
domain_key = domain.key
|
||||
include, text_include = zip(*(
|
||||
include, text_include = transpose(*(
|
||||
(
|
||||
float(v) if isinstance(v, (int, float)) else v,
|
||||
text_type(float(v)) if isinstance(v, (int, float)) else v
|
||||
|
@ -497,7 +497,7 @@ class ObjectDecoder(AggsDecoder):
|
|||
prefix = edge.value.var
|
||||
flatter = lambda k: relative_field(k, prefix)
|
||||
|
||||
self.put, self.fields = zip(*[
|
||||
self.put, self.fields = transpose(*[
|
||||
(flatter(untype_path(c.names["."])), c.es_column)
|
||||
for c in query.frum.schema.leaves(prefix)
|
||||
])
|
||||
|
|
|
@ -11,7 +11,6 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_base import NESTED
|
||||
from jx_base.expressions import NULL
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
|
@ -20,6 +19,7 @@ from jx_elasticsearch.es14.setop import format_dispatch, get_pull_function, get_
|
|||
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template
|
||||
from jx_python.expressions import compile_expression, jx_expression_to_function
|
||||
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
|
||||
from mo_json.typed_encoder import NESTED
|
||||
from mo_json.typed_encoder import untype_path, EXISTS_TYPE
|
||||
from mo_logs import Log
|
||||
from mo_threads import Thread
|
||||
|
|
|
@ -13,15 +13,15 @@ from __future__ import unicode_literals
|
|||
|
||||
import itertools
|
||||
|
||||
from jx_base import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
|
||||
from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, ScriptOp, \
|
||||
WhenOp, InequalityOp, extend, Literal, NullOp, TrueOp, FalseOp, DivOp, FloorOp, \
|
||||
EqOp, NeOp, NotOp, LengthOp, NumberOp, StringOp, CountOp, MultiOp, RegExpOp, CoalesceOp, MissingOp, ExistsOp, \
|
||||
PrefixOp, NotLeftOp, InOp, CaseOp, AndOp, \
|
||||
ConcatOp, IsNumberOp, Expression, BasicIndexOfOp, MaxOp, MinOp, BasicEqOp, BooleanOp, IntegerOp, BasicSubstringOp, ZERO, NULL, FirstOp, FALSE, TRUE, SuffixOp, simplified, ONE, LeftOp
|
||||
ConcatOp, IsNumberOp, Expression, BasicIndexOfOp, MaxOp, MinOp, BasicEqOp, BooleanOp, IntegerOp, BasicSubstringOp, ZERO, NULL, FirstOp, FALSE, TRUE, SuffixOp, simplified, ONE
|
||||
from jx_elasticsearch.es14.util import es_not, es_script, es_or, es_and, es_missing
|
||||
from mo_dots import coalesce, wrap, Null, set_default, literal_field
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
|
||||
from mo_logs import Log, suppress_exception
|
||||
from mo_logs.strings import expand_template, quote
|
||||
from mo_math import MAX, OR
|
||||
|
|
|
@ -13,18 +13,18 @@ from __future__ import unicode_literals
|
|||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import NESTED
|
||||
from jx_base.domains import ALGEBRAIC
|
||||
from jx_base.expressions import IDENTITY
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
from jx_elasticsearch.es14.expressions import Variable, LeavesOp
|
||||
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_not, es_script
|
||||
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
|
||||
from jx_python.containers.cube import Cube
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_json.typed_encoder import NESTED
|
||||
from mo_json.typed_encoder import untype_path, unnest_path, untyped
|
||||
from mo_logs import Log
|
||||
from mo_math import AND
|
||||
|
@ -328,7 +328,7 @@ def format_cube(T, select, query=None):
|
|||
data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
|
||||
)
|
||||
|
||||
cols = zip(*unwrap(table.data))
|
||||
cols = transpose(*unwrap(table.data))
|
||||
return Cube(
|
||||
select,
|
||||
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}],
|
||||
|
|
|
@ -11,13 +11,11 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_future import text_type
|
||||
|
||||
from mo_logs import Log
|
||||
|
||||
from jx_base import STRING, BOOLEAN, NUMBER, OBJECT
|
||||
from jx_elasticsearch.es14.expressions import Variable
|
||||
from mo_dots import wrap
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
def es_query_template(path):
|
||||
|
|
|
@ -11,13 +11,10 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import container
|
||||
from jx_base.container import Container
|
||||
from jx_base.dimensions import Dimension
|
||||
from jx_base.expressions import jx_expression
|
||||
from jx_base.queries import is_variable_name
|
||||
from jx_base.query import QueryOp
|
||||
from jx_elasticsearch.es52.aggs import es_aggsop, is_aggsop
|
||||
from jx_elasticsearch.es52.deep import is_deepop, es_deepop
|
||||
|
@ -25,9 +22,9 @@ from jx_elasticsearch.es52.setop import is_setop, es_setop
|
|||
from jx_elasticsearch.es52.util import aggregates
|
||||
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
|
||||
from jx_python import jx
|
||||
from mo_dots import Data, Null, unwrap, coalesce, split_field, literal_field, unwraplist, join_field, wrap, listwrap, FlatList
|
||||
from mo_json import scrub, value2json
|
||||
from mo_json.typed_encoder import TYPE_PREFIX, EXISTS_TYPE
|
||||
from mo_dots import Data, unwrap, coalesce, split_field, join_field, wrap, listwrap
|
||||
from mo_json import value2json
|
||||
from mo_json.typed_encoder import EXISTS_TYPE
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log, Except
|
||||
from pyLibrary.env import elasticsearch, http
|
||||
|
@ -52,7 +49,6 @@ class ES52(Container):
|
|||
host,
|
||||
index,
|
||||
type=None,
|
||||
alias=None,
|
||||
name=None,
|
||||
port=9200,
|
||||
read_only=True,
|
||||
|
@ -68,9 +64,9 @@ class ES52(Container):
|
|||
"settings": unwrap(kwargs)
|
||||
}
|
||||
self.settings = kwargs
|
||||
self.name = name = coalesce(name, alias, index)
|
||||
self.name = name = coalesce(name, index)
|
||||
if read_only:
|
||||
self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
|
||||
self.es = elasticsearch.Alias(alias=index, kwargs=kwargs)
|
||||
else:
|
||||
self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)
|
||||
|
||||
|
@ -79,7 +75,7 @@ class ES52(Container):
|
|||
self.edges = Data()
|
||||
self.worker = None
|
||||
|
||||
columns = self._namespace.get_snowflake(self.es.settings.alias).columns # ABSOLUTE COLUMNS
|
||||
columns = self.snowflake.columns # ABSOLUTE COLUMNS
|
||||
is_typed = any(c.es_column == EXISTS_TYPE for c in columns)
|
||||
|
||||
if typed == None:
|
||||
|
@ -98,7 +94,6 @@ class ES52(Container):
|
|||
def namespace(self):
|
||||
return self._namespace
|
||||
|
||||
|
||||
def get_table(self, full_name):
|
||||
return Table(full_name, self)
|
||||
|
||||
|
@ -161,7 +156,7 @@ class ES52(Container):
|
|||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
if "Data too large, data for" in e:
|
||||
http.post(self.es.cluster.path+"/_cache/clear")
|
||||
http.post(self.es.cluster.url / "_cache/clear")
|
||||
Log.error("Problem (Tried to clear Elasticsearch cache)", e)
|
||||
Log.error("problem", e)
|
||||
|
||||
|
@@ -198,37 +193,38 @@ class ES52(Container):
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self.es.get_properties()
        table = self.get_table(command['update'])

        es_index = self.es.cluster.get_index(read_only=False, alias=None, kwargs=self.es.settings)

        schema = table.schema
        es_filter = jx_expression(command.where).to_esfilter(schema)

        # GET IDS OF DOCUMENTS
        results = self.es.search({
            "stored_fields": listwrap(schema._routing.path),
            "query": {"bool": {
                "filter": jx_expression(command.where).to_esfilter(Null)
            }},
            "size": 10000
        })
        query = {
            "from": command['update'],
            "select": ["_id"] + [
                {"name": k, "value": v}
                for k, v in command.set.items()
            ],
            "where": command.where,
            "format": "list",
            "limit": 10000
        }

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for k, v in command.set.items():
            if not is_variable_name(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                v = scrub(v)
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})
        results = self.query(query)

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                    updates.append(s)
            content = ("\n".join(value2json(c) for c in updates) + "\n")
        if results.data:
            content = "".join(
                t
                for r in results.data
                for _id, row in [(r._id, r)]
                for _ in [row.__setitem__('_id', None)]  # WARNING! DESTRUCTIVE TO row
                for update in map(value2json, ({"update": {"_id": _id}}, {"doc": row}))
                for t in (update, "\n")
            )
            response = self.es.cluster.post(
                self.es.path + "/_bulk",
                es_index.path + "/" + "_bulk",
                data=content,
                headers={"Content-Type": "application/json"},
                timeout=self.settings.timeout,

@@ -237,3 +233,11 @@ class ES52(Container):
            if response.errors:
                Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])

        # DELETE BY QUERY, IF NEEDED
        if '.' in listwrap(command.clear):
            self.es.delete_record(es_filter)
            return

        es_index.flush()
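Note: the rewritten update() above builds the Elasticsearch bulk payload as newline-delimited JSON: one "update" action line followed by one "doc" line per record, with a trailing newline, then POSTs it to <index>/_bulk. A stand-alone sketch of that payload construction (the example row is illustrative; value2json is the mo_json serializer imported in this commit):

rows = [{"_id": "42", "status": "done"}]   # stand-in for the rows returned by self.query()
lines = []
for row in rows:
    _id = row.pop("_id")                   # the id travels only in the action line
    lines.append(value2json({"update": {"_id": _id}}))
    lines.append(value2json({"doc": row}))
content = "\n".join(lines) + "\n"          # the bulk API requires the final newline
# content is sent with Content-Type: application/json to .../_bulk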
|
|
@ -11,7 +11,6 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_base import EXISTS
|
||||
from jx_base.domains import SetDomain
|
||||
from jx_base.expressions import TupleOp, NULL
|
||||
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
|
||||
|
@ -24,7 +23,7 @@ from jx_python import jx
|
|||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import encode_property
|
||||
from mo_json.typed_encoder import encode_property, EXISTS
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote, expand_template
|
||||
from mo_math import Math, MAX, UNION
|
||||
|
@@ -222,6 +221,7 @@ def es_aggsop(es, frum, query):

            es_query.aggs[key].percentiles.field = columns[0].es_column
            es_query.aggs[key].percentiles.percents += [percent]
            es_query.aggs[key].percentiles.tdigest.compression = 2
            s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
        elif s.aggregate == "cardinality":
            canonical_names = []

@@ -251,7 +251,7 @@ def es_aggsop(es, frum, query):
            for column in columns:
                script = {"scripted_metric": {
                    'init_script': 'params._agg.terms = new HashSet()',
                    'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v)',
                    'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v);',
                    'combine_script': 'return params._agg.terms.toArray()',
                    'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                }}
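Note: two small query-body fixes sit in the hunks above. The percentiles aggregation now sets compression under "tdigest" (the es14 code earlier in this commit assigns percentiles.compression directly, matching the older 1.x parameter), and the scripted_metric map_script gains a terminating semicolon, presumably because Painless is stricter than the older scripting languages about statement terminators. A sketch of the percentiles aggregation the mo_dots assignments above serialize to (field name and percent are illustrative):

percentiles_agg = {
    "percentiles": {
        "field": "response.time",       # columns[0].es_column
        "percents": [50],               # accumulated per requested percentile
        "tdigest": {"compression": 2}   # 5.x places compression under tdigest
    }
}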
@@ -366,7 +366,7 @@ def es_aggsop(es, frum, query):
    decoders = get_decoders_by_depth(query)
    start = 0

    #<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
@ -13,7 +13,6 @@ from __future__ import unicode_literals
|
|||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import STRING, NUMBER, BOOLEAN
|
||||
from jx_base.dimensions import Dimension
|
||||
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
|
||||
from jx_base.expressions import TupleOp, TRUE
|
||||
|
@ -22,8 +21,8 @@ from jx_elasticsearch.es52.expressions import Variable, NotOp, InOp, Literal, An
|
|||
from jx_elasticsearch.es52.util import es_missing
|
||||
from jx_python import jx
|
||||
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import untype_path
|
||||
from mo_future import text_type, transpose
|
||||
from mo_json.typed_encoder import untype_path, STRING, NUMBER, BOOLEAN
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote, expand_template
|
||||
from mo_math import MAX, MIN, Math
|
||||
|
@ -125,13 +124,13 @@ class AggsDecoder(object):
|
|||
pass
|
||||
|
||||
def get_value_from_row(self, row):
|
||||
Log.error("Not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_value(self, index):
|
||||
Log.error("Not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_index(self, row):
|
||||
Log.error("Not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def num_columns(self):
|
||||
|
@ -161,7 +160,7 @@ class SetDecoder(AggsDecoder):
|
|||
domain = self.domain
|
||||
|
||||
domain_key = domain.key
|
||||
include, text_include = zip(*(
|
||||
include, text_include = transpose(*(
|
||||
(
|
||||
float(v) if isinstance(v, (int, float)) else v,
|
||||
text_type(float(v)) if isinstance(v, (int, float)) else v
|
||||
|
@ -502,7 +501,7 @@ class ObjectDecoder(AggsDecoder):
|
|||
prefix = edge.value.var
|
||||
flatter = lambda k: relative_field(k, prefix)
|
||||
|
||||
self.put, self.fields = zip(*[
|
||||
self.put, self.fields = transpose(*[
|
||||
(flatter(untype_path(c.names["."])), c.es_column)
|
||||
for c in query.frum.schema.leaves(prefix)
|
||||
])
|
||||
|
@ -562,7 +561,7 @@ class ObjectDecoder(AggsDecoder):
|
|||
return None
|
||||
|
||||
output = Data()
|
||||
for k, v in zip(self.put, part):
|
||||
for k, v in transpose(self.put, part):
|
||||
output[k] = v.get('key')
|
||||
return output
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from jx_base import NESTED
|
||||
from jx_base.expressions import NULL
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
|
@ -20,6 +19,7 @@ from jx_elasticsearch.es52.setop import format_dispatch, get_pull_function, get_
|
|||
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template
|
||||
from jx_python.expressions import compile_expression, jx_expression_to_function
|
||||
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
|
||||
from mo_json.typed_encoder import NESTED
|
||||
from mo_json.typed_encoder import untype_path
|
||||
from mo_logs import Log
|
||||
from mo_threads import Thread
|
||||
|
|
|
@ -13,7 +13,6 @@ from __future__ import unicode_literals
|
|||
|
||||
import itertools
|
||||
|
||||
from jx_base import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
|
||||
from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, ScriptOp, \
|
||||
WhenOp, InequalityOp, extend, Literal, NullOp, TrueOp, FalseOp, DivOp, FloorOp, \
|
||||
EqOp, NeOp, NotOp, LengthOp, NumberOp, StringOp, CountOp, MultiOp, RegExpOp, CoalesceOp, MissingOp, ExistsOp, \
|
||||
|
@ -22,9 +21,11 @@ from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, Scr
|
|||
from jx_elasticsearch.es52.util import es_not, es_script, es_or, es_and, es_missing
|
||||
from mo_dots import coalesce, wrap, Null, set_default, literal_field
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
|
||||
from mo_logs import Log, suppress_exception
|
||||
from mo_logs.strings import expand_template, quote
|
||||
from mo_math import MAX, OR
|
||||
from mo_times import Date
|
||||
from pyLibrary.convert import string2regexp
|
||||
|
||||
NUMBER_TO_STRING = """
|
||||
|
@ -257,6 +258,12 @@ def to_es_script(self, schema):
|
|||
expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]",
|
||||
frum=self
|
||||
)
|
||||
if isinstance(v, Date):
|
||||
return EsScript(
|
||||
type=NUMBER,
|
||||
expr=text_type(v.unix),
|
||||
frum=self
|
||||
)
|
||||
|
||||
return _convert(self.term)
|
||||
|
||||
|
@@ -705,18 +712,22 @@ def to_es_script(self, schema):

@extend(OrOp)
def to_esfilter(self, schema):
    return es_or([t.partial_eval().to_esfilter(schema) for t in self.terms])
    # TODO: REPLICATE THIS WHOLE expression.py SO IT IS CLEAR ES5 QUERIES ARE A BIT DIFFERENT
    if schema.snowflake.namespace.es_cluster.version.startswith("5."):
        # VERSION 5.2.x
        # WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION
        # {"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL
        # {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL

        # OR(x) == NOT(AND(NOT(xi) for xi in x))
        # output = es_not(es_and([
        #     NotOp("not", t).partial_eval().to_esfilter(schema)
        #     for t in self.terms
        # ]))
        # return output

        # WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION
        # {"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL
        # {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL
        # OR(x) == NOT(AND(NOT(xi) for xi in x))
        output = es_not(es_and([
            NotOp("not", t).partial_eval().to_esfilter(schema)
            for t in self.terms
        ]))
        return output
    else:
        # VERSION 6.2
        return es_or([t.partial_eval().to_esfilter(schema) for t in self.terms])


@extend(LengthOp)

@@ -1170,7 +1181,7 @@ def to_es_script(self, schema):
                frum=self
            )
        else:
            Log.error("do not know how to handle")
            Log.error("do not know how to handle: {{self}}", self=self.__data__())
    else:
        return self.partial_eval().to_es_script(schema)
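Note: OrOp.to_esfilter now branches on the cluster version. On 5.x the OR is rewritten through De Morgan, OR(x) == NOT(AND(NOT(xi))), so clauses are evaluated with exit-early semantics; on 6.x a plain should-clause is emitted. Assuming es_or/es_and/es_not wrap their arguments in bool.should, bool.must and bool.must_not respectively (the helper bodies are not in this diff), "a OR b" comes out roughly as:

a = {"term": {"status": "open"}}   # illustrative operand filters
b = {"term": {"status": "new"}}

or_6x = {"bool": {"should": [a, b]}}                 # plain disjunction

or_5x = {"bool": {"must_not": [{"bool": {"must": [   # NOT(AND(NOT(a), NOT(b)))
    {"bool": {"must_not": [a]}},
    {"bool": {"must_not": [b]}},
]}}]}}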
|
|
@ -13,22 +13,22 @@ from __future__ import unicode_literals
|
|||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import NESTED
|
||||
from jx_base.domains import ALGEBRAIC
|
||||
from jx_base.expressions import IDENTITY
|
||||
from jx_base.query import DEFAULT_LIMIT
|
||||
from jx_elasticsearch import post as es_post
|
||||
from jx_elasticsearch.es52.expressions import Variable, LeavesOp
|
||||
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_not, es_script
|
||||
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
|
||||
from jx_python.containers.cube import Cube
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_collections.matrix import Matrix
|
||||
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_future import transpose
|
||||
from mo_json.typed_encoder import NESTED
|
||||
from mo_json.typed_encoder import untype_path, unnest_path, untyped
|
||||
from mo_logs import Log
|
||||
from mo_math import AND
|
||||
from mo_math import MAX
|
||||
from mo_math import AND, MAX
|
||||
from mo_times.timer import Timer
|
||||
|
||||
format_dispatch = {}
|
||||
|
@ -102,11 +102,20 @@ def es_setop(es, query):
|
|||
leaves = schema.leaves(s_column)
|
||||
nested_selects = {}
|
||||
if leaves:
|
||||
if s_column == '.' or any(c.jx_type == NESTED for c in leaves):
|
||||
if s_column == '.':
|
||||
# PULL ALL SOURCE
|
||||
es_query.stored_fields = ["_source"]
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
"value": select.value,
|
||||
"put": {"name": select.name, "index": put_index, "child": "."},
|
||||
"pull": get_pull_source(".")
|
||||
})
|
||||
elif any(c.jx_type == NESTED for c in leaves):
|
||||
# PULL WHOLE NESTED ARRAYS
|
||||
es_query.stored_fields = ["_source"]
|
||||
for c in leaves:
|
||||
if len(c.nested_path) == 1:
|
||||
if len(c.nested_path) == 1: # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRT LEVEL PROPERTIES
|
||||
jx_name = untype_path(c.names["."])
|
||||
new_select.append({
|
||||
"name": select.name,
|
||||
|
@ -193,12 +202,14 @@ def es_setop(es, query):
|
|||
if es_query.stored_fields[0] == "_source":
|
||||
es_query.stored_fields = ["_source"]
|
||||
n.pull = get_pull_source(n.value.var)
|
||||
elif n.value == "_id":
|
||||
n.pull = jx_expression_to_function("_id")
|
||||
else:
|
||||
n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
|
||||
else:
|
||||
Log.error("Do not know what to do")
|
||||
|
||||
with Timer("call to ES", silent=True) as call_timer:
|
||||
with Timer("call to ES") as call_timer:
|
||||
data = es_post(es, es_query, query.limit)
|
||||
|
||||
T = data.hits.hits
|
||||
|
@ -206,7 +217,8 @@ def es_setop(es, query):
|
|||
try:
|
||||
formatter, groupby_formatter, mime_type = format_dispatch[query.format]
|
||||
|
||||
output = formatter(T, new_select, query)
|
||||
with Timer("formatter"):
|
||||
output = formatter(T, new_select, query)
|
||||
output.meta.timing.es = call_timer.duration
|
||||
output.meta.content_type = mime_type
|
||||
output.meta.es_query = es_query
|
||||
|
@ -318,7 +330,8 @@ def format_table(T, select, query=None):
|
|||
|
||||
|
||||
def format_cube(T, select, query=None):
|
||||
table = format_table(T, select, query)
|
||||
with Timer("format table"):
|
||||
table = format_table(T, select, query)
|
||||
|
||||
if len(table.data) == 0:
|
||||
return Cube(
|
||||
|
@ -327,7 +340,7 @@ def format_cube(T, select, query=None):
|
|||
data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
|
||||
)
|
||||
|
||||
cols = zip(*unwrap(table.data))
|
||||
cols = transpose(*unwrap(table.data))
|
||||
return Cube(
|
||||
select,
|
||||
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}],
|
||||
|
|
|
@ -11,13 +11,11 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_future import text_type
|
||||
|
||||
from mo_logs import Log
|
||||
|
||||
from jx_base import STRING, BOOLEAN, NUMBER, OBJECT
|
||||
from jx_elasticsearch.es52.expressions import Variable
|
||||
from mo_dots import wrap
|
||||
from mo_future import text_type
|
||||
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
def es_query_template(path):
|
||||
|
|
|
@ -15,18 +15,18 @@ import itertools
|
|||
from itertools import product
|
||||
|
||||
import jx_base
|
||||
from jx_base import STRUCT, TableDesc, BOOLEAN
|
||||
from jx_base import TableDesc
|
||||
from jx_base.namespace import Namespace
|
||||
from jx_base.query import QueryOp
|
||||
from jx_python import jx, meta as jx_base_meta
|
||||
from jx_python import jx
|
||||
from jx_python.containers.list_usingPythonList import ListContainer
|
||||
from jx_python.meta import ColumnList, Column
|
||||
from mo_collections.relation import Relation_usingList
|
||||
from mo_dots import Data, relative_field, SELF_PATH, ROOT_PATH, coalesce, set_default, Null, split_field, join_field, wrap, concat_field, startswith_field, literal_field
|
||||
from mo_json.typed_encoder import EXISTS_TYPE, untype_path, unnest_path
|
||||
from mo_json.typed_encoder import EXISTS_TYPE, untype_path, unnest_path, OBJECT, EXISTS, STRUCT, BOOLEAN
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from mo_logs.exceptions import extract_stack
|
||||
from mo_logs.exceptions import Except
|
||||
from mo_logs.strings import quote
|
||||
from mo_math import MAX
|
||||
from mo_threads import Queue, THREAD_STOP, Thread, Till
|
||||
|
@ -50,8 +50,9 @@ class ElasticsearchMetadata(Namespace):
|
|||
MANAGE SNOWFLAKE SCHEMAS FOR EACH OF THE ALIASES FOUND IN THE CLUSTER
|
||||
"""
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
es_cluster = elasticsearch.Cluster(kwargs['kwargs'])
|
||||
@override
|
||||
def __new__(cls, kwargs, *args, **_kwargs):
|
||||
es_cluster = elasticsearch.Cluster(kwargs)
|
||||
output = known_clusters.get(id(es_cluster))
|
||||
if output is None:
|
||||
output = object.__new__(cls)
|
||||
|
@ -88,7 +89,14 @@ class ElasticsearchMetadata(Namespace):
|
|||
"meta.tables": Date.now()
|
||||
}
|
||||
table_columns = metadata_tables()
|
||||
self.meta.tables = ListContainer("meta.tables", [], jx_base.Schema(".", table_columns))
|
||||
self.meta.tables = ListContainer(
|
||||
"meta.tables",
|
||||
[
|
||||
# TableDesc("meta.columns", None, ".", Date.now()),
|
||||
# TableDesc("meta.tables", None, ".", Date.now())
|
||||
],
|
||||
jx_base.Schema(".", table_columns)
|
||||
)
|
||||
self.meta.columns.extend(table_columns)
|
||||
# TODO: fix monitor so it does not bring down ES
|
||||
if ENABLE_META_SCAN:
|
||||
|
@ -97,9 +105,13 @@ class ElasticsearchMetadata(Namespace):
|
|||
self.worker = Thread.run("refresh metadata", self.not_monitor)
|
||||
return
|
||||
|
||||
@property
|
||||
def namespace(self):
|
||||
return self.meta.columns.namespace
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
return self.es_cluster.path + "/" + self.default_name.replace(".", "/")
|
||||
return self.es_cluster.url / self.default_name.replace(".", "/")
|
||||
|
||||
def _reload_columns(self, table_desc):
|
||||
"""
|
||||
|
@ -141,7 +153,17 @@ class ElasticsearchMetadata(Namespace):
|
|||
|
||||
def _parse_properties(self, alias, mapping, meta):
|
||||
abs_columns = elasticsearch.parse_properties(alias, None, mapping.properties)
|
||||
with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
|
||||
if any(c.cardinality == 0 and c.names['.'] != '_id' for c in abs_columns):
|
||||
Log.warning(
|
||||
"Some columns are not stored {{names}}",
|
||||
names=[
|
||||
".".join((c.es_index, c.names['.']))
|
||||
for c in abs_columns
|
||||
if c.cardinality == 0
|
||||
]
|
||||
)
|
||||
|
||||
with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, silent=not DEBUG):
|
||||
# LIST OF EVERY NESTED PATH
|
||||
query_paths = [[c.es_column] for c in abs_columns if c.es_type == "nested"]
|
||||
for a, b in itertools.product(query_paths, query_paths):
|
||||
|
@ -159,11 +181,13 @@ class ElasticsearchMetadata(Namespace):
|
|||
q.append(SELF_PATH)
|
||||
query_paths.append(ROOT_PATH)
|
||||
self.alias_to_query_paths[alias] = query_paths
|
||||
for i in self.index_to_alias.get_domain(alias):
|
||||
self.alias_to_query_paths[i] = query_paths
|
||||
|
||||
# ADD RELATIVE NAMES
|
||||
for abs_column in abs_columns:
|
||||
abs_column.last_updated = None
|
||||
abs_column.jx_type = es_type_to_json_type[abs_column.es_type]
|
||||
abs_column.jx_type = jx_type(abs_column)
|
||||
for query_path in query_paths:
|
||||
abs_column.names[query_path[0]] = relative_field(abs_column.names["."], query_path[0])
|
||||
self.todo.add(self.meta.columns.add(abs_column))
|
||||
|
@ -203,7 +227,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
Log.error("{{table|quote}} does not exist", table=table_name)
|
||||
|
||||
try:
|
||||
last_update = MAX([
|
||||
last_update = MAX([
|
||||
self.es_cluster.index_last_updated[i]
|
||||
for i in self.index_to_alias.get_domain(alias)
|
||||
])
|
||||
|
@ -288,7 +312,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
"size": 0
|
||||
})
|
||||
count = result.hits.total
|
||||
cardinality = 1001
|
||||
cardinality = max(1001, count)
|
||||
multi = 1001
|
||||
elif column.es_column == "_id":
|
||||
result = self.es_cluster.post("/" + es_index + "/_search", data={
|
||||
|
@ -350,7 +374,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
})
|
||||
return
|
||||
elif column.es_type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
|
||||
DEBUG and Log.note("{{field}} has {{num}} parts", field=column.es_index, num=cardinality)
|
||||
DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"count": count,
|
||||
|
@ -393,9 +417,10 @@ class ElasticsearchMetadata(Namespace):
|
|||
except Exception as e:
|
||||
# CAN NOT IMPORT: THE TEST MODULES SETS UP LOGGING
|
||||
# from tests.test_jx import TEST_TABLE
|
||||
e = Except.wrap(e)
|
||||
TEST_TABLE = "testdata"
|
||||
is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
|
||||
is_test_table = any(column.es_index.startswith(t) for t in [TEST_TABLE_PREFIX, TEST_TABLE])
|
||||
is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
|
||||
if is_missing_index and is_test_table:
|
||||
# WE EXPECT TEST TABLES TO DISAPPEAR
|
||||
self.meta.columns.update({
|
||||
|
@ -414,7 +439,7 @@ class ElasticsearchMetadata(Namespace):
|
|||
"multi",
|
||||
"partitions",
|
||||
],
|
||||
"where": {"eq": {"names.\\.": ".", "es_index": column.es_index, "es_column": column.es_column}}
|
||||
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
|
||||
})
|
||||
Log.warning("Could not get {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)
|
||||
|
||||
|
@ -430,10 +455,10 @@ class ElasticsearchMetadata(Namespace):
|
|||
]
|
||||
if old_columns:
|
||||
DEBUG and Log.note(
|
||||
"Old columns {{names|json}} last updated {{dates|json}}",
|
||||
names=wrap(old_columns).es_column,
|
||||
dates=[Date(t).format() for t in wrap(old_columns).last_updated]
|
||||
)
|
||||
"Old columns {{names|json}} last updated {{dates|json}}",
|
||||
names=wrap(old_columns).es_column,
|
||||
dates=[Date(t).format() for t in wrap(old_columns).last_updated]
|
||||
)
|
||||
self.todo.extend(old_columns)
|
||||
# TEST CONSISTENCY
|
||||
for c, d in product(list(self.todo.queue), list(self.todo.queue)):
|
||||
|
@ -447,23 +472,29 @@ class ElasticsearchMetadata(Namespace):
|
|||
if column is THREAD_STOP:
|
||||
continue
|
||||
|
||||
DEBUG and Log.note("update {{table}}.{{column}}", table=column.es_index, column=column.es_column)
|
||||
if column.es_index in self.index_does_not_exist:
|
||||
self.meta.columns.update({
|
||||
"clear": ".",
|
||||
"where": {"eq": {"es_index": column.es_index}}
|
||||
})
|
||||
continue
|
||||
if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
|
||||
column.last_updated = Date.now()
|
||||
continue
|
||||
elif column.last_updated >= Date.now()-TOO_OLD:
|
||||
continue
|
||||
try:
|
||||
self._update_cardinality(column)
|
||||
(DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX)) and Log.note("updated {{column.name}}", column=column)
|
||||
except Exception as e:
|
||||
Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
|
||||
with Timer("update {{table}}.{{column}}", param={"table": column.es_index, "column": column.es_column}, silent=not DEBUG):
|
||||
if column.es_index in self.index_does_not_exist:
|
||||
self.meta.columns.update({
|
||||
"clear": ".",
|
||||
"where": {"eq": {"es_index": column.es_index}}
|
||||
})
|
||||
continue
|
||||
if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
|
||||
column.last_updated = Date.now()
|
||||
continue
|
||||
elif column.last_updated >= Date.now()-TOO_OLD:
|
||||
continue
|
||||
try:
|
||||
self._update_cardinality(column)
|
||||
(DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX)) and Log.note("updated {{column.name}}", column=column)
|
||||
except Exception as e:
|
||||
if '"status":404' in e:
|
||||
self.meta.columns.update({
|
||||
"clear": ".",
|
||||
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
|
||||
})
|
||||
else:
|
||||
Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
|
||||
except Exception as e:
|
||||
Log.warning("problem in cardinality monitor", cause=e)
|
||||
|
||||
|
@ -478,23 +509,27 @@ class ElasticsearchMetadata(Namespace):
|
|||
if c.last_updated >= Date.now()-TOO_OLD:
|
||||
continue
|
||||
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"last_updated": Date.now()
|
||||
},
|
||||
"clear":[
|
||||
"count",
|
||||
"cardinality",
|
||||
"multi",
|
||||
"partitions",
|
||||
],
|
||||
"where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
|
||||
})
|
||||
DEBUG and Log.note("Did not get {{col.es_index}}.{{col.es_column}} info", col=c)
|
||||
with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": c}, silent=not DEBUG, too_long=0.05):
|
||||
self.meta.columns.update({
|
||||
"set": {
|
||||
"last_updated": Date.now()
|
||||
},
|
||||
"clear": [
|
||||
"count",
|
||||
"cardinality",
|
||||
"multi",
|
||||
"partitions",
|
||||
],
|
||||
"where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
|
||||
})
|
||||
|
||||
def get_table(self, alias_name):
|
||||
def get_table(self, name):
|
||||
if name == "meta.columns":
|
||||
return self.meta.columns
|
||||
|
||||
# return self.meta.columns
|
||||
with self.meta.tables.locker:
|
||||
return wrap([t for t in self.meta.tables.data if t.name == alias_name])
|
||||
return wrap([t for t in self.meta.tables.data if t.name == name])
|
||||
|
||||
def get_snowflake(self, fact_table_name):
|
||||
return Snowflake(fact_table_name, self)
|
||||
|
@ -512,8 +547,8 @@ class Snowflake(object):
|
|||
REPRESENT ONE ALIAS, AND ITS NESTED ARRAYS
|
||||
"""
|
||||
|
||||
def __init__(self, alias, namespace):
|
||||
self.alias = alias
|
||||
def __init__(self, name, namespace):
|
||||
self.name = name
|
||||
self.namespace = namespace
|
||||
|
||||
def get_schema(self, query_path):
|
||||
|
@ -524,20 +559,17 @@ class Snowflake(object):
|
|||
"""
|
||||
RETURN A LIST OF ALL NESTED COLUMNS
|
||||
"""
|
||||
output = self.namespace.alias_to_query_paths.get(self.alias)
|
||||
output = self.namespace.alias_to_query_paths.get(self.name)
|
||||
if output:
|
||||
return output
|
||||
Log.error("Can not find index {{index|quote}}", index=self.alias)
|
||||
Log.error("Can not find index {{index|quote}}", index=self.name)
|
||||
|
||||
@property
|
||||
def columns(self):
|
||||
"""
|
||||
RETURN ALL COLUMNS FROM ORIGIN OF FACT TABLE
|
||||
"""
|
||||
if any("verify_no_private_attachments" in t['method'] for t in extract_stack()):
|
||||
pass
|
||||
|
||||
return self.namespace.get_columns(literal_field(self.alias))
|
||||
return self.namespace.get_columns(literal_field(self.name))
|
||||
|
||||
|
||||
class Schema(jx_base.Schema):
|
||||
|
@ -605,11 +637,11 @@ class Schema(jx_base.Schema):
|
|||
|
||||
@property
|
||||
def name(self):
|
||||
return concat_field(self.snowflake.alias, self.query_path[0])
|
||||
return concat_field(self.snowflake.name, self.query_path[0])
|
||||
|
||||
@property
|
||||
def columns(self):
|
||||
return self.snowflake.namespace.get_columns(literal_field(self.snowflake.alias))
|
||||
return self.snowflake.namespace.get_columns(literal_field(self.snowflake.name))
|
||||
|
||||
def map_to_es(self):
|
||||
"""
|
||||
|
@ -689,4 +721,13 @@ def metadata_tables():
|
|||
)
|
||||
|
||||
|
||||
OBJECTS = (jx_base.OBJECT, jx_base.EXISTS)
|
||||
def jx_type(column):
|
||||
"""
|
||||
return the jx_type for given column
|
||||
"""
|
||||
if column.es_column.endswith(EXISTS_TYPE):
|
||||
return EXISTS
|
||||
return es_type_to_json_type[column.es_type]
|
||||
|
||||
|
||||
OBJECTS = (OBJECT, EXISTS)
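Note: the new jx_type() helper above stops mislabelling the typed-encoder existence columns; previously jx_type came straight from es_type_to_json_type[es_type]. A self-contained sketch of the same rule, where the EXISTS_TYPE suffix and the es_type mapping are stand-ins (the real values live in mo_json.typed_encoder and pyLibrary.env.elasticsearch, not in this diff):

EXISTS_SUFFIX = "~e~"                                                        # stand-in for EXISTS_TYPE
ES_TO_JSON = {"keyword": "string", "long": "number", "boolean": "boolean"}  # stand-in map

def jx_type_of(es_column, es_type):
    if es_column.endswith(EXISTS_SUFFIX):
        return "exists"               # existence-marker columns override the native type
    return ES_TO_JSON[es_type]

# jx_type_of("user.name.~s~", "keyword") -> "string"
# jx_type_of("user.~e~", "long")         -> "exists"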
|
||||
|
|
|
@ -1,103 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import container
|
||||
from mo_dots import Data
|
||||
from mo_dots import wrap, set_default, split_field
|
||||
from mo_future import text_type
|
||||
from mo_logs import Log
|
||||
|
||||
config = Data() # config.default IS EXPECTED TO BE SET BEFORE CALLS ARE MADE
|
||||
_ListContainer = None
|
||||
_meta = None
|
||||
|
||||
|
||||
def _delayed_imports():
|
||||
global _ListContainer
|
||||
global _meta
|
||||
|
||||
|
||||
from jx_python import meta as _meta
|
||||
from jx_python.containers.list_usingPythonList import ListContainer as _ListContainer
|
||||
|
||||
_ = _ListContainer
|
||||
_ = _meta
|
||||
|
||||
try:
|
||||
from pyLibrary.queries.jx_usingMySQL import MySQL
|
||||
except Exception:
|
||||
MySQL = None
|
||||
|
||||
try:
|
||||
from jx_elasticsearch.meta import ElasticsearchMetadata
|
||||
except Exception:
|
||||
ElasticsearchSnowflake = None
|
||||
|
||||
set_default(container.type2container, {
|
||||
"mysql": MySQL,
|
||||
"memory": None,
|
||||
"meta": ElasticsearchMetadata
|
||||
})
|
||||
|
||||
|
||||
def find_container(frum, schema=None):
|
||||
"""
|
||||
:param frum:
|
||||
:param schema:
|
||||
:return:
|
||||
"""
|
||||
if not _meta:
|
||||
_delayed_imports()
|
||||
|
||||
frum = wrap(frum)
|
||||
|
||||
if isinstance(frum, text_type):
|
||||
if not container.config.default.settings:
|
||||
Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")
|
||||
|
||||
type_ = None
|
||||
if frum.startswith("meta."):
|
||||
if frum == "meta.columns":
|
||||
return _meta.singlton.meta.columns.denormalized()
|
||||
elif frum == "meta.tables":
|
||||
return _meta.singlton.meta.tables
|
||||
else:
|
||||
Log.error("{{name}} not a recognized table", name=frum)
|
||||
|
||||
type_ = container.config.default.type
|
||||
fact_table_name = split_field(frum)[0]
|
||||
|
||||
settings = set_default(
|
||||
{
|
||||
"index": fact_table_name,
|
||||
"name": frum,
|
||||
"exists": True,
|
||||
},
|
||||
container.config.default.settings
|
||||
)
|
||||
settings.type = None
|
||||
return container.type2container[type_](settings)
|
||||
elif isinstance(frum, Mapping) and frum.type and container.type2container[frum.type]:
|
||||
# TODO: Ensure the frum.name is set, so we capture the deep queries
|
||||
if not frum.type:
|
||||
Log.error("Expecting from clause to have a 'type' property")
|
||||
return container.type2container[frum.type](frum.settings)
|
||||
elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
|
||||
from jx_base.query import QueryOp
|
||||
return QueryOp.wrap(frum, namespace=schema)
|
||||
elif isinstance(frum, (list, set)):
|
||||
return _ListContainer("test_list", frum)
|
||||
else:
|
||||
return frum
|
||||
|
||||
|
|
@ -322,16 +322,16 @@ class Cube(Container):
|
|||
|
||||
if isinstance(self.select, list):
|
||||
selects = listwrap(self.select)
|
||||
index, v = zip(*self.data[selects[0].name].groupby(selector))
|
||||
index, v = transpose(*self.data[selects[0].name].groupby(selector))
|
||||
|
||||
coord = wrap([coord2term(c) for c in index])
|
||||
|
||||
values = [v]
|
||||
for s in selects[1::]:
|
||||
i, v = zip(*self.data[s.name].group_by(selector))
|
||||
i, v = transpose(*self.data[s.name].group_by(selector))
|
||||
values.append(v)
|
||||
|
||||
output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
|
||||
output = transpose(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
|
||||
elif not remainder:
|
||||
# v IS A VALUE, NO NEED TO WRAP IT IN A Cube
|
||||
output = (
|
||||
|
@ -377,7 +377,7 @@ class Cube(Container):
|
|||
|
||||
if isinstance(self.select, list):
|
||||
selects = listwrap(self.select)
|
||||
index, v = zip(*self.data[selects[0].name].groupby(selector))
|
||||
index, v = transpose(*self.data[selects[0].name].groupby(selector))
|
||||
|
||||
coord = wrap([coord2term(c) for c in index])
|
||||
|
||||
|
@ -386,7 +386,7 @@ class Cube(Container):
|
|||
i, v = zip(*self.data[s.name].group_by(selector))
|
||||
values.append(v)
|
||||
|
||||
output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
|
||||
output = transpose(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
|
||||
elif not remainder:
|
||||
# v IS A VALUE, NO NEED TO WRAP IT IN A Cube
|
||||
output = (
|
||||
|
@ -409,7 +409,7 @@ class Cube(Container):
|
|||
|
||||
def window(self, window):
|
||||
if window.edges or window.sort:
|
||||
Log.error("not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
from jx_python import jx
|
||||
|
||||
|
|
|
@ -192,7 +192,7 @@ class DocStore(Container):
|
|||
|
||||
def having(self, having):
|
||||
_ = having
|
||||
Log.error("not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
def format(self, format):
|
||||
if format == "list":
|
||||
|
|
|
@ -14,8 +14,6 @@ from __future__ import unicode_literals
|
|||
import itertools
|
||||
from collections import Mapping
|
||||
|
||||
from mo_math import UNION
|
||||
|
||||
import jx_base
|
||||
from jx_base import Container
|
||||
from jx_base.expressions import jx_expression, Expression, Variable, TRUE
|
||||
|
@ -207,7 +205,7 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
|
|||
|
||||
def having(self, having):
|
||||
_ = having
|
||||
Log.error("not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
def format(self, format):
|
||||
if format == "table":
|
||||
|
|
|
@ -12,12 +12,11 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_times import Date
|
||||
|
||||
_range = range
|
||||
|
||||
from mo_times import Date
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import query
|
||||
from jx_python import expressions as _expressions
|
||||
from jx_python import flat_list, group_by
|
||||
|
|
|
@ -14,16 +14,16 @@ from __future__ import unicode_literals
|
|||
from collections import Mapping
|
||||
from datetime import date
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import jx_base
|
||||
from jx_base import python_type_to_json_type
|
||||
from jx_base import STRUCT, Column, Table
|
||||
from jx_base import Column, Table
|
||||
from jx_base.schema import Schema
|
||||
from jx_python import jx
|
||||
from mo_collections import UniqueIndex
|
||||
from mo_dots import Data, concat_field, get_attr, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap, coalesce
|
||||
from mo_dots import Data, concat_field, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap, coalesce
|
||||
from mo_future import none_type, text_type, long, PY2
|
||||
from mo_json.typed_encoder import untype_path, unnest_path
|
||||
from mo_json.typed_encoder import untype_path, unnest_path, python_type_to_json_type, STRUCT
|
||||
from mo_logs import Log
|
||||
from mo_threads import Lock
|
||||
from mo_times.dates import Date
|
||||
|
@ -31,7 +31,7 @@ from mo_times.dates import Date
|
|||
singlton = None
|
||||
|
||||
|
||||
class ColumnList(Table):
|
||||
class ColumnList(Table, jx_base.Container):
|
||||
"""
|
||||
OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED
|
||||
"""
|
||||
|
@ -89,24 +89,22 @@ class ColumnList(Table):
|
|||
values = set()
|
||||
objects = 0
|
||||
multi = 1
|
||||
for t, cs in self.data.items():
|
||||
for c, css in cs.items():
|
||||
for column in css:
|
||||
value = column[mc.names["."]]
|
||||
if value == None:
|
||||
pass
|
||||
else:
|
||||
count += 1
|
||||
if isinstance(value, list):
|
||||
multi = max(multi, len(value))
|
||||
try:
|
||||
values |= set(value)
|
||||
except Exception:
|
||||
objects += len(value)
|
||||
elif isinstance(value, Mapping):
|
||||
objects += 1
|
||||
else:
|
||||
values.add(value)
|
||||
for column in self._all_columns():
|
||||
value = column[mc.names["."]]
|
||||
if value == None:
|
||||
pass
|
||||
else:
|
||||
count += 1
|
||||
if isinstance(value, list):
|
||||
multi = max(multi, len(value))
|
||||
try:
|
||||
values |= set(value)
|
||||
except Exception:
|
||||
objects += len(value)
|
||||
elif isinstance(value, Mapping):
|
||||
objects += 1
|
||||
else:
|
||||
values.add(value)
|
||||
mc.count = count
|
||||
mc.cardinality = len(values) + objects
|
||||
mc.partitions = jx.sort(values)
|
||||
|
@ -114,12 +112,18 @@ class ColumnList(Table):
|
|||
mc.last_updated = Date.now()
|
||||
self.dirty = False
|
||||
|
||||
def _all_columns(self):
|
||||
return [
|
||||
column
|
||||
for t, cs in self.data.items()
|
||||
for _, css in cs.items()
|
||||
for column in css
|
||||
]
|
||||
|
||||
def __iter__(self):
|
||||
self._update_meta()
|
||||
for t, cs in self.data.items():
|
||||
for c, css in cs.items():
|
||||
for column in css:
|
||||
yield column
|
||||
with self.locker:
|
||||
self._update_meta()
|
||||
return iter(self._all_columns())
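
The rewritten __iter__ takes the lock, refreshes the metadata, and then iterates over the flat list built by _all_columns() rather than yielding while walking the nested dicts directly. A small standalone sketch of that snapshot-under-lock idea, using a plain threading.Lock and stand-in data rather than the real mo_threads/Column types:

from threading import Lock

# Illustrative only: snapshot the nested data under a lock before iterating,
# mirroring the _all_columns()/__iter__() shape above.
class ColumnStore(object):
    def __init__(self):
        self.locker = Lock()
        self.data = {}   # table -> column name -> list of column records

    def _all_columns(self):
        # flatten the dict-of-dicts-of-lists into one list
        return [
            column
            for _, cs in self.data.items()
            for _, css in cs.items()
            for column in css
        ]

    def __iter__(self):
        with self.locker:
            snapshot = self._all_columns()
        # iterate the snapshot; later mutations of self.data cannot break this loop
        return iter(snapshot)

store = ColumnStore()
store.data = {"t": {"a": [{"name": "a"}], "b": [{"name": "b"}]}}
print([c["name"] for c in store])   # ['a', 'b']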
|
||||
|
||||
def __len__(self):
|
||||
return self.data['meta.columns']['es_index'].count
|
||||
|
@ -130,22 +134,49 @@ class ColumnList(Table):
|
|||
command = wrap(command)
|
||||
eq = command.where.eq
|
||||
if eq.es_index:
|
||||
columns = self.find(eq.es_index, eq.name)
|
||||
columns = [
|
||||
c
|
||||
for c in columns
|
||||
if all(get_attr(c, k) == v for k, v in eq.items())
|
||||
]
|
||||
all_columns = self.data.get(eq.es_index, {}).values()
|
||||
if len(eq) == 1:
|
||||
# FASTEST
|
||||
with self.locker:
|
||||
columns = [
|
||||
c
|
||||
for cs in all_columns
|
||||
for c in cs
|
||||
]
|
||||
elif eq.es_column and len(eq) == 2:
|
||||
# FASTER
|
||||
with self.locker:
|
||||
columns = [
|
||||
c
|
||||
for cs in all_columns
|
||||
for c in cs
|
||||
if c.es_column == eq.es_column
|
||||
]
|
||||
|
||||
else:
|
||||
# SLOWER
|
||||
with self.locker:
|
||||
columns = [
|
||||
c
|
||||
for cs in all_columns
|
||||
for c in cs
|
||||
if all(c[k] == v for k, v in eq.items()) # THIS LINE IS VERY SLOW
|
||||
]
|
||||
else:
|
||||
with self.locker:
|
||||
columns = list(self)
|
||||
columns = jx.filter(columns, command.where)
|
||||
columns = list(self)
|
||||
columns = jx.filter(columns, command.where)
|
||||
|
||||
with self.locker:
|
||||
for col in list(columns):
|
||||
for col in columns:
|
||||
for k in command["clear"]:
|
||||
if k == ".":
|
||||
columns.remove(col)
|
||||
lst = self.data[col.es_index]
|
||||
cols = lst[col.names['.']]
|
||||
cols.remove(col)
|
||||
if len(cols) == 0:
|
||||
del lst[col.names['.']]
|
||||
if len(lst) == 0:
|
||||
del self.data[col.es_index]
|
||||
else:
|
||||
col[k] = None
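
Read off the code above, an update command appears to carry a where.eq filter plus a "clear" list of fields to blank (with "." meaning remove the whole column). The command literal and helper below are assumptions inferred from this code, not a documented API; the snippet only mirrors the slow all(c[k] == v ...) path.

# Hypothetical command shape, inferred from the code above (not a documented API):
command = {
    "clear": ["partitions", "cardinality"],
    "where": {"eq": {"es_index": "unittest", "es_column": "run.suite"}},
}

# Standalone sketch of the slow path: keep columns whose fields all match the eq clause.
columns = [
    {"es_index": "unittest", "es_column": "run.suite", "cardinality": 7},
    {"es_index": "unittest", "es_column": "build.type", "cardinality": 3},
]
eq = command["where"]["eq"]
matched = [c for c in columns if all(c.get(k) == v for k, v in eq.items())]
for col in matched:
    for k in command["clear"]:
        col[k] = None
print(matched)  # the run.suite column with its stats cleared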
|
@ -155,12 +186,17 @@ class ColumnList(Table):
|
|||
Log.error("should not happen", cause=e)
|
||||
|
||||
def query(self, query):
|
||||
# NOT EXPECTED TO BE RUN
|
||||
Log.error("not")
|
||||
with self.locker:
|
||||
self._update_meta()
|
||||
query.frum = self.__iter__()
|
||||
output = jx.run(query)
|
||||
if not self._schema:
|
||||
self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs])
|
||||
snapshot = self._all_columns()
|
||||
|
||||
return output
|
||||
from jx_python.containers.list_usingPythonList import ListContainer
|
||||
query.frum = ListContainer("meta.columns", snapshot, self._schema)
|
||||
return jx.run(query)
|
||||
|
||||
def groupby(self, keys):
|
||||
with self.locker:
|
||||
|
@ -179,6 +215,11 @@ class ColumnList(Table):
|
|||
def namespace(self):
|
||||
return self
|
||||
|
||||
def get_table(self, table_name):
|
||||
if table_name != "meta.columns":
|
||||
Log.error("this container has only the meta.columns")
|
||||
return self
|
||||
|
||||
def denormalized(self):
|
||||
"""
|
||||
THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
|
||||
|
@ -374,6 +415,7 @@ _type_to_name = {
|
|||
list: "nested",
|
||||
FlatList: "nested",
|
||||
Date: "double",
|
||||
Decimal: "double",
|
||||
datetime: "double",
|
||||
date: "double"
|
||||
}
|
||||
|
|
|
@ -232,7 +232,7 @@ class Max(WindowFunction):
|
|||
self.max = mo_math.MAX([self.max, value])
|
||||
|
||||
def sub(self, value):
|
||||
Log.error("Not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
def end(self):
|
||||
return self.max
|
||||
|
|
|
@ -38,7 +38,7 @@ class Index(object):
|
|||
try:
|
||||
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
|
||||
# RETURN ANOTHER Index
|
||||
Log.error("not implemented")
|
||||
raise NotImplementedError()
|
||||
|
||||
key = value2key(self._keys, key)
|
||||
return wrap(copy(self._data.get(key, [])))
|
||||
|
|
|
@ -11,7 +11,7 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_future import text_type
|
||||
from mo_future import text_type, xrange
|
||||
from mo_dots import Null, Data, coalesce, get_module
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
|
@ -335,18 +335,18 @@ def _getitem(c, i):
|
|||
return (len(c), ), c
|
||||
elif isinstance(select, slice):
|
||||
sub = c[select]
|
||||
dims, cube = zip(*[_getitem(cc, i[1::]) for cc in sub])
|
||||
dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
|
||||
return (len(cube),) + dims[0], cube
|
||||
else:
|
||||
return (), c[select]
|
||||
else:
|
||||
select = i[0]
|
||||
if select == None:
|
||||
dims, cube = zip(*[_getitem(cc, i[1::]) for cc in c])
|
||||
dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in c])
|
||||
return (len(cube),)+dims[0], cube
|
||||
elif isinstance(select, slice):
|
||||
sub = c[select]
|
||||
dims, cube = zip(*[_getitem(cc, i[1::]) for cc in sub])
|
||||
dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
|
||||
return (len(cube),)+dims[0], cube
|
||||
else:
|
||||
with suppress_exception:
|
||||
|
|
|
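
Several hunks in this commit replace zip(*...) with transpose(*...). The transpose helper itself is not shown in this diff; presumably it behaves like zip but returns eager lists, which matters on Python 3 where zip is a lazy iterator. A minimal sketch of such a helper, stated as an assumption about its behaviour:

# Hedged sketch: a transpose() with zip-like behaviour but eager, list-of-lists output.
# The real helper imported by this code may differ; this only illustrates the idea.
def transpose(*args):
    return [list(row) for row in zip(*args)]

pairs = [(1, "a"), (2, "b"), (3, "c")]
index, values = transpose(*pairs)
print(index)   # [1, 2, 3]
print(values)  # ['a', 'b', 'c']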
@ -93,7 +93,6 @@ class PersistentQueue(object):
|
|||
yield value
|
||||
except Exception as e:
|
||||
Log.warning("Tell me about what happened here", cause=e)
|
||||
DEBUG and Log.note("queue iterator is done")
|
||||
|
||||
def add(self, value):
|
||||
with self.lock:
|
||||
|
|
|
@ -13,6 +13,8 @@ from __future__ import unicode_literals
|
|||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
|
||||
from collections import deque
|
||||
|
||||
|
||||
class Queue(object):
|
||||
"""
|
||||
|
@ -29,14 +31,28 @@ class Queue(object):
|
|||
"""
|
||||
def __init__(self):
|
||||
self.set = set()
|
||||
self.list = []
|
||||
self.list = deque()
|
||||
|
||||
def __nonzero__(self):
|
||||
return len(self.list) > 0
|
||||
|
||||
def __contains__(self, value):
|
||||
return value in self.set
|
||||
|
||||
def __len__(self):
|
||||
return self.list.__len__()
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.list)
|
||||
|
||||
def __rsub__(self, other):
|
||||
if isinstance(other, set):
|
||||
return other - self.set
|
||||
return set(o for o in other if o not in self.set)
|
||||
|
||||
def __data__(self):
|
||||
return list(self.list)
|
||||
|
||||
def add(self, value):
|
||||
if value in self.set:
|
||||
return self
|
||||
|
@ -44,7 +60,12 @@ class Queue(object):
|
|||
self.list.append(value)
|
||||
|
||||
def push(self, value):
|
||||
self.add(value)
|
||||
if value in self.set:
|
||||
self.list.remove(value)
|
||||
else:
|
||||
self.set.add(value)
|
||||
|
||||
self.list.appendleft(value)
|
||||
|
||||
def extend(self, values):
|
||||
for v in values:
|
||||
|
@ -54,7 +75,6 @@ class Queue(object):
|
|||
if len(self.list) == 0:
|
||||
return None
|
||||
|
||||
output = self.list.pop(0)
|
||||
output = self.list.popleft()
|
||||
self.set.remove(output)
|
||||
return output
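
The reworked Queue pairs a set (for O(1) membership tests) with a deque (for order), and push() now promotes an existing value to the front instead of merely re-adding it. A self-contained usage sketch of that de-duplicating FIFO; this is a simplified stand-in, not the mo_collections class itself:

from collections import deque

# Standalone sketch of the de-duplicating queue above: a set for membership,
# a deque for order; push() moves an existing value to the front.
class DedupQueue(object):
    def __init__(self):
        self.set = set()
        self.list = deque()

    def add(self, value):
        if value in self.set:
            return self
        self.set.add(value)
        self.list.append(value)
        return self

    def push(self, value):
        if value in self.set:
            self.list.remove(value)
        else:
            self.set.add(value)
        self.list.appendleft(value)

    def pop(self):
        if not self.list:
            return None
        output = self.list.popleft()
        self.set.remove(output)
        return output

q = DedupQueue()
q.add("a").add("b").add("a")        # the second "a" is ignored
q.push("c")                         # "c" jumps the line
print([q.pop(), q.pop(), q.pop()])  # ['c', 'a', 'b']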
|
||||
|
||||
|
|
|
@ -208,7 +208,7 @@ def _all_default(d, default, seen=None):
|
|||
if default is None:
|
||||
return
|
||||
if isinstance(default, Data):
|
||||
default = object.__getattribute__(default, b"_dict") # REACH IN AND GET THE dict
|
||||
default = object.__getattribute__(default, SLOT) # REACH IN AND GET THE dict
|
||||
# Log = _late_import()
|
||||
# Log.error("strictly dict (or object) allowed: got {{type}}", type=default.__class__.__name__)
|
||||
|
||||
|
@ -417,11 +417,11 @@ def wrap(v):
|
|||
:return: Data INSTANCE
|
||||
"""
|
||||
|
||||
type_ = _get(v, "__class__")
|
||||
type_ = v.__class__
|
||||
|
||||
if type_ is dict:
|
||||
m = object.__new__(Data)
|
||||
_set(m, "_dict", v)
|
||||
_set(m, SLOT, v)
|
||||
return m
|
||||
elif type_ is none_type:
|
||||
return Null
|
||||
|
@ -489,7 +489,7 @@ def _wrap_leaves(value):
|
|||
def unwrap(v):
|
||||
_type = _get(v, "__class__")
|
||||
if _type is Data:
|
||||
d = _get(v, "_dict")
|
||||
d = _get(v, SLOT)
|
||||
return d
|
||||
elif _type is FlatList:
|
||||
return v.list
|
||||
|
@ -569,6 +569,6 @@ def tuplewrap(value):
|
|||
|
||||
|
||||
from mo_dots.nones import Null, NullType
|
||||
from mo_dots.datas import Data
|
||||
from mo_dots.datas import Data, SLOT
|
||||
from mo_dots.lists import FlatList
|
||||
from mo_dots.objects import DataObject
|
||||
|
|
|
@ -14,13 +14,15 @@ from __future__ import unicode_literals
|
|||
from collections import MutableMapping, Mapping
|
||||
from copy import deepcopy
|
||||
|
||||
from mo_dots.lists import FlatList
|
||||
|
||||
from mo_dots import _getdefault, hash_value, literal_field, coalesce, listwrap, get_logger
|
||||
from mo_future import text_type, PY2
|
||||
from mo_future import text_type, PY2, iteritems, none_type, generator_types
|
||||
|
||||
_get = object.__getattribute__
|
||||
_set = object.__setattr__
|
||||
|
||||
|
||||
SLOT = str("_internal_dict")
|
||||
DEBUG = False
|
||||
|
||||
|
||||
|
@ -29,7 +31,7 @@ class Data(MutableMapping):
|
|||
Please see README.md
|
||||
"""
|
||||
|
||||
__slots__ = ["_dict"]
|
||||
__slots__ = [SLOT]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
|
@ -37,59 +39,59 @@ class Data(MutableMapping):
|
|||
IS UNLIKELY TO BE USEFUL. USE wrap() INSTEAD
|
||||
"""
|
||||
if DEBUG:
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
for k, v in kwargs.items():
|
||||
d[literal_field(k)] = unwrap(v)
|
||||
else:
|
||||
if args:
|
||||
args0 = args[0]
|
||||
if isinstance(args0, Data):
|
||||
_set(self, "_dict", _get(args0, "_dict"))
|
||||
_set(self, SLOT, _get(args0, SLOT))
|
||||
elif isinstance(args0, dict):
|
||||
_set(self, "_dict", args0)
|
||||
_set(self, SLOT, args0)
|
||||
else:
|
||||
_set(self, "_dict", dict(args0))
|
||||
_set(self, SLOT, dict(args0))
|
||||
elif kwargs:
|
||||
_set(self, "_dict", unwrap(kwargs))
|
||||
_set(self, SLOT, unwrap(kwargs))
|
||||
else:
|
||||
_set(self, "_dict", {})
|
||||
_set(self, SLOT, {})
|
||||
|
||||
def __bool__(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
if isinstance(d, dict):
|
||||
return bool(d)
|
||||
else:
|
||||
return d != None
|
||||
|
||||
def __nonzero__(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
if isinstance(d, dict):
|
||||
return True if d else False
|
||||
else:
|
||||
return d != None
|
||||
|
||||
def __contains__(self, item):
|
||||
if Data.__getitem__(self, item):
|
||||
value = Data.__getitem__(self, item)
|
||||
if isinstance(value, Mapping) or value:
|
||||
return True
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return d.__iter__()
|
||||
|
||||
def __getitem__(self, key):
|
||||
if key == None:
|
||||
return Null
|
||||
if key == ".":
|
||||
output = _get(self, "_dict")
|
||||
output = self._internal_dict
|
||||
if isinstance(output, Mapping):
|
||||
return self
|
||||
else:
|
||||
return output
|
||||
|
||||
key = text_type(key)
|
||||
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
|
||||
if key.find(".") >= 0:
|
||||
seq = _split_field(key)
|
||||
|
@ -118,11 +120,11 @@ class Data(MutableMapping):
|
|||
# SOMETHING TERRIBLE HAPPENS WHEN value IS NOT A Mapping;
|
||||
# HOPEFULLY THE ONLY OTHER METHOD RUN ON self IS unwrap()
|
||||
v = unwrap(value)
|
||||
_set(self, "_dict", v)
|
||||
_set(self, SLOT, v)
|
||||
return v
|
||||
|
||||
try:
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
value = unwrap(value)
|
||||
if key.find(".") == -1:
|
||||
if value is None:
|
||||
|
@ -148,31 +150,43 @@ class Data(MutableMapping):
|
|||
raise e
|
||||
|
||||
def __getattr__(self, key):
|
||||
d = _get(self, "_dict")
|
||||
o = d.get(key)
|
||||
if o == None:
|
||||
d = self._internal_dict
|
||||
v = d.get(key)
|
||||
t = v.__class__
|
||||
|
||||
# OPTIMIZED wrap()
|
||||
if t is dict:
|
||||
m = object.__new__(Data)
|
||||
_set(m, SLOT, v)
|
||||
return m
|
||||
elif t in (none_type, NullType):
|
||||
return NullType(d, key)
|
||||
return wrap(o)
|
||||
elif t is list:
|
||||
return FlatList(v)
|
||||
elif t in generator_types:
|
||||
return FlatList(list(unwrap(vv) for vv in v))
|
||||
else:
|
||||
return v
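
The new __getattr__ inlines wrap() by switching on the raw value's class, so the common dict/list/None cases skip a function call. A standalone sketch of that wrap-on-access idea with plain stand-ins; the real mo_dots code returns Data, FlatList, and NullType instead:

# Illustrative only: wrap-on-attribute-access, switching on the stored value's class.
class AttrDict(object):
    __slots__ = ["_internal_dict"]

    def __init__(self, d):
        object.__setattr__(self, "_internal_dict", d)

    def __getattr__(self, key):
        v = object.__getattribute__(self, "_internal_dict").get(key)
        t = v.__class__
        if t is dict:
            return AttrDict(v)          # nested dicts come back wrapped
        elif v is None:
            return None                 # the real code returns a NullType placeholder
        else:
            return v                    # primitives pass through untouched

d = AttrDict({"build": {"revision": "abc123"}, "count": 3})
print(d.build.revision)  # abc123
print(d.count)           # 3
print(d.missing)         # None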
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
value = unwrap(value)
|
||||
if value is None:
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
d.pop(key, None)
|
||||
else:
|
||||
d[key] = value
|
||||
return self
|
||||
|
||||
def __hash__(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return hash_value(d)
|
||||
|
||||
def __eq__(self, other):
|
||||
if self is other:
|
||||
return True
|
||||
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
if not isinstance(d, dict):
|
||||
return d == other
|
||||
|
||||
|
@ -194,11 +208,11 @@ class Data(MutableMapping):
|
|||
return not self.__eq__(other)
|
||||
|
||||
def get(self, key, default=None):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return d.get(key, default)
|
||||
|
||||
def items(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return [(k, wrap(v)) for k, v in d.items() if v != None or isinstance(v, Mapping)]
|
||||
|
||||
def leaves(self, prefix=None):
|
||||
|
@ -209,42 +223,42 @@ class Data(MutableMapping):
|
|||
|
||||
def iteritems(self):
|
||||
# LOW LEVEL ITERATION, NO WRAPPING
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return ((k, wrap(v)) for k, v in iteritems(d))
|
||||
|
||||
def keys(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return set(d.keys())
|
||||
|
||||
def values(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return listwrap(list(d.values()))
|
||||
|
||||
def clear(self):
|
||||
get_logger().error("clear() not supported")
|
||||
|
||||
def __len__(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return dict.__len__(d)
|
||||
|
||||
def copy(self):
|
||||
return Data(**self)
|
||||
|
||||
def __copy__(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return Data(**d)
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return wrap(deepcopy(d, memo))
|
||||
|
||||
def __delitem__(self, key):
|
||||
if key.find(".") == -1:
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
d.pop(key, None)
|
||||
return
|
||||
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
seq = _split_field(key)
|
||||
for k in seq[:-1]:
|
||||
d = d[k]
|
||||
|
@ -252,7 +266,7 @@ class Data(MutableMapping):
|
|||
|
||||
def __delattr__(self, key):
|
||||
key = text_type(key)
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
d.pop(key, None)
|
||||
|
||||
def setdefault(self, k, d=None):
|
||||
|
@ -262,13 +276,13 @@ class Data(MutableMapping):
|
|||
|
||||
def __str__(self):
|
||||
try:
|
||||
return dict.__str__(_get(self, "_dict"))
|
||||
return dict.__str__(self._internal_dict)
|
||||
except Exception:
|
||||
return "{}"
|
||||
|
||||
def __repr__(self):
|
||||
try:
|
||||
return "Data("+dict.__repr__(_get(self, "_dict"))+")"
|
||||
return "Data("+dict.__repr__(self._internal_dict)+")"
|
||||
except Exception as e:
|
||||
return "Data()"
|
||||
|
||||
|
@ -449,7 +463,7 @@ class _DictUsingSelf(dict):
|
|||
get_logger().error("clear() not supported")
|
||||
|
||||
def __len__(self):
|
||||
d = _get(self, "_dict")
|
||||
d = self._internal_dict
|
||||
return d.__len__()
|
||||
|
||||
def copy(self):
|
||||
|
|
|
@ -17,6 +17,7 @@ from mo_dots import wrap, unwrap, coalesce
|
|||
from mo_dots.nones import Null
|
||||
|
||||
_get = object.__getattribute__
|
||||
_get_list = lambda self: _get(self, "list")
|
||||
_set = object.__setattr__
|
||||
_emit_slice_warning = True
|
||||
|
||||
|
@ -62,7 +63,7 @@ class FlatList(list):
|
|||
if not Log:
|
||||
_late_import()
|
||||
Log.error("slice step must be None, do not know how to deal with values")
|
||||
length = len(_get(self, "list"))
|
||||
length = len(_get_list(self))
|
||||
|
||||
i = index.start
|
||||
if i is None:
|
||||
|
@ -74,15 +75,15 @@ class FlatList(list):
|
|||
j = length
|
||||
else:
|
||||
j = max(min(j, length), 0)
|
||||
return FlatList(_get(self, "list")[i:j])
|
||||
return FlatList(_get_list(self)[i:j])
|
||||
|
||||
if index < 0 or len(_get(self, "list")) <= index:
|
||||
if index < 0 or len(_get_list(self)) <= index:
|
||||
return Null
|
||||
return wrap(_get(self, "list")[index])
|
||||
return wrap(_get_list(self)[index])
|
||||
|
||||
def __setitem__(self, i, y):
|
||||
try:
|
||||
_list = _get(self, "list")
|
||||
_list = _get_list(self)
|
||||
if i <= len(_list):
|
||||
for i in range(len(_list), i):
|
||||
_list.append(None)
|
||||
|
@ -109,7 +110,7 @@ class FlatList(list):
|
|||
if not Log:
|
||||
_late_import()
|
||||
|
||||
return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get(self, "list")])
|
||||
return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get_list(self)])
|
||||
|
||||
def select(self, key):
|
||||
if not Log:
|
||||
|
@ -117,7 +118,7 @@ class FlatList(list):
|
|||
Log.error("Not supported. Use `get()`")
|
||||
|
||||
def filter(self, _filter):
|
||||
return FlatList(vals=[unwrap(u) for u in (wrap(v) for v in _get(self, "list")) if _filter(u)])
|
||||
return FlatList(vals=[unwrap(u) for u in (wrap(v) for v in _get_list(self)) if _filter(u)])
|
||||
|
||||
def __delslice__(self, i, j):
|
||||
if not Log:
|
||||
|
@ -128,20 +129,21 @@ class FlatList(list):
|
|||
self.list = []
|
||||
|
||||
def __iter__(self):
|
||||
return (wrap(v) for v in _get(self, "list"))
|
||||
temp = [wrap(v) for v in _get_list(self)]
|
||||
return iter(temp)
|
||||
|
||||
def __contains__(self, item):
|
||||
return list.__contains__(_get(self, "list"), item)
|
||||
return list.__contains__(_get_list(self), item)
|
||||
|
||||
def append(self, val):
|
||||
_get(self, "list").append(unwrap(val))
|
||||
_get_list(self).append(unwrap(val))
|
||||
return self
|
||||
|
||||
def __str__(self):
|
||||
return _get(self, "list").__str__()
|
||||
return _get_list(self).__str__()
|
||||
|
||||
def __len__(self):
|
||||
return _get(self, "list").__len__()
|
||||
return _get_list(self).__len__()
|
||||
|
||||
def __getslice__(self, i, j):
|
||||
global _emit_slice_warning
|
||||
|
@ -157,45 +159,59 @@ class FlatList(list):
|
|||
return self.list
|
||||
|
||||
def copy(self):
|
||||
return FlatList(list(_get(self, "list")))
|
||||
return FlatList(list(_get_list(self)))
|
||||
|
||||
def __copy__(self):
|
||||
return FlatList(list(_get(self, "list")))
|
||||
return FlatList(list(_get_list(self)))
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
d = _get(self, "list")
|
||||
d = _get_list(self)
|
||||
return wrap(deepcopy(d, memo))
|
||||
|
||||
def remove(self, x):
|
||||
_get(self, "list").remove(x)
|
||||
_get_list(self).remove(x)
|
||||
return self
|
||||
|
||||
def extend(self, values):
|
||||
lst = _get_list(self)
|
||||
for v in values:
|
||||
_get(self, "list").append(unwrap(v))
|
||||
lst.append(unwrap(v))
|
||||
return self
|
||||
|
||||
def pop(self, index=None):
|
||||
if index is None:
|
||||
return wrap(_get(self, "list").pop())
|
||||
return wrap(_get_list(self).pop())
|
||||
else:
|
||||
return wrap(_get(self, "list").pop(index))
|
||||
return wrap(_get_list(self).pop(index))
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, FlatList):
|
||||
other = _get_list(other)
|
||||
lst = _get_list(self)
|
||||
if other == None and len(lst) == 0:
|
||||
return True
|
||||
if not isinstance(other, list):
|
||||
return False
|
||||
if len(lst) != len(other):
|
||||
return False
|
||||
return all([s == o for s, o in zip(lst, other)])
|
||||
|
||||
|
||||
def __add__(self, value):
|
||||
if value == None:
|
||||
return self
|
||||
output = list(_get(self, "list"))
|
||||
output = list(_get_list(self))
|
||||
output.extend(value)
|
||||
return FlatList(vals=output)
|
||||
|
||||
def __or__(self, value):
|
||||
output = list(_get(self, "list"))
|
||||
output = list(_get_list(self))
|
||||
output.append(value)
|
||||
return FlatList(vals=output)
|
||||
|
||||
def __radd__(self, other):
|
||||
output = list(other)
|
||||
output.extend(_get(self, "list"))
|
||||
output.extend(_get_list(self))
|
||||
return FlatList(vals=output)
|
||||
|
||||
def __iadd__(self, other):
|
||||
|
@ -210,59 +226,59 @@ class FlatList(list):
|
|||
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:]
|
||||
"""
|
||||
if num == None:
|
||||
return FlatList([_get(self, "list")[-1]])
|
||||
return FlatList([_get_list(self)[-1]])
|
||||
if num <= 0:
|
||||
return Null
|
||||
|
||||
return FlatList(_get(self, "list")[-num:])
|
||||
return FlatList(_get_list(self)[-num:])
|
||||
|
||||
def left(self, num=None):
|
||||
"""
|
||||
NOT REQUIRED, BUT EXISTS AS OPPOSITE OF right()
|
||||
"""
|
||||
if num == None:
|
||||
return FlatList([_get(self, "list")[0]])
|
||||
return FlatList([_get_list(self)[0]])
|
||||
if num <= 0:
|
||||
return Null
|
||||
|
||||
return FlatList(_get(self, "list")[:num])
|
||||
return FlatList(_get_list(self)[:num])
|
||||
|
||||
def not_right(self, num):
|
||||
"""
|
||||
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
|
||||
"""
|
||||
if num == None:
|
||||
return FlatList([_get(self, "list")[:-1:]])
|
||||
return FlatList([_get_list(self)[:-1:]])
|
||||
if num <= 0:
|
||||
return FlatList.EMPTY
|
||||
|
||||
return FlatList(_get(self, "list")[:-num:])
|
||||
return FlatList(_get_list(self)[:-num:])
|
||||
|
||||
def not_left(self, num):
|
||||
"""
|
||||
NOT REQUIRED, EXISTS AS OPPOSITE OF not_right()
|
||||
"""
|
||||
if num == None:
|
||||
return FlatList([_get(self, "list")[-1]])
|
||||
return FlatList([_get_list(self)[-1]])
|
||||
if num <= 0:
|
||||
return self
|
||||
|
||||
return FlatList(_get(self, "list")[num::])
|
||||
return FlatList(_get_list(self)[num::])
|
||||
|
||||
def last(self):
|
||||
"""
|
||||
RETURN LAST ELEMENT IN FlatList [-1]
|
||||
"""
|
||||
lst = _get(self, "list")
|
||||
lst = _get_list(self)
|
||||
if lst:
|
||||
return wrap(lst[-1])
|
||||
return Null
|
||||
|
||||
def map(self, oper, includeNone=True):
|
||||
if includeNone:
|
||||
return FlatList([oper(v) for v in _get(self, "list")])
|
||||
return FlatList([oper(v) for v in _get_list(self)])
|
||||
else:
|
||||
return FlatList([oper(v) for v in _get(self, "list") if v != None])
|
||||
return FlatList([oper(v) for v in _get_list(self) if v != None])
|
||||
|
||||
|
||||
FlatList.EMPTY = Null
|
||||
|
|
|
@ -177,7 +177,11 @@ class NullType(object):
|
|||
v = o.get(k)
|
||||
if v == None:
|
||||
return NullType(self, key)
|
||||
return wrap(v.get(key))
|
||||
try:
|
||||
return wrap(v.get(key))
|
||||
except Exception as e:
|
||||
from mo_logs import Log
|
||||
Log.error("not expected", cause=e)
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
key = text_type(key)
|
||||
|
@ -223,6 +227,7 @@ class NullType(object):
|
|||
def __hash__(self):
|
||||
return hash(None)
|
||||
|
||||
|
||||
Null = NullType() # INSTEAD OF None!!!
|
||||
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from collections import Mapping
|
|||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
from mo_dots import wrap, unwrap, Data, FlatList, NullType, get_attr, set_attr
|
||||
from mo_dots import wrap, unwrap, Data, FlatList, NullType, get_attr, set_attr, SLOT
|
||||
from mo_future import text_type, binary_type, get_function_defaults, get_function_arguments, none_type, generator_types
|
||||
|
||||
_get = object.__getattribute__
|
||||
|
@ -103,7 +103,7 @@ def datawrap(v):
|
|||
|
||||
if type_ is dict:
|
||||
m = Data()
|
||||
_set(m, "_dict", v) # INJECT m.__dict__=v SO THERE IS NO COPY
|
||||
_set(m, SLOT, v) # INJECT m.__dict__=v SO THERE IS NO COPY
|
||||
return m
|
||||
elif type_ is Data:
|
||||
return v
|
||||
|
@ -127,7 +127,7 @@ def datawrap(v):
|
|||
|
||||
class DictClass(object):
|
||||
"""
|
||||
ALLOW INSTANCES OF class_ TO ACK LIKE dicts
|
||||
ALLOW INSTANCES OF class_ TO ACT LIKE dicts
|
||||
ALLOW CONSTRUCTOR TO ACCEPT @override
|
||||
"""
|
||||
|
||||
|
|
|
@ -14,9 +14,17 @@ from __future__ import unicode_literals
|
|||
import importlib
|
||||
import sys
|
||||
|
||||
from mo_future import PY2
|
||||
|
||||
_Log = None
|
||||
|
||||
if PY2:
|
||||
STDOUT = sys.stdout
|
||||
STDERR = sys.stderr
|
||||
else:
|
||||
STDOUT = sys.stdout.buffer
|
||||
STDERR = sys.stderr.buffer
|
||||
|
||||
|
||||
def get_logger():
|
||||
global _Log
|
||||
|
@ -31,6 +39,7 @@ def get_logger():
|
|||
return _Log
|
||||
|
||||
|
||||
|
||||
def get_module(name):
|
||||
try:
|
||||
return importlib.import_module(name)
|
||||
|
@ -39,16 +48,19 @@ def get_module(name):
|
|||
|
||||
|
||||
class PoorLogger(object):
|
||||
def note(self, note, **kwargs):
|
||||
sys.stdout.write(note+"\n")
|
||||
@classmethod
|
||||
def note(cls, note, **kwargs):
|
||||
STDOUT.write(note.encode('utf8')+b"\n")
|
||||
|
||||
def warning(self, note, **kwargs):
|
||||
sys.stdout.write("WARNING: "+note+"\n")
|
||||
@classmethod
|
||||
def warning(cls, note, **kwargs):
|
||||
STDOUT.write(b"WARNING: " + note.encode('utf8') + b"\n")
|
||||
|
||||
def error(self, note, **kwargs):
|
||||
sys.stderr.write(note)
|
||||
if "cause" in kwargs:
|
||||
raise kwargs["cause"]
|
||||
@classmethod
|
||||
def error(cls, note, **kwargs):
|
||||
STDERR.write(note.encode('utf8'))
|
||||
if str("cause") in kwargs:
|
||||
raise kwargs[str("cause")]
|
||||
else:
|
||||
raise Exception(note)
|
||||
|
||||
|
|
|
@ -9,22 +9,22 @@
|
|||
#
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from mimetypes import MimeTypes
|
||||
from tempfile import mkdtemp, NamedTemporaryFile
|
||||
|
||||
import os
|
||||
|
||||
from mo_future import text_type, binary_type
|
||||
from mo_dots import get_module, coalesce, Null
|
||||
from mo_future import text_type, binary_type, PY3
|
||||
from mo_logs import Log, Except
|
||||
from mo_logs.exceptions import extract_stack
|
||||
from mo_threads import Thread, Till
|
||||
|
||||
mime = MimeTypes()
|
||||
|
||||
|
||||
class File(object):
|
||||
"""
|
||||
ASSUMES ALL FILE CONTENT IS UTF8 ENCODED STRINGS
|
||||
|
@ -48,27 +48,32 @@ class File(object):
|
|||
elif isinstance(filename, File):
|
||||
return
|
||||
elif isinstance(filename, (binary_type, text_type)):
|
||||
self.key = None
|
||||
if filename==".":
|
||||
self._filename = ""
|
||||
elif filename.startswith("~"):
|
||||
home_path = os.path.expanduser("~")
|
||||
if os.sep == "\\":
|
||||
home_path = home_path.replace(os.sep, "/")
|
||||
if home_path.endswith("/"):
|
||||
home_path = home_path[:-1]
|
||||
filename = home_path + filename[1::]
|
||||
self._filename = filename.replace(os.sep, "/") # USE UNIX STANDARD
|
||||
try:
|
||||
self.key = None
|
||||
if filename==".":
|
||||
self._filename = ""
|
||||
elif filename.startswith("~"):
|
||||
home_path = os.path.expanduser("~")
|
||||
if os.sep == "\\":
|
||||
home_path = home_path.replace(os.sep, "/")
|
||||
if home_path.endswith("/"):
|
||||
home_path = home_path[:-1]
|
||||
filename = home_path + filename[1::]
|
||||
self._filename = filename.replace(os.sep, "/") # USE UNIX STANDARD
|
||||
except Exception as e:
|
||||
Log.error(u"can not load {{file}}", file=filename, cause=e)
|
||||
else:
|
||||
self.key = base642bytearray(filename.key)
|
||||
self._filename = "/".join(filename.path.split(os.sep)) # USE UNIX STANDARD
|
||||
try:
|
||||
self.key = base642bytearray(filename.key)
|
||||
self._filename = "/".join(filename.path.split(os.sep)) # USE UNIX STANDARD
|
||||
except Exception as e:
|
||||
Log.error(u"can not load {{file}}", file=filename.path, cause=e)
|
||||
|
||||
while self._filename.find(".../") >= 0:
|
||||
# LET ... REFER TO GRANDPARENT, .... REFER TO GREAT-GRAND-PARENT, etc...
|
||||
self._filename = self._filename.replace(".../", "../../")
|
||||
self.buffering = buffering
|
||||
|
||||
|
||||
if suffix:
|
||||
self._filename = File.add_suffix(self._filename, suffix)
|
||||
|
||||
|
@ -419,9 +424,15 @@ class File(object):
|
|||
def copy(cls, from_, to_):
|
||||
_copy(File(from_), File(to_))
|
||||
|
||||
def __data__(self):
|
||||
return self._filename
|
||||
|
||||
def __unicode__(self):
|
||||
return self.abspath
|
||||
|
||||
def __str__(self):
|
||||
return self.abspath
|
||||
|
||||
|
||||
class TempDirectory(File):
|
||||
"""
|
||||
|
@ -469,11 +480,18 @@ def _copy(from_, to_):
|
|||
File.new_instance(to_).write_bytes(File.new_instance(from_).read_bytes())
|
||||
|
||||
|
||||
def base642bytearray(value):
|
||||
if value == None:
|
||||
return bytearray("")
|
||||
else:
|
||||
return bytearray(base64.b64decode(value))
|
||||
if PY3:
|
||||
def base642bytearray(value):
|
||||
if value == None:
|
||||
return bytearray(b"")
|
||||
else:
|
||||
return bytearray(base64.b64decode(value))
|
||||
else:
|
||||
def base642bytearray(value):
|
||||
if value == None:
|
||||
return bytearray(b"")
|
||||
else:
|
||||
return bytearray(base64.b64decode(value))
|
||||
|
||||
|
||||
def datetime2string(value, format="%Y-%m-%d %H:%M:%S"):
|
||||
|
|
|
@ -1,45 +1,18 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
# REMOVED SO LOGIC SWITCHES FROM BYTES TO STRING BETWEEN PY2 AND PY3 RESPECTIVELY
|
||||
# from __future__ import unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
|
||||
from mo_dots import wrap, Data
|
||||
from mo_future import text_type, PY3
|
||||
from mo_future import urlparse
|
||||
|
||||
_value2json = None
|
||||
_json2value = None
|
||||
_Log = None
|
||||
|
||||
|
||||
def _late_import():
|
||||
global _value2json
|
||||
global _json2value
|
||||
global _Log
|
||||
|
||||
from mo_json import value2json as value2json_
|
||||
from mo_json import json2value as json2value_
|
||||
from mo_logs import Log as _Log
|
||||
|
||||
if PY3:
|
||||
_value2json = value2json_
|
||||
_json2value = json2value_
|
||||
else:
|
||||
_value2json = lambda v: value2json_(v).encode('latin1')
|
||||
_json2value = lambda v: json2value_(v.decode('latin1'))
|
||||
|
||||
_ = _Log
|
||||
from mo_dots import wrap, Data, coalesce, Null
|
||||
from mo_future import urlparse, text_type, PY2, unichr
|
||||
from mo_json import value2json, json2value
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
class URL(object):
|
||||
|
@ -49,37 +22,34 @@ class URL(object):
|
|||
[1] https://docs.python.org/3/library/urllib.parse.html
|
||||
"""
|
||||
|
||||
def __init__(self, value):
|
||||
def __init__(self, value, port=None, path=None, query=None, fragment=None):
|
||||
try:
|
||||
self.scheme = None
|
||||
self.host = None
|
||||
self.port = None
|
||||
self.path = ""
|
||||
self.query = ""
|
||||
self.fragment = ""
|
||||
self.port = port
|
||||
self.path = path
|
||||
self.query = query
|
||||
self.fragment = fragment
|
||||
|
||||
if value == None:
|
||||
return
|
||||
|
||||
if value.startswith("file://") or value.startswith("//"):
|
||||
# urlparse DOES NOT WORK IN THESE CASES
|
||||
scheme, suffix = value.split("//", 1)
|
||||
scheme, suffix = value.split("//", 2)
|
||||
self.scheme = scheme.rstrip(":")
|
||||
parse(self, suffix, 0, 1)
|
||||
self.query = wrap(url_param2value(self.query))
|
||||
else:
|
||||
output = urlparse(value)
|
||||
self.scheme = output.scheme
|
||||
self.port = output.port
|
||||
self.port = coalesce(port, output.port)
|
||||
self.host = output.netloc.split(":")[0]
|
||||
self.path = output.path
|
||||
self.query = wrap(url_param2value(output.query))
|
||||
self.fragment = output.fragment
|
||||
self.path = coalesce(path, output.path)
|
||||
self.query = coalesce(query, wrap(url_param2value(output.query)))
|
||||
self.fragment = coalesce(fragment, output.fragment)
|
||||
except Exception as e:
|
||||
if not _Log:
|
||||
_late_import()
|
||||
|
||||
_Log.error(u"problem parsing {{value}} to URL", value=value, cause=e)
|
||||
Log.error(u"problem parsing {{value}} to URL", value=value, cause=e)
|
||||
|
||||
def __nonzero__(self):
|
||||
if self.scheme or self.host or self.port or self.path or self.query or self.fragment:
|
||||
|
@ -91,19 +61,39 @@ class URL(object):
|
|||
return True
|
||||
return False
|
||||
|
||||
def __truediv__(self, other):
|
||||
if not isinstance(other, text_type):
|
||||
Log.error(u"Expecting text path")
|
||||
output = self.__copy__()
|
||||
output.path = output.path.rstrip('/') + "/" + other.lstrip('/')
|
||||
return output
|
||||
|
||||
def __unicode__(self):
|
||||
return self.__str__().decode('utf8') # ASSUME chr<128 ARE VALID UNICODE
|
||||
|
||||
def __copy__(self):
|
||||
output = URL(None)
|
||||
output.scheme = self.scheme
|
||||
output.host = self.host
|
||||
output.port = self.port
|
||||
output.path = self.path
|
||||
output.query = self.query
|
||||
output.fragment = self.fragment
|
||||
return output
|
||||
|
||||
def __data__(self):
|
||||
return str(self)
|
||||
|
||||
def __str__(self):
|
||||
url = ""
|
||||
if self.host:
|
||||
url = self.host
|
||||
if self.scheme:
|
||||
url = self.scheme + "://" + url
|
||||
url = self.scheme + "://"+url
|
||||
if self.port:
|
||||
url = url + ":" + str(self.port)
|
||||
if self.path:
|
||||
if self.path[0] == "/":
|
||||
if self.path[0] == text_type("/"):
|
||||
url += str(self.path)
|
||||
else:
|
||||
url += "/" + str(self.path)
|
||||
|
@ -114,16 +104,27 @@ class URL(object):
|
|||
return url
|
||||
|
||||
|
||||
def int_to_hex(value, size):
|
||||
def int2hex(value, size):
|
||||
return (("0" * size) + hex(value)[2:])[-size:]
|
||||
|
||||
_str_to_url = {chr(i): chr(i) for i in range(32, 128)}
|
||||
for c in " {}<>;/?:@&=+$,":
|
||||
_str_to_url[c] = "%" + int_to_hex(ord(c), 2)
|
||||
for i in range(128, 256):
|
||||
_str_to_url[chr(i)] = "%" + int_to_hex(i, 2)
|
||||
|
||||
_url_to_str = {v: k for k, v in _str_to_url.items()}
|
||||
def hex2chr(hex):
|
||||
return unichr(int(hex, 16))
|
||||
|
||||
|
||||
if PY2:
|
||||
_map2url = {chr(i): chr(i) for i in range(32, 128)}
|
||||
for c in " {}<>;/?:@&=+$,":
|
||||
_map2url[c] = "%" + str(int2hex(ord(c), 2))
|
||||
for i in range(128, 256):
|
||||
_map2url[chr(i)] = "%" + str(int2hex(i, 2))
|
||||
else:
|
||||
_map2url = {i: unichr(i) for i in range(32, 128)}
|
||||
for c in b" {}<>;/?:@&=+$,":
|
||||
_map2url[c] = "%" + int2hex(c, 2)
|
||||
for i in range(128, 256):
|
||||
_map2url[i] = "%" + str(int2hex(i, 2))
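
int2hex simply left-pads the hexadecimal digits to the requested width, and the _map2url tables built above turn each reserved or non-ASCII byte into a %XX escape. A short worked example; the values are computed here, not taken from the source:

# Worked example of the padding helper above.
def int2hex(value, size):
    return (("0" * size) + hex(value)[2:])[-size:]

print(int2hex(ord(","), 2))   # '2c'  -> the table stores ',' as '%2c'
print(int2hex(0xE9, 2))       # 'e9'  -> byte 0xE9 becomes '%e9'
print(int2hex(7, 4))          # '0007' (left-padded to the requested width)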
||||
names = ["path", "query", "fragment"]
|
||||
indicator = ["/", "?", "#"]
|
||||
|
@ -146,30 +147,33 @@ def url_param2value(param):
|
|||
"""
|
||||
CONVERT URL QUERY PARAMETERS INTO DICT
|
||||
"""
|
||||
if param == None:
|
||||
return Null
|
||||
if param == None:
|
||||
return Null
|
||||
|
||||
def _decode(v):
|
||||
output = []
|
||||
i = 0
|
||||
while i < len(v):
|
||||
c = v[i]
|
||||
if c == "%":
|
||||
d = _url_to_str[v[i:i + 3]]
|
||||
d = hex2chr(v[i + 1:i + 3])
|
||||
output.append(d)
|
||||
i += 3
|
||||
else:
|
||||
output.append(c)
|
||||
i += 1
|
||||
|
||||
output = ("".join(output))
|
||||
output = text_type("".join(output))
|
||||
try:
|
||||
if not _Log:
|
||||
_late_import()
|
||||
return _json2value(output)
|
||||
return json2value(output)
|
||||
except Exception:
|
||||
pass
|
||||
return output
|
||||
|
||||
query = Data()
|
||||
for p in param.split("&"):
|
||||
for p in param.split('&'):
|
||||
if not p:
|
||||
continue
|
||||
if p.find("=") == -1:
|
||||
|
@ -190,27 +194,24 @@ def url_param2value(param):
|
|||
return query
|
||||
|
||||
|
||||
|
||||
|
||||
def value2url_param(value):
|
||||
"""
|
||||
:param value:
|
||||
:return: ascii URL
|
||||
"""
|
||||
if not _Log:
|
||||
_late_import()
|
||||
|
||||
if value == None:
|
||||
_Log.error(u"Can not encode None into a URL")
|
||||
Log.error("Can not encode None into a URL")
|
||||
|
||||
if isinstance(value, Mapping):
|
||||
value_ = wrap(value)
|
||||
output = "&".join([
|
||||
value2url_param(k) + "=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(_value2json(v)))
|
||||
value2url_param(k) + "=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(value2json(v)))
|
||||
for k, v in value_.leaves()
|
||||
])
|
||||
elif isinstance(value, text_type):
|
||||
output = "".join(_str_to_url[c] for c in value)
|
||||
output = "".join(_map2url[c] for c in value.encode('utf8'))
|
||||
elif isinstance(value, str):
|
||||
output = "".join(_map2url[c] for c in value)
|
||||
elif hasattr(value, "__iter__"):
|
||||
output = ",".join(value2url_param(v) for v in value)
|
||||
else:
|
|
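
url_param2value walks the query string and undoes those escapes via _decode and hex2chr. A standalone sketch of that percent-decoding loop, simplified to assume well-formed %XX escapes and single-byte code points:

# Standalone sketch of the percent-decoding loop in _decode() above.
def percent_decode(v):
    output = []
    i = 0
    while i < len(v):
        c = v[i]
        if c == "%":
            output.append(chr(int(v[i + 1:i + 3], 16)))
            i += 3
        else:
            output.append(c)
            i += 1
    return "".join(output)

print(percent_decode("hello%20world%2c%20again"))  # 'hello world, again'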
@ -14,19 +14,30 @@ from __future__ import unicode_literals
|
|||
import json
|
||||
import sys
|
||||
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
PY2 = sys.version_info[0] == 2
|
||||
|
||||
PYPY = False
|
||||
try:
|
||||
import __pypy__ as _
|
||||
PYPY=True
|
||||
except Exception:
|
||||
PYPY=False
|
||||
|
||||
|
||||
none_type = type(None)
|
||||
boolean_type = type(True)
|
||||
|
||||
if PY3:
|
||||
import itertools
|
||||
import collections
|
||||
from functools import cmp_to_key
|
||||
from configparser import ConfigParser
|
||||
from itertools import zip_longest
|
||||
|
||||
izip = zip
|
||||
zip_longest = itertools.zip_longest
|
||||
|
||||
text_type = str
|
||||
string_types = str
|
||||
|
@ -55,6 +66,9 @@ if PY3:
|
|||
from io import BytesIO
|
||||
from _thread import allocate_lock, get_ident, start_new_thread, interrupt_main
|
||||
|
||||
def items(d):
|
||||
return list(d.items())
|
||||
|
||||
def iteritems(d):
|
||||
return d.items()
|
||||
|
||||
|
@ -90,7 +104,10 @@ if PY3:
|
|||
sort_keys=True # <-- IMPORTANT! sort_keys==True
|
||||
).encode
|
||||
|
||||
UserDict = collections.UserDict
|
||||
|
||||
else:
|
||||
import collections
|
||||
import __builtin__
|
||||
from types import GeneratorType
|
||||
from ConfigParser import ConfigParser
|
||||
|
@ -117,6 +134,9 @@ else:
|
|||
from io import BytesIO
|
||||
from thread import allocate_lock, get_ident, start_new_thread, interrupt_main
|
||||
|
||||
def items(d):
|
||||
return d.items()
|
||||
|
||||
def iteritems(d):
|
||||
return d.iteritems()
|
||||
|
||||
|
@ -154,3 +174,67 @@ else:
|
|||
sort_keys=True # <-- IMPORTANT! sort_keys==True
|
||||
).encode
|
||||
|
||||
|
||||
# COPIED FROM Python's collections.UserDict (copied July 2018)
|
||||
class UserDict(collections.MutableMapping):
|
||||
|
||||
# Start by filling-out the abstract methods
|
||||
def __init__(*args, **kwargs):
|
||||
if not args:
|
||||
raise TypeError("descriptor '__init__' of 'UserDict' object "
|
||||
"needs an argument")
|
||||
self, args = args[0], args[1:]
|
||||
if len(args) > 1:
|
||||
raise TypeError('expected at most 1 arguments, got %d' % len(args))
|
||||
if args:
|
||||
dict = args[0]
|
||||
elif 'dict' in kwargs:
|
||||
dict = kwargs.pop('dict')
|
||||
import warnings
|
||||
warnings.warn("Passing 'dict' as keyword argument is deprecated",
|
||||
DeprecationWarning, stacklevel=2)
|
||||
else:
|
||||
dict = None
|
||||
self.data = {}
|
||||
if dict is not None:
|
||||
self.update(dict)
|
||||
if len(kwargs):
|
||||
self.update(kwargs)
|
||||
def __len__(self): return len(self.data)
|
||||
def __getitem__(self, key):
|
||||
if key in self.data:
|
||||
return self.data[key]
|
||||
if hasattr(self.__class__, "__missing__"):
|
||||
return self.__class__.__missing__(self, key)
|
||||
raise KeyError(key)
|
||||
def __setitem__(self, key, item): self.data[key] = item
|
||||
def __delitem__(self, key): del self.data[key]
|
||||
def __iter__(self):
|
||||
return iter(self.data)
|
||||
|
||||
# Modify __contains__ to work correctly when __missing__ is present
|
||||
def __contains__(self, key):
|
||||
return key in self.data
|
||||
|
||||
# Now, add the methods in dicts but not in MutableMapping
|
||||
def __repr__(self): return repr(self.data)
|
||||
def copy(self):
|
||||
if self.__class__ is UserDict:
|
||||
return UserDict(self.data.copy())
|
||||
import copy
|
||||
data = self.data
|
||||
try:
|
||||
self.data = {}
|
||||
c = copy.copy(self)
|
||||
finally:
|
||||
self.data = data
|
||||
c.update(self)
|
||||
return c
|
||||
@classmethod
|
||||
def fromkeys(cls, iterable, value=None):
|
||||
d = cls()
|
||||
for key in iterable:
|
||||
d[key] = value
|
||||
return d
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,208 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from flask import Response
|
||||
from mo_dots import coalesce
|
||||
from mo_files.url import URL
|
||||
from mo_future import text_type, xrange
|
||||
from mo_json import value2json
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from mo_threads import Lock, Signal, Queue, Thread, Till
|
||||
from mo_times import Date, SECOND, MINUTE
|
||||
from pyLibrary.env import http
|
||||
from pyLibrary.sql.sqlite import Sqlite, quote_value, quote_list
|
||||
|
||||
from mo_hg.rate_logger import RateLogger
|
||||
|
||||
APP_NAME = "HG Cache"
|
||||
CONCURRENCY = 5
|
||||
AMORTIZATION_PERIOD = SECOND
|
||||
HG_REQUEST_PER_SECOND = 10
|
||||
CACHE_RETENTION = 10 * MINUTE
|
||||
|
||||
|
||||
class Cache(object):
|
||||
"""
|
||||
For Caching hg.mo requests
|
||||
"""
|
||||
|
||||
@override
|
||||
def __init__(self, rate=None, amortization_period=None, source=None, database=None, kwargs=None):
|
||||
self.amortization_period = coalesce(amortization_period, AMORTIZATION_PERIOD)
|
||||
self.rate = coalesce(rate, HG_REQUEST_PER_SECOND)
|
||||
self.cache_locker = Lock()
|
||||
self.cache = {} # MAP FROM url TO (ready, headers, response, timestamp) PAIR
|
||||
self.no_cache = {} # VERY SHORT TERM CACHE
|
||||
self.workers = []
|
||||
self.todo = Queue(APP_NAME+" todo")
|
||||
self.requests = Queue(APP_NAME + " requests", max=int(self.rate * self.amortization_period.seconds))
|
||||
self.url = URL(source.url)
|
||||
self.db = Sqlite(database)
|
||||
self.inbound_rate = RateLogger("Inbound")
|
||||
self.outbound_rate = RateLogger("hg.mo")
|
||||
|
||||
if not self.db.query("SELECT name FROM sqlite_master WHERE type='table'").data:
|
||||
with self.db.transaction() as t:
|
||||
t.execute(
|
||||
"CREATE TABLE cache ("
|
||||
" path TEXT PRIMARY KEY, "
|
||||
" headers TEXT, "
|
||||
" response TEXT, "
|
||||
" timestamp REAL "
|
||||
")"
|
||||
)
|
||||
|
||||
self.threads = [
|
||||
Thread.run(APP_NAME+" worker" + text_type(i), self._worker)
|
||||
for i in range(CONCURRENCY)
|
||||
]
|
||||
self.limiter = Thread.run(APP_NAME+" limiter", self._rate_limiter)
|
||||
self.cleaner = Thread.run(APP_NAME+" cleaner", self._cache_cleaner)
|
||||
|
||||
def _rate_limiter(self, please_stop):
|
||||
try:
|
||||
max_requests = self.requests.max
|
||||
recent_requests = []
|
||||
|
||||
while not please_stop:
|
||||
now = Date.now()
|
||||
too_old = now - self.amortization_period
|
||||
|
||||
recent_requests = [t for t in recent_requests if t > too_old]
|
||||
|
||||
num_recent = len(recent_requests)
|
||||
if num_recent >= max_requests:
|
||||
space_free_at = recent_requests[0] + self.amortization_period
|
||||
(please_stop | Till(till=space_free_at.unix)).wait()
|
||||
continue
|
||||
for _ in xrange(num_recent, max_requests):
|
||||
request = self.todo.pop()
|
||||
now = Date.now()
|
||||
recent_requests.append(now)
|
||||
self.requests.add(request)
|
||||
except Exception as e:
|
||||
Log.warning("failure", cause=e)
|
||||
|
||||
def _cache_cleaner(self, please_stop):
|
||||
while not please_stop:
|
||||
now = Date.now()
|
||||
too_old = now-CACHE_RETENTION
|
||||
|
||||
remove = set()
|
||||
with self.cache_locker:
|
||||
for path, (ready, headers, response, timestamp) in self.cache:
|
||||
if timestamp < too_old:
|
||||
remove.add(path)
|
||||
for r in remove:
|
||||
del self.cache[r]
|
||||
(please_stop | Till(seconds=CACHE_RETENTION.seconds / 2)).wait()
|
||||
|
||||
def please_cache(self, path):
|
||||
"""
|
||||
:return: False if `path` is not to be cached
|
||||
"""
|
||||
if path.endswith("/tip"):
|
||||
return False
|
||||
if any(k in path for k in ["/json-annotate/", "/json-info/", "/json-log/", "/json-rev/", "/rev/", "/raw-rev/", "/raw-file/", "/json-pushes", "/pushloghtml", "/file/"]):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def request(self, method, path, headers):
|
||||
now = Date.now()
|
||||
self.inbound_rate.add(now)
|
||||
ready = Signal(path)
|
||||
|
||||
# TEST CACHE
|
||||
with self.cache_locker:
|
||||
pair = self.cache.get(path)
|
||||
if pair is None:
|
||||
self.cache[path] = (ready, None, None, now)
|
||||
|
||||
|
||||
if pair is not None:
|
||||
# REQUEST IS IN THE QUEUE ALREADY, WAIT
|
||||
ready, headers, response, then = pair
|
||||
if response is None:
|
||||
ready.wait()
|
||||
with self.cache_locker:
|
||||
ready, headers, response, timestamp = self.cache.get(path)
|
||||
with self.db.transaction() as t:
|
||||
t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
|
||||
return Response(
|
||||
response,
|
||||
status=200,
|
||||
headers=json.loads(headers)
|
||||
)
|
||||
|
||||
# TEST DB
|
||||
db_response = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data
|
||||
if db_response:
|
||||
headers, response = db_response[0]
|
||||
with self.db.transaction() as t:
|
||||
t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
|
||||
with self.cache_locker:
|
||||
self.cache[path] = (ready, headers, response.encode('latin1'), now)
|
||||
ready.go()
|
||||
|
||||
return Response(
|
||||
response,
|
||||
status=200,
|
||||
headers=json.loads(headers)
|
||||
)
|
||||
|
||||
# MAKE A NETWORK REQUEST
|
||||
self.todo.add((ready, method, path, headers, now))
|
||||
ready.wait()
|
||||
with self.cache_locker:
|
||||
ready, headers, response, timestamp = self.cache[path]
|
||||
return Response(
|
||||
response,
|
||||
status=200,
|
||||
headers=json.loads(headers)
|
||||
)
|
||||
|
||||
def _worker(self, please_stop):
|
||||
while not please_stop:
|
||||
pair = self.requests.pop(till=please_stop)
|
||||
if please_stop:
|
||||
break
|
||||
ready, method, path, req_headers, timestamp = pair
|
||||
|
||||
try:
|
||||
url = self.url / path
|
||||
self.outbound_rate.add(Date.now())
|
||||
response = http.request(method, url, req_headers)
|
||||
|
||||
del response.headers['transfer-encoding']
|
||||
resp_headers = value2json(response.headers)
|
||||
resp_content = response.raw.read()
|
||||
|
||||
please_cache = self.please_cache(path)
|
||||
if please_cache:
|
||||
with self.db.transaction() as t:
|
||||
t.execute("INSERT INTO cache (path, headers, response, timestamp) VALUES" + quote_list((path, resp_headers, resp_content.decode('latin1'), timestamp)))
|
||||
with self.cache_locker:
|
||||
self.cache[path] = (ready, resp_headers, resp_content, timestamp)
|
||||
except Exception as e:
|
||||
Log.warning("problem with request to {{path}}", path=path, cause=e)
|
||||
with self.cache_locker:
|
||||
ready, headers, response = self.cache[path]
|
||||
del self.cache[path]
|
||||
finally:
|
||||
ready.go()
|
||||
|
||||
|
||||
|
|
@ -24,6 +24,7 @@ from pyLibrary.env import elasticsearch, http
|
|||
|
||||
EXTRA_WAIT_TIME = 20 * SECOND # WAIT TIME TO SEND TO AWS, IF WE wait_forever
|
||||
OLD_BRANCH = DAY
|
||||
BRANCH_WHITELIST = None
|
||||
|
||||
|
||||
@override
|
||||
|
@ -175,6 +176,15 @@ def _get_single_branch_from_hg(settings, description, dir):
|
|||
detail.locale = _path[-1]
|
||||
detail.name = "weave"
|
||||
|
||||
if BRANCH_WHITELIST is not None:
|
||||
found = False
|
||||
for br in BRANCH_WHITELIST:
|
||||
if br in str(detail.name):
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
continue
|
||||
|
||||
Log.note("Branch {{name}} {{locale}}", name=detail.name, locale=detail.locale)
|
||||
output.append(detail)
|
||||
except Exception as e:
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@ -575,10 +576,10 @@ class HgMozillaOrg(object):
|
|||
json_diff = diff_to_json(diff)
|
||||
num_changes = _count(c for f in json_diff for c in f.changes)
|
||||
if json_diff:
|
||||
if num_changes < MAX_DIFF_SIZE:
|
||||
return json_diff
|
||||
elif revision.changeset.description.startswith("merge "):
|
||||
if revision.changeset.description.startswith("merge "):
|
||||
return None # IGNORE THE MERGE CHANGESETS
|
||||
elif num_changes < MAX_DIFF_SIZE:
|
||||
return json_diff
|
||||
else:
|
||||
Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes)
|
||||
for file in json_diff:
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from mo_logs import Log
|
||||
from mo_threads import Till, Thread, Lock
|
||||
from mo_times import Date, SECOND
|
||||
|
||||
METRIC_DECAY_RATE = 0.9 # PER-SECOND DECAY RATE FOR REPORTING REQUEST RATE
|
||||
METRIC_REPORT_PERIOD = 10 * SECOND
|
||||
|
||||
|
||||
class RateLogger(object):
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.lock = Lock("rate locker")
|
||||
self.request_rate = 0.0
|
||||
self.last_request = Date.now()
|
||||
|
||||
Thread.run("rate logger", self._daemon)
|
||||
|
||||
def add(self, timestamp):
|
||||
with self.lock:
|
||||
decay = METRIC_DECAY_RATE ** (timestamp - self.last_request).seconds
|
||||
self.request_rate = decay*self.request_rate + 1
|
||||
self.last_request = timestamp
|
||||
|
||||
def _daemon(self, please_stop):
|
||||
while not please_stop:
|
||||
timestamp = Date.now()
|
||||
with self.lock:
|
||||
decay = METRIC_DECAY_RATE ** (timestamp - self.last_request).seconds
|
||||
request_rate = self.request_rate = decay * self.request_rate
|
||||
self.last_request = timestamp
|
||||
|
||||
Log.note("{{name}} request rate: {{rate|round(places=2)}} requests per second", name=self.name, rate=request_rate)
|
||||
(please_stop | Till(seconds=METRIC_REPORT_PERIOD.seconds)).wait()
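
The reported rate is an exponentially decayed counter: each request applies rate = METRIC_DECAY_RATE ** Δt * rate + 1, so bursts raise it quickly and idle time lets it drain back toward zero. A worked example with numbers computed here, not taken from the source:

# Worked example of the decayed-rate update above.
METRIC_DECAY_RATE = 0.9

rate, last = 0.0, 0.0
for t in [0.0, 1.0, 2.0, 4.0]:           # one request per listed second
    rate = METRIC_DECAY_RATE ** (t - last) * rate + 1
    last = t
    print(t, round(rate, 3))
# t=0 -> 1.0, t=1 -> 1.9, t=2 -> 2.71, t=4 -> 3.195 (0.81*2.71 + 1)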
|
@ -0,0 +1,107 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
import flask
|
||||
from flask import Flask, Response
|
||||
|
||||
from mo_hg.cache import Cache
|
||||
from mo_json import value2json
|
||||
from mo_logs import Log, constants, startup, Except
|
||||
from mo_logs.strings import unicode2utf8
|
||||
from pyLibrary.env.flask_wrappers import cors_wrapper
|
||||
|
||||
APP_NAME = "HG Relay"
|
||||
|
||||
|
||||
class RelayApp(Flask):
|
||||
|
||||
def run(self, *args, **kwargs):
|
||||
# ENSURE THE LOGGING IS CLEANED UP
|
||||
try:
|
||||
Flask.run(self, *args, **kwargs)
|
||||
except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
|
||||
Log.warning(APP_NAME + " service shutdown!", cause=e)
|
||||
finally:
|
||||
Log.stop()
|
||||
|
||||
|
||||
flask_app = None
|
||||
config = None
|
||||
cache = None
|
||||
|
||||
|
||||
@cors_wrapper
|
||||
def relay_get(path):
|
||||
try:
|
||||
return cache.request("get", path, flask.request.headers)
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
Log.warning("could not handle request", cause=e)
|
||||
return Response(
|
||||
unicode2utf8(value2json(e, pretty=True)),
|
||||
status=400,
|
||||
headers={
|
||||
"Content-Type": "text/html"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@cors_wrapper
|
||||
def relay_post(path):
|
||||
try:
|
||||
return cache.request("post", path, flask.request.headers)
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
Log.warning("could not handle request", cause=e)
|
||||
return Response(
|
||||
unicode2utf8(value2json(e, pretty=True)),
|
||||
status=400,
|
||||
headers={
|
||||
"Content-Type": "text/html"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def add(any_flask_app):
|
||||
global cache
|
||||
|
||||
cache = Cache(config.cache)
|
||||
any_flask_app.add_url_rule(str('/<path:path>'), None, relay_get, methods=[str('GET')])
|
||||
any_flask_app.add_url_rule(str('/<path:path>'), None, relay_post, methods=[str('POST')])
|
||||
any_flask_app.add_url_rule(str('/'), None, relay_get, methods=[str('GET')])
|
||||
any_flask_app.add_url_rule(str('/'), None, relay_post, methods=[str('POST')])
|
||||
|
||||
|
||||
if __name__ in ("__main__",):
|
||||
Log.note("Starting " + APP_NAME + " Service App...")
|
||||
flask_app = RelayApp(__name__)
|
||||
|
||||
try:
|
||||
config = startup.read_settings(
|
||||
filename=os.environ.get('HG_RELAY_CONFIG')
|
||||
)
|
||||
constants.set(config.constants)
|
||||
Log.start(config.debug)
|
||||
|
||||
add(flask_app)
|
||||
Log.note("Started " + APP_NAME + " Service")
|
||||
except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
|
||||
try:
|
||||
Log.error("Serious problem with " + APP_NAME + " service construction! Shutdown!", cause=e)
|
||||
finally:
|
||||
Log.stop()
|
||||
|
||||
if config.flask:
|
||||
if config.flask.port and config.args.process_num:
|
||||
config.flask.port += config.args.process_num
|
||||
Log.note("Running Flask...")
|
||||
flask_app.run(**config.flask)
|
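A hedged sketch of the catch-all routing that add() wires up above: the root path and every sub-path are sent to the same relay handlers. The upstream forwarding is stubbed out, and the handler and endpoint names below are invented for illustration.

from flask import Flask, Response

app = Flask(__name__)

def relay(path=""):
    # a real handler would forward the method, headers and body to the hg server
    return Response(("relayed: /" + path).encode("utf8"), status=200)

app.add_url_rule('/<path:path>', "relay_path", relay, methods=['GET', 'POST'])
app.add_url_rule('/', "relay_root", relay, methods=['GET', 'POST'])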
|
@ -7,6 +7,7 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ from collections import Mapping
|
|||
from datetime import date, timedelta, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
from mo_dots import FlatList, NullType, Data, wrap_leaves, wrap, Null
|
||||
from mo_dots import FlatList, NullType, Data, wrap_leaves, wrap, Null, SLOT
|
||||
from mo_dots.objects import DataObject
|
||||
from mo_future import text_type, none_type, long, binary_type, PY2
|
||||
from mo_logs import Except, strings, Log
|
||||
|
@ -158,7 +158,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):
|
|||
elif type_ is Decimal:
|
||||
return scrub_number(value)
|
||||
elif type_ is Data:
|
||||
return _scrub(_get(value, '_dict'), is_done, stack, scrub_text, scrub_number)
|
||||
return _scrub(_get(value, SLOT), is_done, stack, scrub_text, scrub_number)
|
||||
elif isinstance(value, Mapping):
|
||||
_id = id(value)
|
||||
if _id in is_done:
|
||||
|
|
|
@ -13,7 +13,6 @@ from __future__ import unicode_literals
|
|||
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
from collections import Mapping
|
||||
from datetime import datetime, date, timedelta
|
||||
|
@ -21,11 +20,12 @@ from decimal import Decimal
|
|||
from json.encoder import encode_basestring
|
||||
from math import floor
|
||||
|
||||
from mo_dots import Data, FlatList, NullType, Null
|
||||
from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange
|
||||
from mo_dots import Data, FlatList, NullType, Null, SLOT
|
||||
from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange, PYPY
|
||||
from mo_json import ESCAPE_DCT, scrub, float2json
|
||||
from mo_logs import Except
|
||||
from mo_logs.strings import utf82unicode, quote
|
||||
from mo_times import Timer
|
||||
from mo_times.dates import Date
|
||||
from mo_times.durations import Duration
|
||||
|
||||
|
@ -43,8 +43,6 @@ _ = Except
|
|||
# 2) WHEN USING PYPY, WE USE CLEAR-AND-SIMPLE PROGRAMMING SO THE OPTIMIZER CAN DO
|
||||
# ITS JOB. ALONG WITH THE UnicodeBuilder WE GET NEAR C SPEEDS
|
||||
|
||||
use_pypy = False
|
||||
|
||||
COMMA = u","
|
||||
QUOTE = u'"'
|
||||
COLON = u":"
|
||||
|
@ -54,20 +52,10 @@ COMMA_QUOTE = COMMA + QUOTE
|
|||
PRETTY_COMMA = u", "
|
||||
PRETTY_COLON = u": "
|
||||
|
||||
try:
|
||||
if PYPY:
|
||||
# UnicodeBuilder IS ABOUT 2x FASTER THAN list()
|
||||
from __pypy__.builders import UnicodeBuilder
|
||||
|
||||
use_pypy = True
|
||||
except Exception as e:
|
||||
if use_pypy:
|
||||
sys.stdout.write(
|
||||
b"*********************************************************\n"
|
||||
b"** The PyLibrary JSON serializer for PyPy is in use!\n"
|
||||
b"** Currently running CPython: This will run sloooow!\n"
|
||||
b"*********************************************************\n"
|
||||
)
|
||||
|
||||
else:
|
||||
class UnicodeBuilder(list):
|
||||
def __init__(self, length=None):
|
||||
list.__init__(self)
|
||||
|
@ -121,8 +109,10 @@ class cPythonJSONEncoder(object):
|
|||
return pretty_json(value)
|
||||
|
||||
try:
|
||||
scrubbed = scrub(value)
|
||||
return text_type(self.encoder(scrubbed))
|
||||
with Timer("scrub", too_long=0.1):
|
||||
scrubbed = scrub(value)
|
||||
with Timer("encode", too_long=0.1):
|
||||
return text_type(self.encoder(scrubbed))
|
||||
except Exception as e:
|
||||
from mo_logs.exceptions import Except
|
||||
from mo_logs import Log
|
||||
|
@ -184,7 +174,7 @@ def _value2json(value, _buffer):
|
|||
_dict2json(value, _buffer)
|
||||
return
|
||||
elif type is Data:
|
||||
d = _get(value, "_dict") # MIGHT BE A VALUE NOT A DICT
|
||||
d = _get(value, SLOT) # MIGHT BE A VALUE NOT A DICT
|
||||
_value2json(d, _buffer)
|
||||
return
|
||||
elif type in (int, long, Decimal):
|
||||
|
@ -287,7 +277,7 @@ def pretty_json(value):
|
|||
return "true"
|
||||
elif isinstance(value, Mapping):
|
||||
try:
|
||||
items = sort_using_key(list(value.items()), lambda r: r[0])
|
||||
items = sort_using_key(value.items(), lambda r: r[0])
|
||||
values = [encode_basestring(k) + PRETTY_COLON + indent(pretty_json(v)).strip() for k, v in items if v != None]
|
||||
if not values:
|
||||
return "{}"
|
||||
|
@ -509,7 +499,7 @@ def unicode_key(key):
|
|||
# OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON?
|
||||
# http://liangnuren.wordpress.com/2012/08/13/python-json-performance/
|
||||
# http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html
|
||||
if use_pypy:
|
||||
if PYPY:
|
||||
json_encoder = pypy_json_encode
|
||||
else:
|
||||
# from ujson import dumps as ujson_dumps
|
||||
|
|
|
@ -11,22 +11,20 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
from collections import Mapping
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from json.encoder import encode_basestring
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
import time
|
||||
from jx_base import Column, python_type_to_json_type, NESTED, EXISTS, STRING, NUMBER, INTEGER, BOOLEAN
|
||||
from mo_dots import Data, FlatList, NullType, join_field, split_field
|
||||
from mo_future import text_type, binary_type, sort_using_key
|
||||
from mo_dots import Data, FlatList, NullType, join_field, split_field, _get, SLOT, DataObject
|
||||
from mo_future import text_type, binary_type, sort_using_key, long, PY2, none_type, generator_types
|
||||
from mo_json import ESCAPE_DCT, float2json
|
||||
from mo_json.encoder import UnicodeBuilder, COLON, COMMA, problem_serializing, json_encoder
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import quote, utf82unicode
|
||||
from mo_times import Date, Duration
|
||||
|
||||
from mo_json import ESCAPE_DCT, float2json
|
||||
from mo_json.encoder import UnicodeBuilder, COLON, COMMA, problem_serializing, json_encoder
|
||||
|
||||
|
||||
def encode_property(name):
|
||||
return name.replace(",", "\\,").replace(".", ",")
|
||||
|
@ -55,23 +53,59 @@ def unnest_path(encoded):
|
|||
|
||||
|
||||
def untyped(value):
|
||||
return _untype(value)
|
||||
return _untype_value(value)
|
||||
|
||||
|
||||
def _untype(value):
|
||||
if isinstance(value, Mapping):
|
||||
output = {}
|
||||
def _untype_list(value):
|
||||
if any(isinstance(v, Mapping) for v in value):
|
||||
# MAY BE MORE TYPED OBJECTS IN THIS LIST
|
||||
output = [_untype_value(v) for v in value]
|
||||
else:
|
||||
# LIST OF PRIMITIVE VALUES
|
||||
output = value
|
||||
|
||||
for k, v in value.items():
|
||||
if len(output) == 0:
|
||||
return None
|
||||
elif len(output) == 1:
|
||||
return output[0]
|
||||
else:
|
||||
return output
|
||||
|
||||
|
||||
def _untype_dict(value):
|
||||
output = {}
|
||||
|
||||
for k, v in value.items():
|
||||
if k.startswith(TYPE_PREFIX):
|
||||
if k == EXISTS_TYPE:
|
||||
continue
|
||||
elif k.startswith(TYPE_PREFIX):
|
||||
return v
|
||||
elif k == NESTED_TYPE:
|
||||
return _untype_list(v)
|
||||
else:
|
||||
output[decode_property(k)] = _untype(v)
|
||||
return output
|
||||
elif isinstance(value, list):
|
||||
return [_untype(v) for v in value]
|
||||
return v
|
||||
else:
|
||||
new_v = _untype_value(v)
|
||||
if new_v is not None:
|
||||
output[decode_property(k)] = new_v
|
||||
return output
|
||||
|
||||
|
||||
def _untype_value(value):
|
||||
_type = _get(value, "__class__")
|
||||
if _type is Data:
|
||||
return _untype_dict(_get(value, SLOT))
|
||||
elif _type is dict:
|
||||
return _untype_dict(value)
|
||||
elif _type is FlatList:
|
||||
return _untype_list(value.list)
|
||||
elif _type is list:
|
||||
return _untype_list(value)
|
||||
elif _type is NullType:
|
||||
return None
|
||||
elif _type is DataObject:
|
||||
return _untype_value(_get(value, "_obj"))
|
||||
elif _type in generator_types:
|
||||
return _untype_list(value)
|
||||
else:
|
||||
return value
|
||||
|
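As a worked example of the untyping above, a hedged, self-contained sketch follows; the real key names come from EXISTS_TYPE, NESTED_TYPE and TYPE_PREFIX defined further down, so the "~e~"/"~N~"/"~s~" spellings here are assumptions made only for illustration.

TYPE_PREFIX = "~"
EXISTS_KEY = "~e~"   # assumed spelling of EXISTS_TYPE
NESTED_KEY = "~N~"   # assumed spelling of NESTED_TYPE

def untype_sketch(value):
    if isinstance(value, dict):
        output = {}
        for k, v in value.items():
            if k == EXISTS_KEY:
                continue                                # drop the exists marker
            elif k == NESTED_KEY:
                return [untype_sketch(i) for i in v]    # unwrap nested arrays
            elif k.startswith(TYPE_PREFIX):
                return v                                # typed leaf: return the raw value
            else:
                output[k] = untype_sketch(v)
        return output
    elif isinstance(value, list):
        return [untype_sketch(i) for i in value]
    return value

print(untype_sketch({"a": {"~s~": "hello"}, "~e~": 1}))   # -> {'a': 'hello'}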
||||
|
@ -90,7 +124,7 @@ def encode(value):
|
|||
|
||||
def typed_encode(value, sub_schema, path, net_new_properties, buffer):
|
||||
"""
|
||||
:param value: THE DATASCRUTURE TO ENCODE
|
||||
:param value: THE DATA STRUCTURE TO ENCODE
|
||||
:param sub_schema: dict FROM PATH TO Column DESCRIBING THE TYPE
|
||||
:param path: list OF CURRENT PATH
|
||||
:param net_new_properties: list FOR ADDING NEW PROPERTIES NOT FOUND IN sub_schema
|
||||
|
@ -98,7 +132,8 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
|
|||
:return:
|
||||
"""
|
||||
try:
|
||||
if isinstance(sub_schema, Column):
|
||||
# from jx_base import Column
|
||||
if sub_schema.__class__.__name__=='Column':
|
||||
value_json_type = python_type_to_json_type[value.__class__]
|
||||
column_json_type = es_type_to_json_type[sub_schema.es_type]
|
||||
|
||||
|
@ -135,7 +170,7 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
|
|||
|
||||
_type = value.__class__
|
||||
if _type in (dict, Data):
|
||||
if isinstance(sub_schema, Column):
|
||||
if sub_schema.__class__.__name__ == 'Column':
|
||||
from mo_logs import Log
|
||||
Log.error("Can not handle {{column|json}}", column=sub_schema)
|
||||
|
||||
|
@ -191,16 +226,16 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
|
|||
for c in value:
|
||||
append(buffer, ESCAPE_DCT.get(c, c))
|
||||
append(buffer, '"}')
|
||||
elif _type in (int, long, Decimal):
|
||||
elif _type in (int, long):
|
||||
if NUMBER_TYPE not in sub_schema:
|
||||
sub_schema[NUMBER_TYPE] = True
|
||||
net_new_properties.append(path + [NUMBER_TYPE])
|
||||
|
||||
append(buffer, '{')
|
||||
append(buffer, QUOTED_NUMBER_TYPE)
|
||||
append(buffer, float2json(value))
|
||||
append(buffer, text_type(value))
|
||||
append(buffer, '}')
|
||||
elif _type is float:
|
||||
elif _type in (float, Decimal):
|
||||
if NUMBER_TYPE not in sub_schema:
|
||||
sub_schema[NUMBER_TYPE] = True
|
||||
net_new_properties.append(path + [NUMBER_TYPE])
|
||||
|
@ -362,7 +397,7 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
|
|||
if k not in sub_schema:
|
||||
sub_schema[k] = {}
|
||||
net_new_properties.append(path + [k])
|
||||
append(buffer, encode_basestring(k))
|
||||
append(buffer, encode_basestring(encode_property(k)))
|
||||
append(buffer, COLON)
|
||||
typed_encode(v, sub_schema[k], path + [k], net_new_properties, buffer)
|
||||
if prefix is COMMA:
|
||||
|
@ -372,7 +407,43 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
|
|||
else:
|
||||
append(buffer, '{')
|
||||
append(buffer, QUOTED_EXISTS_TYPE)
|
||||
append(buffer, '0}')
|
||||
append(buffer, '1}')
|
||||
|
||||
|
||||
IS_NULL = '0'
|
||||
BOOLEAN = 'boolean'
|
||||
INTEGER = 'integer'
|
||||
NUMBER = 'number'
|
||||
STRING = 'string'
|
||||
OBJECT = 'object'
|
||||
NESTED = "nested"
|
||||
EXISTS = "exists"
|
||||
|
||||
JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
|
||||
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
|
||||
STRUCT = [EXISTS, OBJECT, NESTED]
|
||||
|
||||
|
||||
python_type_to_json_type = {
|
||||
int: NUMBER,
|
||||
text_type: STRING,
|
||||
float: NUMBER,
|
||||
None: OBJECT,
|
||||
bool: BOOLEAN,
|
||||
NullType: OBJECT,
|
||||
none_type: OBJECT,
|
||||
Data: OBJECT,
|
||||
dict: OBJECT,
|
||||
object: OBJECT,
|
||||
Mapping: OBJECT,
|
||||
list: NESTED,
|
||||
FlatList: NESTED,
|
||||
Date: NUMBER
|
||||
}
|
||||
|
||||
if PY2:
|
||||
python_type_to_json_type[str] = STRING
|
||||
python_type_to_json_type[long] = NUMBER
|
||||
|
||||
|
||||
TYPE_PREFIX = "~" # u'\u0442\u0443\u0440\u0435-' # "туре"
|
||||
|
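Roughly how the python_type_to_json_type table above is consulted when deciding how a value should be encoded; the cut-down map and helper below are illustrative only, not part of the library.

NUMBER, STRING, BOOLEAN, NESTED, OBJECT = "number", "string", "boolean", "nested", "object"

simple_map = {int: NUMBER, float: NUMBER, str: STRING, bool: BOOLEAN, list: NESTED}

def json_type_of(value):
    # exact-class lookup, so bool maps to BOOLEAN even though bool subclasses int
    return simple_map.get(value.__class__, OBJECT)

print(json_type_of(3.14))     # number
print(json_type_of([1, 2]))   # nested
print(json_type_of(True))     # boolean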
|
|
@ -18,19 +18,28 @@ from collections import Mapping
|
|||
import mo_dots
|
||||
from mo_dots import set_default, wrap, unwrap
|
||||
from mo_files import File
|
||||
from mo_files.url import URL
|
||||
from mo_future import text_type
|
||||
from mo_json import json2value
|
||||
from mo_json_config.convert import ini2value
|
||||
from mo_logs import Log, Except
|
||||
from mo_logs.url import URL
|
||||
|
||||
DEBUG = False
|
||||
|
||||
|
||||
def get_file(file):
|
||||
file = File(file)
|
||||
if os.sep=="\\":
|
||||
return get("file:///" + file.abspath)
|
||||
else:
|
||||
return get("file://" + file.abspath)
|
||||
|
||||
|
||||
def get(url):
|
||||
"""
|
||||
USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON
|
||||
"""
|
||||
url = str(url)
|
||||
url = text_type(url)
|
||||
if url.find("://") == -1:
|
||||
Log.error("{{url}} must have a prototcol (eg http://) declared", url=url)
|
||||
|
||||
|
@ -114,6 +123,8 @@ def _replace_ref(node, url):
|
|||
|
||||
if not output:
|
||||
output = new_value
|
||||
elif isinstance(output, text_type):
|
||||
Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
|
||||
else:
|
||||
output = unwrap(set_default(output, new_value))
|
||||
|
||||
|
@ -181,7 +192,7 @@ def _replace_locals(node, doc_path):
|
|||
## SCHEME LOADERS ARE BELOW THIS LINE
|
||||
###############################################################################
|
||||
|
||||
def get_file(ref, url):
|
||||
def _get_file(ref, url):
|
||||
|
||||
if ref.path.startswith("~"):
|
||||
home_path = os.path.expanduser("~")
|
||||
|
@ -233,17 +244,17 @@ def get_http(ref, url):
|
|||
return new_value
|
||||
|
||||
|
||||
def get_env(ref, url):
|
||||
def _get_env(ref, url):
|
||||
# GET ENVIRONMENT VARIABLES
|
||||
ref = ref.host
|
||||
try:
|
||||
new_value = json2value(os.environ[ref])
|
||||
except Exception as e:
|
||||
new_value = os.environ[ref]
|
||||
new_value = os.environ.get(ref)
|
||||
return new_value
|
||||
|
||||
|
||||
def get_param(ref, url):
|
||||
def _get_param(ref, url):
|
||||
# GET PARAMETERS FROM url
|
||||
param = url.query
|
||||
new_value = param[ref.host]
|
||||
|
@ -252,8 +263,8 @@ def get_param(ref, url):
|
|||
|
||||
scheme_loaders = {
|
||||
"http": get_http,
|
||||
"file": get_file,
|
||||
"env": get_env,
|
||||
"param": get_param
|
||||
"file": _get_file,
|
||||
"env": _get_env,
|
||||
"param": _get_param
|
||||
}
|
||||
|
||||
|
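A rough sketch of the dispatch that scheme_loaders above performs: the scheme of a reference URL chooses which private loader resolves it. This stand-alone version uses the standard library and covers only env:// and file:// for illustration.

import os
from urllib.parse import urlparse

def _get_env(parsed, url=None):
    return os.environ.get(parsed.netloc)      # env://HOME -> value of $HOME

def _get_file(parsed, url=None):
    with open(parsed.path) as f:
        return f.read()

loaders = {"env": _get_env, "file": _get_file}

def resolve(ref, url=None):
    parsed = urlparse(ref)
    return loaders[parsed.scheme](parsed, url)

print(resolve("env://HOME"))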
|
|
@ -1,180 +0,0 @@
|
|||
from collections import Mapping
|
||||
from urlparse import urlparse
|
||||
|
||||
from mo_dots import wrap, Data
|
||||
from mo_json import value2json, json2value
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
class URL(object):
|
||||
"""
|
||||
JUST LIKE urllib.parse() [1], BUT CAN HANDLE JSON query PARAMETERS
|
||||
|
||||
[1] https://docs.python.org/3/library/urllib.parse.html
|
||||
"""
|
||||
|
||||
def __init__(self, value):
|
||||
try:
|
||||
self.scheme = None
|
||||
self.host = None
|
||||
self.port = None
|
||||
self.path = ""
|
||||
self.query = ""
|
||||
self.fragment = ""
|
||||
|
||||
if value == None:
|
||||
return
|
||||
|
||||
if value.startswith("file://") or value.startswith("//"):
|
||||
# urlparse DOES NOT WORK IN THESE CASES
|
||||
scheme, suffix = value.split("//", 2)
|
||||
self.scheme = scheme.rstrip(":")
|
||||
parse(self, suffix, 0, 1)
|
||||
self.query = wrap(url_param2value(self.query))
|
||||
else:
|
||||
output = urlparse(value)
|
||||
self.scheme = output.scheme
|
||||
self.port = output.port
|
||||
self.host = output.netloc.split(":")[0]
|
||||
self.path = output.path
|
||||
self.query = wrap(url_param2value(output.query))
|
||||
self.fragment = output.fragment
|
||||
except Exception as e:
|
||||
Log.error("problem parsing {{value}} to URL", value=value, cause=e)
|
||||
|
||||
def __nonzero__(self):
|
||||
if self.scheme or self.host or self.port or self.path or self.query or self.fragment:
|
||||
return True
|
||||
return False
|
||||
|
||||
def __bool__(self):
|
||||
if self.scheme or self.host or self.port or self.path or self.query or self.fragment:
|
||||
return True
|
||||
return False
|
||||
|
||||
def __unicode__(self):
|
||||
return self.__str__().decode('utf8') # ASSUME chr<128 ARE VALID UNICODE
|
||||
|
||||
def __str__(self):
|
||||
url = b""
|
||||
if self.host:
|
||||
url = self.host
|
||||
if self.scheme:
|
||||
url = self.scheme + b"://"+url
|
||||
if self.port:
|
||||
url = url + b":" + str(self.port)
|
||||
if self.path:
|
||||
if self.path[0]=="/":
|
||||
url += str(self.path)
|
||||
else:
|
||||
url += b"/"+str(self.path)
|
||||
if self.query:
|
||||
url = url + b'?' + value2url_param(self.query)
|
||||
if self.fragment:
|
||||
url = url + b'#' + value2url_param(self.fragment)
|
||||
return url
|
||||
|
||||
|
||||
def int2hex(value, size):
|
||||
return (("0" * size) + hex(value)[2:])[-size:]
|
||||
|
||||
|
||||
_map2url = {chr(i): chr(i) for i in range(32, 128)}
|
||||
for c in b" {}<>;/?:@&=+$,":
|
||||
_map2url[c] = b"%" + str(int2hex(ord(c), 2))
|
||||
for i in range(128, 256):
|
||||
_map2url[chr(i)] = b"%" + str(int2hex(i, 2))
|
||||
|
||||
|
||||
names = ["path", "query", "fragment"]
|
||||
indicator = ["/", "?", "#"]
|
||||
|
||||
|
||||
def parse(output, suffix, curr, next):
|
||||
if next == len(indicator):
|
||||
output.__setattr__(names[curr], suffix)
|
||||
return
|
||||
|
||||
e = suffix.find(indicator[next])
|
||||
if e == -1:
|
||||
parse(output, suffix, curr, next + 1)
|
||||
else:
|
||||
output.__setattr__(names[curr], suffix[:e:])
|
||||
parse(output, suffix[e + 1::], next, next + 1)
|
||||
|
||||
|
||||
def url_param2value(param):
|
||||
"""
|
||||
CONVERT URL QUERY PARAMETERS INTO DICT
|
||||
"""
|
||||
if isinstance(param, text_type):
|
||||
param = param.encode("ascii")
|
||||
|
||||
def _decode(v):
|
||||
output = []
|
||||
i = 0
|
||||
while i < len(v):
|
||||
c = v[i]
|
||||
if c == "%":
|
||||
d = (v[i + 1:i + 3]).decode("hex")
|
||||
output.append(d)
|
||||
i += 3
|
||||
else:
|
||||
output.append(c)
|
||||
i += 1
|
||||
|
||||
output = (b"".join(output)).decode("latin1")
|
||||
try:
|
||||
return json2value(output)
|
||||
except Exception:
|
||||
pass
|
||||
return output
|
||||
|
||||
query = Data()
|
||||
for p in param.split(b'&'):
|
||||
if not p:
|
||||
continue
|
||||
if p.find(b"=") == -1:
|
||||
k = p
|
||||
v = True
|
||||
else:
|
||||
k, v = p.split(b"=")
|
||||
v = _decode(v)
|
||||
|
||||
u = query.get(k)
|
||||
if u is None:
|
||||
query[k] = v
|
||||
elif isinstance(u, list):
|
||||
u += [v]
|
||||
else:
|
||||
query[k] = [u, v]
|
||||
|
||||
return query
|
||||
|
||||
|
||||
def value2url_param(value):
|
||||
"""
|
||||
:param value:
|
||||
:return: ascii URL
|
||||
"""
|
||||
if value == None:
|
||||
Log.error("Can not encode None into a URL")
|
||||
|
||||
if isinstance(value, Mapping):
|
||||
value_ = wrap(value)
|
||||
output = b"&".join([
|
||||
value2url_param(k) + b"=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(value2json(v)))
|
||||
for k, v in value_.leaves()
|
||||
])
|
||||
elif isinstance(value, text_type):
|
||||
output = b"".join(_map2url[c] for c in value.encode('utf8'))
|
||||
elif isinstance(value, str):
|
||||
output = b"".join(_map2url[c] for c in value)
|
||||
elif hasattr(value, "__iter__"):
|
||||
output = b",".join(value2url_param(v) for v in value)
|
||||
else:
|
||||
output = str(value)
|
||||
return output
|
||||
|
||||
|
||||
|
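For comparison only, since the module above is being deleted in favour of mo_files.url: the standard library gives roughly the behaviour of url_param2value and value2url_param, minus the JSON-in-query-parameter support.

from urllib.parse import parse_qs, urlencode

print(parse_qs("a=1&b=hello&b=world"))          # {'a': ['1'], 'b': ['hello', 'world']}
print(urlencode({"a": 1, "b": "hello world"}))  # a=1&b=hello+world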
|
@ -13,9 +13,8 @@ from __future__ import unicode_literals
|
|||
|
||||
from collections import Mapping
|
||||
|
||||
from mo_future import text_type, get_function_arguments, get_function_defaults, get_function_name
|
||||
from mo_dots import zip as dict_zip, get_logger, wrap
|
||||
|
||||
from mo_future import text_type, get_function_arguments, get_function_defaults, get_function_name
|
||||
from mo_logs import Except
|
||||
|
||||
|
||||
|
@ -34,77 +33,88 @@ def override(func):
|
|||
3) DEFAULT VALUES ASSIGNED IN FUNCTION DEFINITION
|
||||
"""
|
||||
|
||||
func_name = get_function_name(func)
|
||||
params = get_function_arguments(func)
|
||||
if not get_function_defaults(func):
|
||||
defaults = {}
|
||||
else:
|
||||
defaults = {k: v for k, v in zip(reversed(params), reversed(get_function_defaults(func)))}
|
||||
|
||||
def raise_error(e, packed):
|
||||
err = text_type(e)
|
||||
e = Except.wrap(e)
|
||||
if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err):
|
||||
missing = [p for p in params if str(p) not in packed]
|
||||
given = [p for p in params if str(p) in packed]
|
||||
get_logger().error(
|
||||
"Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
|
||||
func_name=func_name,
|
||||
missing=missing,
|
||||
given=given,
|
||||
stack_depth=2
|
||||
)
|
||||
get_logger().error("Error dispatching call", e)
|
||||
|
||||
if "kwargs" not in params:
|
||||
# WE ASSUME WE ARE ONLY ADDING A kwargs PARAMETER TO SOME REGULAR METHOD
|
||||
def w_settings(*args, **kwargs):
|
||||
def wo_kwargs(*args, **kwargs):
|
||||
settings = kwargs.get("kwargs")
|
||||
|
||||
params = get_function_arguments(func)
|
||||
if not get_function_defaults(func):
|
||||
defaults = {}
|
||||
else:
|
||||
defaults = {k: v for k, v in zip(reversed(params), reversed(get_function_defaults(func)))}
|
||||
|
||||
ordered_params = dict(zip(params, args))
|
||||
packed = params_pack(params, ordered_params, kwargs, settings, defaults)
|
||||
try:
|
||||
return func(**packed)
|
||||
except TypeError as e:
|
||||
raise_error(e, packed)
|
||||
return wo_kwargs
|
||||
|
||||
return func(**params_pack(params, ordered_params, kwargs, settings, defaults))
|
||||
return w_settings
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
func_name = get_function_name(func)
|
||||
if func_name in ("__init__", "__new__") and "kwargs" in kwargs:
|
||||
elif func_name in ("__init__", "__new__"):
|
||||
def w_constructor(*args, **kwargs):
|
||||
if "kwargs" in kwargs:
|
||||
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
|
||||
return func(args[0], **packed)
|
||||
elif func_name in ("__init__", "__new__") and len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
|
||||
elif len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
|
||||
# ASSUME SECOND UNNAMED PARAM IS kwargs
|
||||
packed = params_pack(params, args[1], defaults)
|
||||
return func(args[0], **packed)
|
||||
elif func_name in ("__init__", "__new__"):
|
||||
else:
|
||||
# DO NOT INCLUDE self IN kwargs
|
||||
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults)
|
||||
try:
|
||||
return func(args[0], **packed)
|
||||
elif params[0] == "self" and "kwargs" in kwargs:
|
||||
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
|
||||
return func(args[0], **packed)
|
||||
elif params[0] == "self" and len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
|
||||
except TypeError as e:
|
||||
raise_error(e, packed)
|
||||
return w_constructor
|
||||
|
||||
elif params[0] == "self":
|
||||
def w_bound_method(*args, **kwargs):
|
||||
if len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
|
||||
# ASSUME SECOND UNNAMED PARAM IS kwargs
|
||||
packed = params_pack(params, args[1], defaults)
|
||||
return func(args[0], **packed)
|
||||
elif params[0] == "self":
|
||||
elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
|
||||
# PUT args INTO kwargs
|
||||
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
|
||||
else:
|
||||
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults)
|
||||
try:
|
||||
return func(args[0], **packed)
|
||||
elif len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
|
||||
# ASSUME SINGLE PARAMETER IS A SETTING
|
||||
except TypeError as e:
|
||||
raise_error(e, packed)
|
||||
return w_bound_method
|
||||
|
||||
else:
|
||||
def w_kwargs(*args, **kwargs):
|
||||
if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
|
||||
# ASSUME SINGLE PARAMETER IS kwargs
|
||||
packed = params_pack(params, args[0], defaults)
|
||||
return func(**packed)
|
||||
elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
|
||||
# PUT args INTO kwargs
|
||||
packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults)
|
||||
return func(**packed)
|
||||
else:
|
||||
# PULL kwargs OUT INTO PARAMS
|
||||
packed = params_pack(params, kwargs, dict_zip(params, args), defaults)
|
||||
try:
|
||||
return func(**packed)
|
||||
except TypeError as e:
|
||||
e = Except.wrap(e)
|
||||
if e.message.startswith(func_name) and "takes at least" in e:
|
||||
missing = [p for p in params if str(p) not in packed]
|
||||
get_logger().error(
|
||||
"Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
|
||||
func_name=func_name,
|
||||
missing=missing,
|
||||
given=packed.keys(),
|
||||
stack_depth=1
|
||||
)
|
||||
get_logger().error("Error dispatching call", e)
|
||||
return wrapper
|
||||
except TypeError as e:
|
||||
raise_error(e, packed)
|
||||
return w_kwargs
|
||||
|
||||
|
||||
def params_pack(params, *args):
|
||||
|
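A hedged usage sketch for the @override decorator reworked above, following the priority order its docstring describes (explicit call values, then the kwargs settings object, then signature defaults); connect() and its parameters are invented for this example.

from mo_kwargs import override

@override
def connect(host, port=9200, timeout=30, kwargs=None):
    return host, port, timeout

settings = {"host": "example.com", "port": 9300}

# host and port come from the settings object, timeout falls back to its default
print(connect(kwargs=settings))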
|
|
@ -13,18 +13,23 @@ from __future__ import unicode_literals
|
|||
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
from collections import Mapping
|
||||
from datetime import datetime
|
||||
|
||||
import sys
|
||||
|
||||
from mo_dots import coalesce, listwrap, wrap, unwrap, unwraplist, set_default, FlatList
|
||||
from mo_future import text_type, PY3, iteritems
|
||||
from mo_future import text_type, PY3
|
||||
from mo_logs import constants
|
||||
from mo_logs.exceptions import Except, suppress_exception
|
||||
from mo_logs.strings import indent
|
||||
|
||||
_Thread = None
|
||||
if PY3:
|
||||
STDOUT = sys.stdout.buffer
|
||||
else:
|
||||
STDOUT = sys.stdout
|
||||
|
||||
|
||||
|
||||
class Log(object):
|
||||
"""
|
||||
|
@ -34,8 +39,6 @@ class Log(object):
|
|||
main_log = None
|
||||
logging_multi = None
|
||||
profiler = None # simple pypy-friendly profiler
|
||||
cprofiler = None # screws up with pypy, but better than nothing
|
||||
cprofiler_stats = None
|
||||
error_mode = False # prevent error loops
|
||||
|
||||
@classmethod
|
||||
|
@ -53,7 +56,6 @@ class Log(object):
|
|||
constants - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)
|
||||
"""
|
||||
global _Thread
|
||||
|
||||
if not settings:
|
||||
return
|
||||
settings = wrap(settings)
|
||||
|
@ -66,40 +68,37 @@ class Log(object):
|
|||
from mo_threads import Thread as _Thread
|
||||
_ = _Thread
|
||||
|
||||
# ENABLE CPROFILE
|
||||
if settings.cprofile is False:
|
||||
settings.cprofile = {"enabled": False}
|
||||
elif settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
|
||||
elif settings.cprofile is True:
|
||||
if isinstance(settings.cprofile, bool):
|
||||
settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}
|
||||
|
||||
import cProfile
|
||||
|
||||
cls.cprofiler = cProfile.Profile()
|
||||
cls.cprofiler.enable()
|
||||
if settings.cprofile.enabled:
|
||||
from mo_threads import profiles
|
||||
profiles.enable_profilers(settings.cprofile.filename)
|
||||
|
||||
if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
|
||||
from mo_logs import profiles
|
||||
|
||||
if isinstance(settings.profile, bool):
|
||||
profiles.ON = True
|
||||
settings.profile = {"enabled": True, "filename": "profile.tab"}
|
||||
|
||||
if settings.profile.enabled:
|
||||
profiles.ON = True
|
||||
Log.error("REMOVED 2018-09-02, Activedata revision 3f30ff46f5971776f8ba18")
|
||||
# from mo_logs import profiles
|
||||
#
|
||||
# if isinstance(settings.profile, bool):
|
||||
# profiles.ON = True
|
||||
# settings.profile = {"enabled": True, "filename": "profile.tab"}
|
||||
#
|
||||
# if settings.profile.enabled:
|
||||
# profiles.ON = True
|
||||
|
||||
if settings.constants:
|
||||
constants.set(settings.constants)
|
||||
|
||||
if settings.log:
|
||||
cls.logging_multi = StructuredLogger_usingMulti()
|
||||
from mo_logs.log_usingThread import StructuredLogger_usingThread
|
||||
cls.main_log = StructuredLogger_usingThread(cls.logging_multi)
|
||||
|
||||
for log in listwrap(settings.log):
|
||||
Log.add_log(Log.new_instance(log))
|
||||
|
||||
if settings.cprofile.enabled == True:
|
||||
Log.alert("cprofiling is enabled, writing to {{filename}}", filename=os.path.abspath(settings.cprofile.filename))
|
||||
from mo_logs.log_usingThread import StructuredLogger_usingThread
|
||||
cls.main_log = StructuredLogger_usingThread(cls.logging_multi)
|
||||
|
||||
@classmethod
|
||||
def stop(cls):
|
||||
|
@ -108,23 +107,8 @@ class Log(object):
|
|||
EXECUTING MULTIPLE TIMES IN A ROW IS SAFE, IT HAS NO NET EFFECT, IT STILL LOGS TO stdout
|
||||
:return: NOTHING
|
||||
"""
|
||||
|
||||
from mo_threads import profiles
|
||||
|
||||
if cls.cprofiler and hasattr(cls, "settings"):
|
||||
if cls.cprofiler == None:
|
||||
from mo_threads import Queue
|
||||
|
||||
cls.cprofiler_stats = Queue("cprofiler stats") # ACCUMULATION OF STATS FROM ALL THREADS
|
||||
|
||||
import pstats
|
||||
cls.cprofiler_stats.add(pstats.Stats(cls.cprofiler))
|
||||
write_profile(cls.settings.cprofile, cls.cprofiler_stats.pop_all())
|
||||
|
||||
if profiles.ON and hasattr(cls, "settings"):
|
||||
profiles.write(cls.settings.profile)
|
||||
cls.main_log.stop()
|
||||
cls.main_log = StructuredLogger_usingStream(sys.stdout)
|
||||
main_log, cls.main_log = cls.main_log, StructuredLogger_usingStream(STDOUT)
|
||||
main_log.stop()
|
||||
|
||||
@classmethod
|
||||
def new_instance(cls, settings):
|
||||
|
@ -148,7 +132,10 @@ class Log(object):
|
|||
return StructuredLogger_usingFile(settings.filename)
|
||||
if settings.log_type == "console":
|
||||
from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream
|
||||
return StructuredLogger_usingThreadedStream(sys.stdout)
|
||||
return StructuredLogger_usingThreadedStream(STDOUT)
|
||||
if settings.log_type == "mozlog":
|
||||
from mo_logs.log_usingMozLog import StructuredLogger_usingMozLog
|
||||
return StructuredLogger_usingMozLog(STDOUT, coalesce(settings.app_name, settings.appname))
|
||||
if settings.log_type == "stream" or settings.stream:
|
||||
from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream
|
||||
return StructuredLogger_usingThreadedStream(settings.stream)
|
||||
|
@ -401,7 +388,7 @@ class Log(object):
|
|||
if add_to_trace:
|
||||
cause[0].trace.extend(trace[1:])
|
||||
|
||||
e = Except(type=exceptions.ERROR, template=template, params=params, cause=cause, trace=trace)
|
||||
e = Except(type=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
|
||||
raise_from_none(e)
|
||||
|
||||
@classmethod
|
||||
|
@ -455,31 +442,6 @@ class Log(object):
|
|||
raise NotImplementedError
|
||||
|
||||
|
||||
def write_profile(profile_settings, stats):
|
||||
from pyLibrary import convert
|
||||
from mo_files import File
|
||||
|
||||
Log.note("aggregating {{num}} profile stats", num=len(stats))
|
||||
acc = stats[0]
|
||||
for s in stats[1:]:
|
||||
acc.add(s)
|
||||
|
||||
stats = [{
|
||||
"num_calls": d[1],
|
||||
"self_time": d[2],
|
||||
"total_time": d[3],
|
||||
"self_time_per_call": d[2] / d[1],
|
||||
"total_time_per_call": d[3] / d[1],
|
||||
"file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
|
||||
"line": f[1],
|
||||
"method": f[2].lstrip("<").rstrip(">")
|
||||
}
|
||||
for f, d, in iteritems(acc.stats)
|
||||
]
|
||||
stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
|
||||
stats_file.write(convert.list2tab(stats))
|
||||
|
||||
|
||||
def _same_frame(frameA, frameB):
|
||||
return (frameA.line, frameA.file) == (frameB.line, frameB.file)
|
||||
|
||||
|
@ -506,5 +468,5 @@ from mo_logs.log_usingStream import StructuredLogger_usingStream
|
|||
|
||||
|
||||
if not Log.main_log:
|
||||
Log.main_log = StructuredLogger_usingStream(sys.stdout)
|
||||
Log.main_log = StructuredLogger_usingStream(STDOUT)
|
||||
|
||||
|
|
|
@ -211,7 +211,7 @@ class Suppress(object):
|
|||
self.type = exception_type
|
||||
|
||||
def __enter__(self):
|
||||
pass
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
if not exc_val or isinstance(exc_val, self.type):
|
||||
|
|
|
@ -94,7 +94,7 @@ def make_log_from_settings(settings):
|
|||
Log.error("Can not find class {{class}}", {"class": path}, cause=e)
|
||||
|
||||
# IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
|
||||
if settings.filename:
|
||||
if settings.filename != None:
|
||||
from mo_files import File
|
||||
|
||||
f = File(settings.filename)
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from mo_dots import wrap
|
||||
from mo_json import value2json, datetime2unix
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from mo_logs.exceptions import ERROR, NOTE, WARNING, ALARM
|
||||
from mo_logs.log_usingElasticSearch import _deep_json_to_string
|
||||
from mo_logs.log_usingNothing import StructuredLogger
|
||||
|
||||
|
||||
class StructuredLogger_usingMozLog(StructuredLogger):
|
||||
"""
|
||||
WRITE TO MozLog STANDARD FORMAT
|
||||
https://wiki.mozilla.org/Firefox/Services/Logging
|
||||
"""
|
||||
@override
|
||||
def __init__(self, stream, app_name):
|
||||
"""
|
||||
:param stream: MozLog IS A JSON FORMAT, WHICH IS BYTES
|
||||
:param app_name: MozLog WOULD LIKE TO KNOW WHAT APP IS MAKING THESE LOGS
|
||||
"""
|
||||
self.stream = stream
|
||||
self.app_name = app_name
|
||||
if not app_name:
|
||||
Log.error("mozlog expects an `app_name` in the config")
|
||||
if not Log.trace:
|
||||
Log.error("mozlog expects trace=True so it get s the information it requires")
|
||||
|
||||
def write(self, template, params):
|
||||
output = {
|
||||
"Timestamp": (Decimal(datetime2unix(params.timestamp)) * Decimal(1e9)).to_integral_exact(), # NANOSECONDS
|
||||
"Type": params.template,
|
||||
"Logger": params.machine.name,
|
||||
"Hostname": self.app_name,
|
||||
"EnvVersion": "2.0",
|
||||
"Severity": severity_map.get(params.context, 3), # https://en.wikipedia.org/wiki/Syslog#Severity_levels
|
||||
"Pid": params.machine.pid,
|
||||
"Fields": {
|
||||
k: _deep_json_to_string(v, 0)
|
||||
for k, v in wrap(params).leaves()
|
||||
}
|
||||
}
|
||||
self.stream.write(value2json(output).encode('utf8'))
|
||||
self.stream.write(b'\n')
|
||||
|
||||
|
||||
severity_map = {
|
||||
ERROR: 3,
|
||||
WARNING: 4,
|
||||
ALARM: 5,
|
||||
NOTE: 6
|
||||
}
|
||||
|
||||
|
||||
def datatime2decimal(value):
|
||||
return
|
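Purely for orientation, this is the shape of one MozLog record that write() above emits, rebuilt with plain Python so it can run stand-alone; the field values are dummies.

import json, os, time

record = {
    "Timestamp": int(time.time() * 1e9),   # nanoseconds since the epoch
    "Type": "example {{key}} template",
    "Logger": "machine-name",
    "Hostname": "app-name",                # write() puts app_name here
    "EnvVersion": "2.0",
    "Severity": 6,                         # syslog severity; 6 = informational (NOTE)
    "Pid": os.getpid(),
    "Fields": {"key": "value"},
}
print(json.dumps(record))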
|
@ -29,7 +29,7 @@ class StructuredLogger_usingMulti(StructuredLogger):
|
|||
m.write(template, params)
|
||||
except Exception as e:
|
||||
bad.append(m)
|
||||
Log.warning("Logger failed! It will be removed: {{type}}", type=m.__class__.__name__, cause=e)
|
||||
Log.warning("Logger {{type|quote}} failed! It will be removed.", type=m.__class__.__name__, cause=e)
|
||||
with suppress_exception:
|
||||
for b in bad:
|
||||
self.many.remove(b)
|
||||
|
|
|
@ -27,13 +27,8 @@ class StructuredLogger_usingStream(StructuredLogger):
|
|||
self.flush = stream.flush
|
||||
if stream in (sys.stdout, sys.stderr):
|
||||
if PY3:
|
||||
self.writer = stream.write
|
||||
else:
|
||||
self.writer = _UTF8Encoder(stream).write
|
||||
elif hasattr(stream, 'encoding') and stream.encoding:
|
||||
self.writer = _UTF8Encoder(stream).write
|
||||
else:
|
||||
self.writer = stream.write
|
||||
stream = stream.buffer
|
||||
self.writer = _UTF8Encoder(stream).write
|
||||
except Exception as _:
|
||||
sys.stderr.write("can not handle")
|
||||
|
||||
|
@ -57,5 +52,5 @@ class _UTF8Encoder(object):
|
|||
def write(self, v):
|
||||
try:
|
||||
self.stream.write(v.encode('utf8'))
|
||||
except Exception as _:
|
||||
except Exception:
|
||||
sys.stderr.write("can not handle")
|
||||
|
|
|
@ -17,6 +17,8 @@ from mo_logs import Log, Except, suppress_exception
|
|||
from mo_logs.log_usingNothing import StructuredLogger
|
||||
from mo_threads import Thread, Queue, Till, THREAD_STOP
|
||||
|
||||
DEBUG = False
|
||||
|
||||
|
||||
class StructuredLogger_usingThread(StructuredLogger):
|
||||
|
||||
|
@ -30,14 +32,17 @@ class StructuredLogger_usingThread(StructuredLogger):
|
|||
def worker(logger, please_stop):
|
||||
try:
|
||||
while not please_stop:
|
||||
Till(seconds=1).wait()
|
||||
(Till(seconds=1) | please_stop).wait()
|
||||
logs = self.queue.pop_all()
|
||||
for log in logs:
|
||||
if log is THREAD_STOP:
|
||||
please_stop.go()
|
||||
else:
|
||||
logger.write(**log)
|
||||
except Exception as e:
|
||||
print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
|
||||
finally:
|
||||
Log.note("stop the child")
|
||||
logger.stop()
|
||||
|
||||
self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
|
||||
|
@ -53,10 +58,13 @@ class StructuredLogger_usingThread(StructuredLogger):
|
|||
raise e # OH NO!
|
||||
|
||||
def stop(self):
|
||||
with suppress_exception:
|
||||
Log.warning("Stopping threaded logger")
|
||||
try:
|
||||
self.queue.add(THREAD_STOP) # BE PATIENT, LET REST OF MESSAGE BE SENT
|
||||
self.thread.join()
|
||||
self.logger.stop()
|
||||
Log.note("joined on thread")
|
||||
except Exception as e:
|
||||
Log.note("problem in threaded logger" + str(e))
|
||||
|
||||
with suppress_exception:
|
||||
self.queue.close()
|
||||
|
|
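A rough stdlib analogue of the batching pattern in the worker above, for readers unfamiliar with the mo_threads pieces: a background thread wakes roughly once a second (or when asked to stop), drains everything queued so far, and writes the batch in one pass.

import queue
import threading
import time

q = queue.Queue()
stop = threading.Event()

def worker():
    while not stop.is_set():
        stop.wait(1.0)                   # like (Till(seconds=1) | please_stop).wait()
        batch = []
        while True:
            try:
                batch.append(q.get_nowait())
            except queue.Empty:
                break
        for line in batch:
            print(line)

t = threading.Thread(target=worker)
t.start()
q.put("hello")
time.sleep(1.5)
stop.set()
t.join()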
|
@ -16,7 +16,8 @@ from __future__ import unicode_literals
|
|||
import sys
|
||||
from time import time
|
||||
|
||||
from mo_future import text_type, PY2
|
||||
from mo_dots import Data
|
||||
from mo_future import text_type, PY3
|
||||
from mo_logs import Log
|
||||
from mo_logs.log_usingNothing import StructuredLogger
|
||||
from mo_logs.strings import expand_template
|
||||
|
@ -31,29 +32,24 @@ class StructuredLogger_usingThreadedStream(StructuredLogger):
|
|||
def __init__(self, stream):
|
||||
assert stream
|
||||
|
||||
use_UTF8 = False
|
||||
|
||||
if isinstance(stream, text_type):
|
||||
if stream.startswith("sys."):
|
||||
use_UTF8 = True # sys.* ARE OLD AND CAN NOT HANDLE unicode
|
||||
self.stream = eval(stream)
|
||||
name = stream
|
||||
stream = self.stream = eval(stream)
|
||||
if name.startswith("sys.") and PY3:
|
||||
self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
|
||||
else:
|
||||
self.stream = stream
|
||||
name = "stream"
|
||||
self.stream = stream
|
||||
|
||||
# WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
|
||||
from mo_threads import Queue
|
||||
|
||||
if use_UTF8 and PY2:
|
||||
def utf8_appender(value):
|
||||
if isinstance(value, text_type):
|
||||
value = value.encode('utf8')
|
||||
self.stream.write(value)
|
||||
def utf8_appender(value):
|
||||
if isinstance(value, text_type):
|
||||
value = value.encode('utf8')
|
||||
self.stream.write(value)
|
||||
|
||||
appender = utf8_appender
|
||||
else:
|
||||
appender = self.stream.write
|
||||
appender = utf8_appender
|
||||
|
||||
self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
|
||||
self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
|
||||
|
@ -93,9 +89,11 @@ def time_delta_pusher(please_stop, appender, queue, interval):
|
|||
next_run = time() + interval
|
||||
|
||||
while not please_stop:
|
||||
Thread.current().cprofiler.disable()
|
||||
profiler = Thread.current().cprofiler
|
||||
profiler.disable()
|
||||
(Till(till=next_run) | please_stop).wait()
|
||||
Thread.current().cprofiler.enable()
|
||||
profiler.enable()
|
||||
|
||||
next_run = time() + interval
|
||||
logs = queue.pop_all()
|
||||
if not logs:
|
||||
|
|
|
@ -83,7 +83,7 @@ def read_settings(filename=None, defs=None):
|
|||
Log.error("Can not read configuration file {{filename}}", {
|
||||
"filename": settings_file.abspath
|
||||
})
|
||||
settings = mo_json_config.get("file:///" + settings_file.abspath)
|
||||
settings = mo_json_config.get_file(settings_file)
|
||||
settings.args = args
|
||||
return settings
|
||||
|
||||
|
|
|
@ -22,12 +22,11 @@ from datetime import datetime as builtin_datetime
|
|||
from datetime import timedelta, date
|
||||
from json.encoder import encode_basestring
|
||||
|
||||
import sys
|
||||
|
||||
from mo_dots import coalesce, wrap, get_module, Data
|
||||
from mo_future import text_type, xrange, binary_type, round as _round, PY3, get_function_name, zip_longest
|
||||
from mo_future import text_type, xrange, binary_type, round as _round, get_function_name, zip_longest, transpose, PY3
|
||||
from mo_logs.convert import datetime2unix, datetime2string, value2json, milli2datetime, unix2datetime
|
||||
from mo_logs.url import value2url_param
|
||||
|
||||
# from mo_files.url import value2url_param
|
||||
|
||||
FORMATTERS = {}
|
||||
|
||||
|
@ -46,7 +45,7 @@ def _late_import():
|
|||
try:
|
||||
_json_encoder = get_module("mo_json.encoder").json_encoder
|
||||
except Exception:
|
||||
_json_encoder = _json.dumps
|
||||
_json_encoder = lambda value, pretty: _json.dumps(value)
|
||||
from mo_logs import Log as _Log
|
||||
from mo_logs.exceptions import Except as _Except
|
||||
from mo_times.durations import Duration as _Duration
|
||||
|
@ -111,11 +110,17 @@ def unix(value):
|
|||
return str(datetime2unix(value))
|
||||
|
||||
|
||||
value2url_param = None
|
||||
|
||||
|
||||
@formatter
|
||||
def url(value):
|
||||
"""
|
||||
convert FROM dict OR string TO URL PARAMETERS
|
||||
"""
|
||||
global value2url_param
|
||||
if not value2url_param:
|
||||
from mo_files.url import value2url_param
|
||||
return value2url_param(value)
|
||||
|
||||
|
||||
|
@ -187,7 +192,7 @@ def tab(value):
|
|||
:return:
|
||||
"""
|
||||
if isinstance(value, Mapping):
|
||||
h, d = zip(*wrap(value).leaves())
|
||||
h, d = transpose(*wrap(value).leaves())
|
||||
return (
|
||||
"\t".join(map(value2json, h)) +
|
||||
"\n" +
|
||||
|
@ -484,16 +489,20 @@ _SNIP = "...<snip>..."
|
|||
|
||||
@formatter
|
||||
def limit(value, length):
|
||||
# LIMIT THE STRING value TO GIVEN LENGTH, CHOPPING OUT THE MIDDLE IF REQUIRED
|
||||
if len(value) <= length:
|
||||
return value
|
||||
elif length < len(_SNIP) * 2:
|
||||
return value[0:length]
|
||||
else:
|
||||
lhs = int(round((length - len(_SNIP)) / 2, 0))
|
||||
rhs = length - len(_SNIP) - lhs
|
||||
return value[:lhs] + _SNIP + value[-rhs:]
|
||||
|
||||
try:
|
||||
# LIMIT THE STRING value TO GIVEN LENGTH, CHOPPING OUT THE MIDDLE IF REQUIRED
|
||||
if len(value) <= length:
|
||||
return value
|
||||
elif length < len(_SNIP) * 2:
|
||||
return value[0:length]
|
||||
else:
|
||||
lhs = int(round((length - len(_SNIP)) / 2, 0))
|
||||
rhs = length - len(_SNIP) - lhs
|
||||
return value[:lhs] + _SNIP + value[-rhs:]
|
||||
except Exception as e:
|
||||
if not _Duration:
|
||||
_late_import()
|
||||
_Log.error("Not expected", cause=e)
|
||||
|
||||
@formatter
|
||||
def split(value, sep="\n"):
|
||||
|
@ -742,19 +751,15 @@ def apply_diff(text, diff, reverse=False, verify=True):
|
|||
+Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
|
||||
"""
|
||||
|
||||
output = text
|
||||
if not diff:
|
||||
return output
|
||||
|
||||
start_of_hunk = 0
|
||||
while True:
|
||||
if start_of_hunk>=len(diff):
|
||||
break
|
||||
header = diff[start_of_hunk]
|
||||
start_of_hunk += 1
|
||||
if not header.strip():
|
||||
continue
|
||||
|
||||
return text
|
||||
output = text
|
||||
hunks = [
|
||||
(new_diff[start_hunk], new_diff[start_hunk+1:end_hunk])
|
||||
for new_diff in [[d.lstrip() for d in diff if d.lstrip() and d != "\\ No newline at end of file"] + ["@@"]] # ANOTHER REPAIR
|
||||
for start_hunk, end_hunk in pairwise(i for i, l in enumerate(new_diff) if l.startswith('@@'))
|
||||
]
|
||||
for header, hunk_body in (reversed(hunks) if reverse else hunks):
|
||||
matches = DIFF_PREFIX.match(header.strip())
|
||||
if not matches:
|
||||
if not _Log:
|
||||
|
@ -762,76 +767,86 @@ def apply_diff(text, diff, reverse=False, verify=True):
|
|||
|
||||
_Log.error("Can not handle \n---\n{{diff}}\n---\n", diff=diff)
|
||||
|
||||
remove = tuple(int(i.strip()) for i in matches.group(1).split(",")) # EXPECTING start_line, length TO REMOVE
|
||||
remove = Data(start=remove[0], length=1 if len(remove) == 1 else remove[1]) # ASSUME FIRST LINE
|
||||
add = tuple(int(i.strip()) for i in matches.group(2).split(",")) # EXPECTING start_line, length TO ADD
|
||||
add = Data(start=add[0], length=1 if len(add) == 1 else add[1])
|
||||
removes = tuple(int(i.strip()) for i in matches.group(1).split(",")) # EXPECTING start_line, length TO REMOVE
|
||||
remove = Data(start=removes[0], length=1 if len(removes) == 1 else removes[1]) # ASSUME FIRST LINE
|
||||
adds = tuple(int(i.strip()) for i in matches.group(2).split(",")) # EXPECTING start_line, length TO ADD
|
||||
add = Data(start=adds[0], length=1 if len(adds) == 1 else adds[1])
|
||||
|
||||
if remove.start == 0 and remove.length == 0:
|
||||
remove.start = add.start
|
||||
if add.start == 0 and add.length == 0:
|
||||
if add.length == 0 and add.start == 0:
|
||||
add.start = remove.start
|
||||
|
||||
if remove.start != add.start:
|
||||
if not _Log:
|
||||
_late_import()
|
||||
_Log.warning("Do not know how to handle")
|
||||
|
||||
def repair_hunk(diff):
|
||||
def repair_hunk(hunk_body):
|
||||
# THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
|
||||
# ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
|
||||
# EXAMPLE: -kward has the details.+kward has the details.
|
||||
# DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
|
||||
problem_line = diff[start_of_hunk + remove.length - 1]
|
||||
if reverse:
|
||||
if add.length == 0:
|
||||
return diff
|
||||
first_added_line = output[add.start - 1]
|
||||
if problem_line.endswith('+' + first_added_line):
|
||||
split_point = len(problem_line) - len(first_added_line) - 1
|
||||
last_lines = [
|
||||
o
|
||||
for b, o in zip(reversed(hunk_body), reversed(output))
|
||||
if b != "+" + o
|
||||
]
|
||||
if not last_lines:
|
||||
return hunk_body
|
||||
|
||||
last_line = last_lines[0]
|
||||
for problem_index, problem_line in enumerate(hunk_body):
|
||||
if problem_line.startswith('-') and problem_line.endswith('+' + last_line):
|
||||
split_point = len(problem_line) - (len(last_line) + 1)
|
||||
break
|
||||
elif problem_line.startswith('+' + last_line + "-"):
|
||||
split_point = len(last_line) + 1
|
||||
break
|
||||
else:
|
||||
return diff
|
||||
return hunk_body
|
||||
else:
|
||||
if remove.length == 0:
|
||||
return diff
|
||||
last_removed_line = output[remove.start - 1]
|
||||
if problem_line.startswith('-' + last_removed_line + "+"):
|
||||
split_point = len(last_removed_line) + 1
|
||||
if not output:
|
||||
return hunk_body
|
||||
last_line = output[-1]
|
||||
for problem_index, problem_line in enumerate(hunk_body):
|
||||
if problem_line.startswith('+') and problem_line.endswith('-' + last_line):
|
||||
split_point = len(problem_line) - (len(last_line) + 1)
|
||||
break
|
||||
elif problem_line.startswith('-' + last_line + "+"):
|
||||
split_point = len(last_line) + 1
|
||||
break
|
||||
else:
|
||||
return diff
|
||||
return hunk_body
|
||||
|
||||
new_diff = (
|
||||
diff[:start_of_hunk + remove.length - 1] +
|
||||
new_hunk_body = (
|
||||
hunk_body[:problem_index] +
|
||||
[problem_line[:split_point], problem_line[split_point:]] +
|
||||
diff[start_of_hunk + remove.length:]
|
||||
hunk_body[problem_index + 1:]
|
||||
)
|
||||
return new_diff
|
||||
|
||||
diff = repair_hunk(diff)
|
||||
diff = [d for d in diff if d != "\\ no newline at end of file"] # ANOTHER REPAIR
|
||||
return new_hunk_body
|
||||
hunk_body = repair_hunk(hunk_body)
|
||||
|
||||
if reverse:
|
||||
new_output = (
|
||||
output[:add.start - 1] +
|
||||
[d[1:] for d in diff[start_of_hunk:start_of_hunk + remove.length]] +
|
||||
[d[1:] for d in hunk_body if d and d[0] == '-'] +
|
||||
output[add.start + add.length - 1:]
|
||||
)
|
||||
else:
|
||||
# APPLYING DIFF FORWARD REQUIRES WE APPLY THE HUNKS IN REVERSE TO GET THE LINE NUMBERS RIGHT?
|
||||
new_output = (
|
||||
output[:remove.start-1] +
|
||||
[d[1:] for d in diff[start_of_hunk + remove.length :start_of_hunk + remove.length + add.length ]] +
|
||||
output[remove.start + remove.length - 1:]
|
||||
output[:add.start - 1] +
|
||||
[d[1:] for d in hunk_body if d and d[0] == '+'] +
|
||||
output[add.start + remove.length - 1:]
|
||||
)
|
||||
start_of_hunk += remove.length + add.length
|
||||
output = new_output
|
||||
|
||||
if verify:
|
||||
original = apply_diff(output, diff, not reverse, False)
|
||||
if any(t!=o for t, o in zip_longest(text, original)):
|
||||
if not _Log:
|
||||
_late_import()
|
||||
_Log.error("logical verification check failed")
|
||||
if set(text) != set(original): # bugzilla-etl diffs are a jumble
|
||||
|
||||
for t, o in zip_longest(text, original):
|
||||
if t in ['reports: https://goo.gl/70o6w6\r']:
|
||||
break # KNOWN INCONSISTENCIES
|
||||
if t != o:
|
||||
if not _Log:
|
||||
_late_import()
|
||||
_Log.error("logical verification check failed")
|
||||
break
|
||||
|
||||
return output
|
||||
|
||||
|
@ -858,7 +873,7 @@ def utf82unicode(value):
|
|||
try:
|
||||
c.decode("utf8")
|
||||
except Exception as f:
|
||||
_Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)])
|
||||
_Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)])
|
||||
|
||||
try:
|
||||
latin1 = text_type(value.decode("latin1"))
|
||||
|
@ -880,3 +895,15 @@ def wordify(value):
|
|||
|
||||
|
||||
|
||||
|
||||
def pairwise(values):
|
||||
"""
|
||||
WITH values = [a, b, c, d, ...]
|
||||
RETURN [(a, b), (b, c), (c, d), ...]
|
||||
"""
|
||||
i = iter(values)
|
||||
a = next(i)
|
||||
|
||||
for b in i:
|
||||
yield (a, b)
|
||||
a = b
|
||||
|
|
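A quick check of the pairwise() helper documented above (a stand-alone copy, since the surrounding module cannot be imported from a diff); apply_diff() runs it over the indexes of the "@@" headers to slice a diff into (header, hunk_body) pairs.

def pairwise(values):
    i = iter(values)
    a = next(i)
    for b in i:
        yield (a, b)
        a = b

print(list(pairwise([1, 2, 3, 4])))   # [(1, 2), (2, 3), (3, 4)]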
|
@ -30,7 +30,7 @@ class Random(object):
|
|||
|
||||
@staticmethod
|
||||
def base64(length):
|
||||
return Random.string(length, string.digits + string.letters + '+/')
|
||||
return Random.string(length, SIMPLE_ALPHABET + '+/')
|
||||
|
||||
@staticmethod
|
||||
def int(*args):
|
||||
|
|
|
@ -228,7 +228,7 @@ import math
|
|||
import copy
|
||||
# from types import *
|
||||
|
||||
import pstat
|
||||
from mo_math.vendor.strangman import pstat
|
||||
|
||||
|
||||
__version__ = 0.6
|
||||
|
@ -447,7 +447,7 @@ given by inlist.
|
|||
Usage: lscoreatpercentile(inlist,percent)
|
||||
"""
|
||||
if percent > 1:
|
||||
print "\nDividing percent>1 by 100 in lscoreatpercentile().\n"
|
||||
print("\nDividing percent>1 by 100 in lscoreatpercentile().\n")
|
||||
percent = percent / 100.0
|
||||
targetcf = percent * len(inlist)
|
||||
h, lrl, binsize, extras = histogram(inlist)
|
||||
|
@ -485,8 +485,8 @@ spanning all the numbers in the inlist.
|
|||
Usage: lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0)
|
||||
Returns: list of bin values, lowerreallimit, binsize, extrapoints
|
||||
"""
|
||||
if (defaultreallimits <> None):
|
||||
if type(defaultreallimits) not in [ListType, TupleType] or len(defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd
|
||||
if (defaultreallimits != None):
|
||||
if type(defaultreallimits) not in [list, tuple] or len(defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd
|
||||
lowerreallimit = defaultreallimits
|
||||
upperreallimit = 1.000001 * max(inlist)
|
||||
else: # assume both limits given
|
||||
|
@ -509,7 +509,7 @@ Returns: list of bin values, lowerreallimit, binsize, extrapoints
|
|||
except:
|
||||
extrapoints = extrapoints + 1
|
||||
if (extrapoints > 0 and printextras == 1):
|
||||
print '\nPoints outside given histogram range =', extrapoints
|
||||
print('\nPoints outside given histogram range =', extrapoints)
|
||||
return (bins, lowerreallimit, binsize, extrapoints)
|
||||
|
||||
|
||||
|
@ -572,8 +572,8 @@ Returns: transformed data for use in an ANOVA
|
|||
for j in range(k):
|
||||
if v[j] - mean(nargs[j]) > TINY:
|
||||
check = 0
|
||||
if check <> 1:
|
||||
raise ValueError, 'Problem in obrientransform.'
|
||||
if check != 1:
|
||||
raise ValueError('Problem in obrientransform.')
|
||||
else:
|
||||
return nargs
|
||||
|
||||
|
@ -751,11 +751,11 @@ Returns: appropriate statistic name, value, and probability
|
|||
"""
|
||||
samples = ''
|
||||
while samples not in ['i', 'r', 'I', 'R', 'c', 'C']:
|
||||
print '\nIndependent or related samples, or correlation (i,r,c): ',
|
||||
print('\nIndependent or related samples, or correlation (i,r,c): ',)
|
||||
samples = raw_input()
|
||||
|
||||
if samples in ['i', 'I', 'r', 'R']:
|
||||
print '\nComparing variances ...',
|
||||
print('\nComparing variances ...',)
|
||||
# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
|
||||
r = obrientransform(x, y)
|
||||
f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1))
|
||||
|
@ -763,45 +763,44 @@ Returns: appropriate statistic name, value, and probability
|
|||
vartype = 'unequal, p=' + str(round(p, 4))
|
||||
else:
|
||||
vartype = 'equal'
|
||||
print vartype
|
||||
print(vartype)
|
||||
if samples in ['i', 'I']:
|
||||
if vartype[0] == 'e':
|
||||
t, p = ttest_ind(x, y, 0)
|
||||
print '\nIndependent samples t-test: ', round(t, 4), round(p, 4)
|
||||
print('\nIndependent samples t-test: ', round(t, 4), round(p, 4))
|
||||
else:
|
||||
if len(x) > 20 or len(y) > 20:
|
||||
z, p = ranksums(x, y)
|
||||
print '\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(p, 4)
|
||||
print('\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(p, 4))
|
||||
else:
|
||||
u, p = mannwhitneyu(x, y)
|
||||
print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u, 4), round(p, 4)
|
||||
|
||||
print('\nMann-Whitney U-test (NONparametric, ns<20): ', round(u, 4), round(p, 4))
|
||||
else: # RELATED SAMPLES
|
||||
if vartype[0] == 'e':
|
||||
t, p = ttest_rel(x, y, 0)
|
||||
print '\nRelated samples t-test: ', round(t, 4), round(p, 4)
|
||||
print('\nRelated samples t-test: ', round(t, 4), round(p, 4))
|
||||
else:
|
||||
t, p = ranksums(x, y)
|
||||
print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)
|
||||
print('\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4))
|
||||
else: # CORRELATION ANALYSIS
|
||||
corrtype = ''
|
||||
while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']:
|
||||
print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
|
||||
print('\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',)
|
||||
corrtype = raw_input()
|
||||
if corrtype in ['c', 'C']:
|
||||
m, b, r, p, see = linregress(x, y)
|
||||
print '\nLinear regression for continuous variables ...'
|
||||
print('\nLinear regression for continuous variables ...')
|
||||
lol = [['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'], [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]]
|
||||
pstat.printcc(lol)
|
||||
elif corrtype in ['r', 'R']:
|
||||
r, p = spearmanr(x, y)
|
||||
print '\nCorrelation for ranked variables ...'
|
||||
print "Spearman's r: ", round(r, 4), round(p, 4)
|
||||
print('\nCorrelation for ranked variables ...')
|
||||
print("Spearman's r: ", round(r, 4), round(p, 4))
|
||||
else: # DICHOTOMOUS
|
||||
r, p = pointbiserialr(x, y)
|
||||
print '\nAssuming x contains a dichotomous variable ...'
|
||||
print 'Point Biserial r: ', round(r, 4), round(p, 4)
|
||||
print '\n\n'
|
||||
print('\nAssuming x contains a dichotomous variable ...')
|
||||
print('Point Biserial r: ', round(r, 4), round(p, 4))
|
||||
print('\n\n')
|
||||
return None
|
||||
|
||||
|
||||
|
@ -815,8 +814,8 @@ Usage: lpearsonr(x,y) where x and y are equal-length lists
|
|||
Returns: Pearson's r value, two-tailed p-value
|
||||
"""
|
||||
TINY = 1.0e-30
|
||||
if len(x) <> len(y):
|
||||
raise ValueError, 'Input values not paired in pearsonr. Aborting.'
|
||||
if len(x) != len(y):
|
||||
raise ValueError('Input values not paired in pearsonr. Aborting.')
|
||||
n = len(x)
|
||||
x = map(float, x)
|
||||
y = map(float, y)
|
||||
|
@ -854,8 +853,8 @@ Usage: lspearmanr(x,y) where x and y are equal-length lists
|
|||
Returns: Spearman's r, two-tailed p-value
|
||||
"""
|
||||
TINY = 1e-30
|
||||
if len(x) <> len(y):
|
||||
raise ValueError, 'Input values not paired in spearmanr. Aborting.'
|
||||
if len(x) != len(y):
|
||||
raise ValueError('Input values not paired in spearmanr. Aborting.')
|
||||
n = len(x)
|
||||
rankx = rankdata(x)
|
||||
ranky = rankdata(y)
|
||||
|
@ -879,12 +878,12 @@ Usage: pointbiserialr(x,y) where x,y are equal-length lists
|
|||
Returns: Point-biserial r, two-tailed p-value
|
||||
"""
|
||||
TINY = 1e-30
|
||||
if len(cats) <> len(vals):
|
||||
raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.'
|
||||
if len(cats) != len(vals):
|
||||
raise ValueError('INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.')
|
||||
data = zip(cats, vals)
|
||||
categories = pstat.unique(cats)
|
||||
if len(categories) <> 2:
|
||||
raise ValueError, "Exactly 2 categories required for pointbiserialr()."
|
||||
if len(categories) != 2:
|
||||
raise ValueError("Exactly 2 categories required for pointbiserialr().")
|
||||
else: # there are 2 categories, continue
|
||||
c1 = [v for i, v in enumerate(vals) if cats[i] == categories[0]]
|
||||
c2 = [v for i, v in enumerate(vals) if cats[i] == categories[1]]
|
||||
|
@ -942,8 +941,8 @@ Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates
|
|||
Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
|
||||
"""
|
||||
TINY = 1.0e-20
|
||||
if len(x) <> len(y):
|
||||
raise ValueError, 'Input values not paired in linregress. Aborting.'
|
||||
if len(x) != len(y):
|
||||
raise ValueError('Input values not paired in linregress. Aborting.')
|
||||
n = len(x)
|
||||
x = map(float, x)
|
||||
y = map(float, y)
|
||||
|
@ -1017,8 +1016,8 @@ and prob.
|
|||
Usage: lttest_rel(a,b)
|
||||
Returns: t-value, two-tailed prob
|
||||
"""
|
||||
if len(a) <> len(b):
|
||||
raise ValueError, 'Unequal length lists in ttest_rel.'
|
||||
if len(a) != len(b):
|
||||
raise ValueError('Unequal length lists in ttest_rel.')
|
||||
x1 = mean(a)
|
||||
x2 = mean(b)
|
||||
v1 = var(a)
|
||||
|
@ -1119,7 +1118,7 @@ Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))
|
|||
proportion = bigu / float(n1 * n2)
|
||||
T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores
|
||||
if T == 0:
|
||||
raise ValueError, 'All numbers are identical in lmannwhitneyu'
|
||||
raise ValueError('All numbers are identical in lmannwhitneyu')
|
||||
sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)
|
||||
z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc
|
||||
return smallu, 1.0 - zprob(z) #, proportion
|
||||
|
@ -1180,12 +1179,12 @@ result. A non-parametric T-test.
|
|||
Usage: lwilcoxont(x,y)
|
||||
Returns: a t-statistic, two-tail probability estimate
|
||||
"""
|
||||
if len(x) <> len(y):
|
||||
raise ValueError, 'Unequal N in wilcoxont. Aborting.'
|
||||
if len(x) != len(y):
|
||||
raise ValueError('Unequal N in wilcoxont. Aborting.')
|
||||
d = []
|
||||
for i in range(len(x)):
|
||||
diff = x[i] - y[i]
|
||||
if diff <> 0:
|
||||
if diff != 0:
|
||||
d.append(diff)
|
||||
count = len(d)
|
||||
absd = map(abs, d)
|
||||
|
@ -1235,7 +1234,7 @@ Returns: H-statistic (corrected for ties), associated p-value
|
|||
h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)
|
||||
df = len(args) - 1
|
||||
if T == 0:
|
||||
raise ValueError, 'All numbers are identical in lkruskalwallish'
|
||||
raise ValueError('All numbers are identical in lkruskalwallish')
|
||||
h = h / float(T)
|
||||
return h, chisqprob(h, df)
|
||||
|
||||
|
@ -1254,9 +1253,9 @@ Returns: chi-square statistic, associated p-value
|
|||
"""
|
||||
k = len(args)
|
||||
if k < 3:
|
||||
raise ValueError, 'Less than 3 levels. Friedman test not appropriate.'
|
||||
raise ValueError('Less than 3 levels. Friedman test not appropriate.')
|
||||
n = len(args[0])
|
||||
data = apply(zip, tuple(args))
|
||||
data = map(zip, tuple(args))
|
||||
for i in range(len(data)):
|
||||
data[i] = rankdata(data[i])
|
||||
ssbn = 0
|
||||
|
@ -1454,8 +1453,7 @@ def betacf(a, b, x):
|
|||
bz = 1.0
|
||||
if (abs(az - aold) < (EPS * abs(az))):
|
||||
return az
|
||||
print 'a or b too big, or ITMAX too small in Betacf.'
|
||||
|
||||
print('a or b too big, or ITMAX too small in Betacf.')
|
||||
|
||||
def gammln(xx):
|
||||
"""
|
||||
|
@ -1490,7 +1488,7 @@ using the betacf function. (Adapted from: Numerical Recipies in C.)
|
|||
Usage: lbetai(a,b,x)
|
||||
"""
|
||||
if (x < 0.0 or x > 1.0):
|
||||
raise ValueError, 'Bad x in lbetai'
|
||||
raise ValueError('Bad x in lbetai')
|
||||
|
||||
if (x == 0.0 or x == 1.0):
|
||||
bt = 0.0
|
||||
|
@ -1608,8 +1606,8 @@ length lists.
|
|||
|
||||
Usage: lsummult(list1,list2)
|
||||
"""
|
||||
if len(list1) <> len(list2):
|
||||
raise ValueError, "Lists not equal length in summult."
|
||||
if len(list1) != len(list2):
|
||||
raise ValueError("Lists not equal length in summult.")
|
||||
s = 0
|
||||
for item1, item2 in zip(list1, list2):
|
||||
s = s + item1 * item2
|
||||
|
@ -1684,7 +1682,7 @@ Returns: a list of length equal to inlist, containing rank scores
|
|||
for i in range(n):
|
||||
sumranks = sumranks + i
|
||||
dupcount = dupcount + 1
|
||||
if i == n - 1 or svec[i] <> svec[i + 1]:
|
||||
if i == n - 1 or svec[i] != svec[i + 1]:
|
||||
averank = sumranks / float(dupcount) + 1
|
||||
for j in range(i - dupcount + 1, i + 1):
|
||||
newlist[ivec[j]] = averank
|
||||
|
|
|
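The stats hunks above are mechanical Python 3 conversions: print statements become print() calls, the removed <> operator becomes !=, and the `raise ValueError, 'msg'` form becomes `raise ValueError('msg')`. A minimal sketch of the same three changes applied to hypothetical code (not taken from the commit):

    # Hypothetical illustration of the conversion pattern; not code from the commit.
    def paired_mean(x, y):
        if len(x) != len(y):                               # was: if len(x) <> len(y):
            raise ValueError('Input values not paired.')   # was: raise ValueError, '...'
        m = sum(xi + yi for xi, yi in zip(x, y)) / (2.0 * len(x))
        print('paired mean:', m)                           # was: print 'paired mean:', m
        return m

    paired_mean([1, 2, 3], [4, 5, 6])                      # prints: paired mean: 3.5
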
@ -15,83 +15,76 @@ af = N.array(lf)
|
|||
ll = [l]*5
|
||||
aa = N.array(ll)
|
||||
|
||||
print '\nCENTRAL TENDENCY'
|
||||
print 'geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af)
|
||||
print 'harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af)
|
||||
print 'mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af)
|
||||
print 'median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af)
|
||||
print 'medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af)
|
||||
print 'mode:',stats.mode(l),stats.mode(a)
|
||||
|
||||
print '\nMOMENTS'
|
||||
print 'moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af)
|
||||
print 'variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af)
|
||||
print 'skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af)
|
||||
print 'kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af)
|
||||
print 'mean:',stats.mean(a),stats.mean(af)
|
||||
print 'var:',stats.var(a),stats.var(af)
|
||||
print 'stdev:',stats.stdev(a),stats.stdev(af)
|
||||
print 'sem:',stats.sem(a),stats.sem(af)
|
||||
print 'describe:'
|
||||
print stats.describe(l)
|
||||
print stats.describe(lf)
|
||||
print stats.describe(a)
|
||||
print stats.describe(af)
|
||||
|
||||
print '\nFREQUENCY'
|
||||
print 'freqtable:'
|
||||
print 'itemfreq:'
|
||||
print stats.itemfreq(l)
|
||||
print stats.itemfreq(a)
|
||||
print 'scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40)
|
||||
print 'percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12)
|
||||
print 'histogram:',stats.histogram(l),stats.histogram(a)
|
||||
print 'cumfreq:'
|
||||
print stats.cumfreq(l)
|
||||
print stats.cumfreq(lf)
|
||||
print stats.cumfreq(a)
|
||||
print stats.cumfreq(af)
|
||||
print 'relfreq:'
|
||||
print stats.relfreq(l)
|
||||
print stats.relfreq(lf)
|
||||
print stats.relfreq(a)
|
||||
print stats.relfreq(af)
|
||||
|
||||
print '\nVARIATION'
|
||||
print 'obrientransform:'
|
||||
|
||||
print('\nCENTRAL TENDENCY')
|
||||
print('geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af))
|
||||
print('harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af))
|
||||
print('mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af))
|
||||
print('median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af))
|
||||
print('medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af))
|
||||
print('mode:',stats.mode(l),stats.mode(a))
|
||||
print('\nMOMENTS')
|
||||
print('moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af))
|
||||
print('variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af))
|
||||
print('skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af))
|
||||
print('kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af))
|
||||
print('mean:',stats.mean(a),stats.mean(af))
|
||||
print('var:',stats.var(a),stats.var(af))
|
||||
print('stdev:',stats.stdev(a),stats.stdev(af))
|
||||
print('sem:',stats.sem(a),stats.sem(af))
|
||||
print('describe:')
|
||||
print(stats.describe(l))
|
||||
print(stats.describe(lf))
|
||||
print(stats.describe(a))
|
||||
print(stats.describe(af))
|
||||
print('\nFREQUENCY')
|
||||
print('freqtable:')
|
||||
print('itemfreq:')
|
||||
print(stats.itemfreq(l))
|
||||
print(stats.itemfreq(a))
|
||||
print('scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40))
|
||||
print('percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12))
|
||||
print('histogram:',stats.histogram(l),stats.histogram(a))
|
||||
print('cumfreq:')
|
||||
print(stats.cumfreq(l))
|
||||
print(stats.cumfreq(lf))
|
||||
print(stats.cumfreq(a))
|
||||
print(stats.cumfreq(af))
|
||||
print('relfreq:')
|
||||
print(stats.relfreq(l))
|
||||
print(stats.relfreq(lf))
|
||||
print(stats.relfreq(a))
|
||||
print(stats.relfreq(af))
|
||||
print('\nVARIATION')
|
||||
print('obrientransform:')
|
||||
l = range(1,21)
|
||||
a = N.array(l)
|
||||
ll = [l]*5
|
||||
aa = N.array(ll)
|
||||
|
||||
print stats.obrientransform(l,l,l,l,l)
|
||||
print stats.obrientransform(a,a,a,a,a)
|
||||
|
||||
print 'samplevar:',stats.samplevar(l),stats.samplevar(a)
|
||||
print 'samplestdev:',stats.samplestdev(l),stats.samplestdev(a)
|
||||
print 'var:',stats.var(l),stats.var(a)
|
||||
print 'stdev:',stats.stdev(l),stats.stdev(a)
|
||||
print 'sterr:',stats.sterr(l),stats.sterr(a)
|
||||
print 'sem:',stats.sem(l),stats.sem(a)
|
||||
print 'z:',stats.z(l,4),stats.z(a,4)
|
||||
print 'zs:'
|
||||
print stats.zs(l)
|
||||
print stats.zs(a)
|
||||
|
||||
print '\nTRIMMING'
|
||||
print 'trimboth:'
|
||||
print stats.trimboth(l,.2)
|
||||
print stats.trimboth(lf,.2)
|
||||
print stats.trimboth(a,.2)
|
||||
print stats.trimboth(af,.2)
|
||||
print 'trim1:'
|
||||
print stats.trim1(l,.2)
|
||||
print stats.trim1(lf,.2)
|
||||
print stats.trim1(a,.2)
|
||||
print stats.trim1(af,.2)
|
||||
|
||||
print '\nCORRELATION'
|
||||
print(stats.obrientransform(l,l,l,l,l))
|
||||
print(stats.obrientransform(a,a,a,a,a))
|
||||
print('samplevar:',stats.samplevar(l),stats.samplevar(a))
|
||||
print('samplestdev:',stats.samplestdev(l),stats.samplestdev(a))
|
||||
print('var:',stats.var(l),stats.var(a))
|
||||
print('stdev:',stats.stdev(l),stats.stdev(a))
|
||||
print('sterr:',stats.sterr(l),stats.sterr(a))
|
||||
print('sem:',stats.sem(l),stats.sem(a))
|
||||
print('z:',stats.z(l,4),stats.z(a,4))
|
||||
print('zs:')
|
||||
print(stats.zs(l))
|
||||
print(stats.zs(a))
|
||||
print('\nTRIMMING')
|
||||
print('trimboth:')
|
||||
print(stats.trimboth(l,.2))
|
||||
print(stats.trimboth(lf,.2))
|
||||
print(stats.trimboth(a,.2))
|
||||
print(stats.trimboth(af,.2))
|
||||
print('trim1:')
|
||||
print(stats.trim1(l,.2))
|
||||
print(stats.trim1(lf,.2))
|
||||
print(stats.trim1(a,.2))
|
||||
print(stats.trim1(af,.2))
|
||||
print('\nCORRELATION')
|
||||
# execfile('testpairedstats.py')
|
||||
|
||||
l = range(1,21)
|
||||
|
@ -106,62 +99,58 @@ b = N.array(m)
|
|||
pb = [0]*9 + [1]*11
|
||||
apb = N.array(pb)
|
||||
|
||||
print 'paired:'
|
||||
print('paired:')
|
||||
# stats.paired(l,m)
|
||||
# stats.paired(a,b)
|
||||
|
||||
print
|
||||
print
|
||||
print 'pearsonr:'
|
||||
print stats.pearsonr(l,m)
|
||||
print stats.pearsonr(a,b)
|
||||
print 'spearmanr:'
|
||||
print stats.spearmanr(l,m)
|
||||
print stats.spearmanr(a,b)
|
||||
print 'pointbiserialr:'
|
||||
print stats.pointbiserialr(pb,l)
|
||||
print stats.pointbiserialr(apb,a)
|
||||
print 'kendalltau:'
|
||||
print stats.kendalltau(l,m)
|
||||
print stats.kendalltau(a,b)
|
||||
print 'linregress:'
|
||||
print stats.linregress(l,m)
|
||||
print stats.linregress(a,b)
|
||||
|
||||
print '\nINFERENTIAL'
|
||||
print 'ttest_1samp:'
|
||||
print stats.ttest_1samp(l,12)
|
||||
print stats.ttest_1samp(a,12)
|
||||
print 'ttest_ind:'
|
||||
print stats.ttest_ind(l,m)
|
||||
print stats.ttest_ind(a,b)
|
||||
print 'ttest_rel:'
|
||||
print stats.ttest_rel(l,m)
|
||||
print stats.ttest_rel(a,b)
|
||||
print 'chisquare:'
|
||||
print stats.chisquare(l)
|
||||
print stats.chisquare(a)
|
||||
print 'ks_2samp:'
|
||||
print stats.ks_2samp(l,m)
|
||||
print stats.ks_2samp(a,b)
|
||||
|
||||
print 'mannwhitneyu:'
|
||||
print stats.mannwhitneyu(l,m)
|
||||
print stats.mannwhitneyu(a,b)
|
||||
print 'ranksums:'
|
||||
print stats.ranksums(l,m)
|
||||
print stats.ranksums(a,b)
|
||||
print 'wilcoxont:'
|
||||
print stats.wilcoxont(l,m)
|
||||
print stats.wilcoxont(a,b)
|
||||
print 'kruskalwallish:'
|
||||
print stats.kruskalwallish(l,m,l)
|
||||
print len(l), len(m)
|
||||
print stats.kruskalwallish(a,b,a)
|
||||
print 'friedmanchisquare:'
|
||||
print stats.friedmanchisquare(l,m,l)
|
||||
print stats.friedmanchisquare(a,b,a)
|
||||
|
||||
print(print)
|
||||
print('pearsonr:')
|
||||
print(stats.pearsonr(l,m))
|
||||
print(stats.pearsonr(a,b))
|
||||
print('spearmanr:')
|
||||
print(stats.spearmanr(l,m))
|
||||
print(stats.spearmanr(a,b))
|
||||
print('pointbiserialr:')
|
||||
print(stats.pointbiserialr(pb,l))
|
||||
print(stats.pointbiserialr(apb,a))
|
||||
print('kendalltau:')
|
||||
print(stats.kendalltau(l,m))
|
||||
print(stats.kendalltau(a,b))
|
||||
print('linregress:')
|
||||
print(stats.linregress(l,m))
|
||||
print(stats.linregress(a,b))
|
||||
print('\nINFERENTIAL')
|
||||
print('ttest_1samp:')
|
||||
print(stats.ttest_1samp(l,12))
|
||||
print(stats.ttest_1samp(a,12))
|
||||
print('ttest_ind:')
|
||||
print(stats.ttest_ind(l,m))
|
||||
print(stats.ttest_ind(a,b))
|
||||
print('ttest_rel:')
|
||||
print(stats.ttest_rel(l,m))
|
||||
print(stats.ttest_rel(a,b))
|
||||
print('chisquare:')
|
||||
print(stats.chisquare(l))
|
||||
print(stats.chisquare(a))
|
||||
print('ks_2samp:')
|
||||
print(stats.ks_2samp(l,m))
|
||||
print(stats.ks_2samp(a,b))
|
||||
print('mannwhitneyu:')
|
||||
print(stats.mannwhitneyu(l,m))
|
||||
print(stats.mannwhitneyu(a,b))
|
||||
print('ranksums:')
|
||||
print(stats.ranksums(l,m))
|
||||
print(stats.ranksums(a,b))
|
||||
print('wilcoxont:')
|
||||
print(stats.wilcoxont(l,m))
|
||||
print(stats.wilcoxont(a,b))
|
||||
print('kruskalwallish:')
|
||||
print(stats.kruskalwallish(l,m,l))
|
||||
print(len(l), len(m))
|
||||
print(stats.kruskalwallish(a,b,a))
|
||||
print('friedmanchisquare:')
|
||||
print(stats.friedmanchisquare(l,m,l))
|
||||
print(stats.friedmanchisquare(a,b,a))
|
||||
l = range(1,21)
|
||||
a = N.array(l)
|
||||
ll = [l]*5
|
||||
|
@ -171,29 +160,29 @@ m = range(4,24)
|
|||
m[10] = 34
|
||||
b = N.array(m)
|
||||
|
||||
print '\n\nF_oneway:'
|
||||
print stats.F_oneway(l,m)
|
||||
print stats.F_oneway(a,b)
|
||||
print('\n\nF_oneway:')
|
||||
print(stats.F_oneway(l,m))
|
||||
print(stats.F_oneway(a,b))
|
||||
# print 'F_value:',stats.F_value(l),stats.F_value(a)
|
||||
|
||||
print '\nSUPPORT'
|
||||
print 'sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af)
|
||||
print 'cumsum:'
|
||||
print stats.cumsum(l)
|
||||
print stats.cumsum(lf)
|
||||
print stats.cumsum(a)
|
||||
print stats.cumsum(af)
|
||||
print 'ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af)
|
||||
print 'summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b)
|
||||
print 'sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af)
|
||||
print 'sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b)
|
||||
print 'shellsort:'
|
||||
print stats.shellsort(m)
|
||||
print stats.shellsort(b)
|
||||
print 'rankdata:'
|
||||
print stats.rankdata(m)
|
||||
print stats.rankdata(b)
|
||||
print('\nSUPPORT')
|
||||
print('sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af))
|
||||
print('cumsum:')
|
||||
print(stats.cumsum(l))
|
||||
print(stats.cumsum(lf))
|
||||
print(stats.cumsum(a))
|
||||
print(stats.cumsum(af))
|
||||
print('ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af))
|
||||
print('summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b))
|
||||
print('sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af))
|
||||
print('sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b))
|
||||
print('shellsort:')
|
||||
print(stats.shellsort(m))
|
||||
print(stats.shellsort(b))
|
||||
print('rankdata:')
|
||||
print(stats.rankdata(m))
|
||||
print(stats.rankdata(b))
|
||||
print('\nANOVAs')
|
||||
|
||||
print '\nANOVAs'
|
||||
execfile('testanova.py')
|
||||
|
||||
|
|
|
@ -159,7 +159,7 @@ def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, d
|
|||
|
||||
if not Math.is_number(expected):
|
||||
# SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
|
||||
if isinstance(expected, list) and len(expected)==0 and test == None:
|
||||
if isinstance(expected, list) and len(expected) == 0 and test == None:
|
||||
return
|
||||
if isinstance(expected, Mapping) and not expected.keys() and test == None:
|
||||
return
|
||||
|
|
|
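The assertAlmostEqualValue hunk above treats an expected empty list or empty mapping the same as an expected null when the tested value is None. A small self-contained restatement of that rule, using a plain dict in place of collections.Mapping:

    # Illustrative restatement of the special case above; dict stands in for Mapping.
    def empty_expectation_is_null(expected, test):
        if isinstance(expected, list) and len(expected) == 0 and test is None:
            return True
        if isinstance(expected, dict) and not expected.keys() and test is None:
            return True
        return False

    assert empty_expectation_is_null([], None)       # empty list matches None
    assert empty_expectation_is_null({}, None)       # empty mapping matches None
    assert not empty_expectation_is_null([1], None)  # non-empty containers do not
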
@ -171,37 +171,68 @@ class Date(object):
|
|||
|
||||
def __lt__(self, other):
|
||||
try:
|
||||
if other == None:
|
||||
return False
|
||||
elif isinstance(other, Date):
|
||||
return self.unix < other.unix
|
||||
elif isinstance(other, (float, int)):
|
||||
return self.unix < other
|
||||
other = Date(other)
|
||||
return self.unix < other.unix
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
return self.unix < other.unix
|
||||
|
||||
def __eq__(self, other):
|
||||
if other == None or other == '':
|
||||
return Null
|
||||
|
||||
try:
|
||||
return other.unix == self.unix
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
return Date(other).unix == self.unix
|
||||
if other == None:
|
||||
return False
|
||||
elif isinstance(other, Date):
|
||||
return self.unix == other.unix
|
||||
elif isinstance(other, (float, int)):
|
||||
return self.unix == other
|
||||
other = Date(other)
|
||||
return self.unix == other.unix
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def __le__(self, other):
|
||||
other = Date(other)
|
||||
return self.unix <= other.unix
|
||||
try:
|
||||
if other == None:
|
||||
return False
|
||||
elif isinstance(other, Date):
|
||||
return self.unix <= other.unix
|
||||
elif isinstance(other, (float, int)):
|
||||
return self.unix <= other
|
||||
other = Date(other)
|
||||
return self.unix <= other.unix
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def __gt__(self, other):
|
||||
other = Date(other)
|
||||
return self.unix > other.unix
|
||||
try:
|
||||
if other == None:
|
||||
return False
|
||||
elif isinstance(other, Date):
|
||||
return self.unix > other.unix
|
||||
elif isinstance(other, (float, int)):
|
||||
return self.unix > other
|
||||
other = Date(other)
|
||||
return self.unix > other.unix
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def __ge__(self, other):
|
||||
other = Date(other)
|
||||
return self.unix >= other.unix
|
||||
try:
|
||||
if other == None:
|
||||
return False
|
||||
elif isinstance(other, Date):
|
||||
return self.unix >= other.unix
|
||||
elif isinstance(other, (float, int)):
|
||||
return self.unix >= other
|
||||
other = Date(other)
|
||||
return self.unix >= other.unix
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def __add__(self, other):
|
||||
return self.add(other)
|
||||
|
|
|
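Each Date comparison operator above is rewritten to the same defensive shape: None compares false, Date and numeric operands compare by unix timestamp, anything else is coerced, and any failure returns False. A standalone sketch of that shape, with float() standing in for the Date() coercion used in the real class:

    # Hypothetical standalone version of the pattern shared by __lt__/__le__/__gt__/__ge__/__eq__.
    import operator

    def compare_unix(self_unix, other, op=operator.lt):
        try:
            if other is None:
                return False                        # None never compares true
            if hasattr(other, "unix"):              # another Date-like value
                return op(self_unix, other.unix)
            if isinstance(other, (float, int)):     # raw epoch seconds
                return op(self_unix, other)
            return op(self_unix, float(other))      # last-resort coercion (Date() in the real code)
        except Exception:
            return False                            # unconvertible operand

    assert compare_unix(1000.0, 2000)               # 1000 < 2000
    assert not compare_unix(1000.0, None)           # None is never comparable
    assert not compare_unix(1000.0, "not-a-date")   # coercion failure -> False
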
@ -32,20 +32,19 @@ class Timer(object):
|
|||
debug - SET TO False TO DISABLE THIS TIMER
|
||||
"""
|
||||
|
||||
def __init__(self, description, param=None, debug=True, silent=False):
|
||||
def __init__(self, description, param=None, silent=False, too_long=0):
|
||||
self.template = description
|
||||
self.param = wrap(coalesce(param, {}))
|
||||
self.debug = debug
|
||||
self.silent = silent
|
||||
self.agg = 0
|
||||
self.too_long = too_long # ONLY SHOW TIMING FOR DURATIONS THAT ARE too_long
|
||||
self.start = 0
|
||||
self.end = 0
|
||||
self.interval = None
|
||||
|
||||
def __enter__(self):
|
||||
if self.debug:
|
||||
if not self.silent:
|
||||
Log.note("Timer start: " + self.template, stack_depth=1, **self.param)
|
||||
if not self.silent and self.too_long == 0:
|
||||
Log.note("Timer start: " + self.template, stack_depth=1, **self.param)
|
||||
self.start = time()
|
||||
return self
|
||||
|
||||
|
@ -53,12 +52,12 @@ class Timer(object):
|
|||
self.end = time()
|
||||
self.interval = self.end - self.start
|
||||
self.agg += self.interval
|
||||
|
||||
if self.debug:
|
||||
param = wrap(self.param)
|
||||
param.duration = timedelta(seconds=self.interval)
|
||||
if not self.silent:
|
||||
Log.note("Timer end : " + self.template + " (took {{duration}})", self.param, stack_depth=1)
|
||||
self.param.duration = timedelta(seconds=self.interval)
|
||||
if not self.silent:
|
||||
if self.too_long == 0:
|
||||
Log.note("Timer end : " + self.template + " (took {{duration}})", default_params=self.param, stack_depth=1)
|
||||
elif self.interval >= self.too_long:
|
||||
Log.note("Time too long: " + self.template + " ({{duration}})", default_params=self.param, stack_depth=1)
|
||||
|
||||
@property
|
||||
def duration(self):
|
||||
|
|
|
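The Timer above loses its debug flag and gains a too_long threshold: the start and end notes are only logged when too_long is 0, and a separate "Time too long" note fires when the measured interval meets or exceeds a non-zero threshold. A usage sketch, assuming this commit's mo_times is importable; the 0.5 second threshold and the sleeps are illustrative only:

    from time import sleep
    from mo_times.timer import Timer   # import path as used elsewhere in this commit

    with Timer("fetch {{count}} records", {"count": 100}, too_long=0.5):
        sleep(0.1)   # under the threshold: no note is emitted

    with Timer("fetch {{count}} records", {"count": 100}, too_long=0.5):
        sleep(0.6)   # over the threshold: a "Time too long" note is emitted
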
@ -20,11 +20,11 @@ from boto.s3.connection import Location
|
|||
from bs4 import BeautifulSoup
|
||||
|
||||
from mo_dots import wrap, Null, coalesce, unwrap, Data
|
||||
from mo_files.url import value2url_param
|
||||
from mo_future import text_type, StringIO
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log, Except
|
||||
from mo_logs.strings import utf82unicode, unicode2utf8
|
||||
from mo_logs.url import value2url_param
|
||||
from mo_times.dates import Date
|
||||
from mo_times.timer import Timer
|
||||
from pyLibrary import convert
|
||||
|
@ -362,7 +362,7 @@ class Bucket(object):
|
|||
retry = 3
|
||||
while retry:
|
||||
try:
|
||||
with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, debug=self.settings.debug):
|
||||
with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, silent=not self.settings.debug):
|
||||
buff.seek(0)
|
||||
storage.set_contents_from_file(buff)
|
||||
break
|
||||
|
|
|
@ -390,7 +390,7 @@ def value2intlist(value):
|
|||
elif isinstance(value, int):
|
||||
return [value]
|
||||
elif value.strip() == "":
|
||||
return None
|
||||
return []
|
||||
else:
|
||||
return [int(value)]
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ DEBUG = False
|
|||
MIN_READ_SIZE = 8 * 1024
|
||||
MAX_STRING_SIZE = 1 * 1024 * 1024
|
||||
|
||||
|
||||
class FileString(text_type):
|
||||
"""
|
||||
ACTS LIKE A STRING, BUT IS A FILE
|
||||
|
|
|
@ -18,8 +18,9 @@ from copy import deepcopy
|
|||
from jx_python import jx
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from jx_python.meta import Column
|
||||
from mo_dots import wrap, FlatList, coalesce, Null, Data, set_default, listwrap, literal_field, ROOT_PATH, concat_field, split_field
|
||||
from mo_future import text_type, binary_type
|
||||
from mo_dots import wrap, FlatList, coalesce, Null, Data, set_default, listwrap, literal_field, ROOT_PATH, concat_field, split_field, SLOT
|
||||
from mo_files.url import URL
|
||||
from mo_future import text_type, binary_type, items
|
||||
from mo_json import value2json, json2value
|
||||
from mo_json.typed_encoder import EXISTS_TYPE, BOOLEAN_TYPE, STRING_TYPE, NUMBER_TYPE, NESTED_TYPE, TYPE_PREFIX, json_type_to_inserter_type
|
||||
from mo_kwargs import override
|
||||
|
@ -111,8 +112,7 @@ class Index(Features):
|
|||
# EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
|
||||
Log.error("not expected", cause=e)
|
||||
|
||||
if self.debug:
|
||||
Log.alert("elasticsearch debugging for {{url}} is on", url=self.url)
|
||||
self.debug and Log.alert("elasticsearch debugging for {{url}} is on", url=self.url)
|
||||
|
||||
props = self.get_properties()
|
||||
if not props:
|
||||
|
@ -138,7 +138,7 @@ class Index(Features):
|
|||
|
||||
@property
|
||||
def url(self):
|
||||
return self.cluster.path.rstrip("/") + "/" + self.path.lstrip("/")
|
||||
return self.cluster.url / self.path
|
||||
|
||||
def get_properties(self, retry=True):
|
||||
if self.settings.explore_metadata:
|
||||
|
@ -228,12 +228,13 @@ class Index(Features):
|
|||
self.cluster.post("/" + self.settings.index + "/_refresh")
|
||||
|
||||
def delete_record(self, filter):
|
||||
filter = wrap(filter)
|
||||
|
||||
if self.settings.read_only:
|
||||
Log.error("Index opened in read only mode, no changes allowed")
|
||||
self.cluster.get_metadata()
|
||||
|
||||
if self.debug:
|
||||
Log.note("Delete bugs:\n{{query}}", query=filter)
|
||||
self.debug and Log.note("Delete bugs:\n{{query}}", query=filter)
|
||||
|
||||
if self.cluster.info.version.number.startswith("0.90"):
|
||||
query = {"filtered": {
|
||||
|
@ -269,6 +270,8 @@ class Index(Features):
|
|||
|
||||
elif self.cluster.info.version.number.startswith(("5.", "6.")):
|
||||
query = {"query": filter}
|
||||
if filter.terms.bug_id['~n~'] != None:
|
||||
Log.warning("filter is not typed")
|
||||
|
||||
wait_for_active_shards = coalesce( # EARLIER VERSIONS USED "consistency" AS A PARAMETER
|
||||
self.settings.wait_for_active_shards,
|
||||
|
@ -310,7 +313,7 @@ class Index(Features):
|
|||
if not lines:
|
||||
return
|
||||
|
||||
with Timer("Add {{num}} documents to {{index}}", {"num": len(lines) / 2, "index":self.settings.index}, debug=self.debug):
|
||||
with Timer("Add {{num}} documents to {{index}}", {"num": int(len(lines) / 2), "index": self.settings.index}, silent=not self.debug):
|
||||
try:
|
||||
data_string = "\n".join(l for l in lines) + "\n"
|
||||
except Exception as e:
|
||||
|
@ -388,8 +391,7 @@ class Index(Features):
|
|||
self.extend([record])
|
||||
|
||||
def add_property(self, name, details):
|
||||
if self.debug:
|
||||
Log.note("Adding property {{prop}} to {{index}}", prop=name, index=self.settings.index)
|
||||
self.debug and Log.note("Adding property {{prop}} to {{index}}", prop=name, index=self.settings.index)
|
||||
for n in jx.reverse(split_field(name)):
|
||||
if n == NESTED_TYPE:
|
||||
details = {"properties": {n: set_default(details, {"type": "nested", "dynamic": True})}}
|
||||
|
@ -500,7 +502,6 @@ class Index(Features):
|
|||
)
|
||||
|
||||
|
||||
|
||||
HOPELESS = [
|
||||
"Document contains at least one immense term",
|
||||
"400 MapperParsingException",
|
||||
|
@ -509,17 +510,16 @@ HOPELESS = [
|
|||
"JsonParseException"
|
||||
]
|
||||
|
||||
known_clusters = {} # MAP FROM (host, port) PAIR TO CLUSTER INSTANCE
|
||||
|
||||
|
||||
known_clusters = {}
|
||||
|
||||
class Cluster(object):
|
||||
|
||||
@override
|
||||
def __new__(cls, host, port=9200, kwargs=None):
|
||||
if not isinstance(port, int):
|
||||
if not Math.is_integer(port):
|
||||
Log.error("port must be integer")
|
||||
cluster = known_clusters.get((host, port))
|
||||
cluster = known_clusters.get((host, int(port)))
|
||||
if cluster:
|
||||
return cluster
|
||||
|
||||
|
@ -544,7 +544,7 @@ class Cluster(object):
|
|||
self.metatdata_last_updated = Date.now()
|
||||
self.debug = debug
|
||||
self._version = None
|
||||
self.path = kwargs.host + ":" + text_type(kwargs.port)
|
||||
self.url = URL(host, port=port)
|
||||
|
||||
@override
|
||||
def get_or_create_index(
|
||||
|
@ -726,7 +726,7 @@ class Cluster(object):
|
|||
elif isinstance(schema, text_type):
|
||||
Log.error("Expecting a JSON schema")
|
||||
|
||||
for k, m in list(schema.mappings.items()):
|
||||
for k, m in items(schema.mappings):
|
||||
m.date_detection = False # DISABLE DATE DETECTION
|
||||
|
||||
if typed:
|
||||
|
@ -737,7 +737,8 @@ class Cluster(object):
|
|||
DEFAULT_DYNAMIC_TEMPLATES +
|
||||
m.dynamic_templates
|
||||
)
|
||||
|
||||
if self.version.startswith("6."):
|
||||
m.dynamic_templates = [t for t in m.dynamic_templates if "default_integer" not in t]
|
||||
if self.version.startswith("5."):
|
||||
schema.settings.index.max_inner_result_window = None # NOT ACCEPTED BY ES5
|
||||
schema = json2value(value2json(schema), leaves=True)
|
||||
|
@ -785,8 +786,7 @@ class Cluster(object):
|
|||
if not isinstance(index_name, text_type):
|
||||
Log.error("expecting an index name")
|
||||
|
||||
if self.debug:
|
||||
Log.note("Deleting index {{index}}", index=index_name)
|
||||
self.debug and Log.note("Deleting index {{index}}", index=index_name)
|
||||
|
||||
# REMOVE ALL ALIASES TOO
|
||||
aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
|
||||
|
@ -802,8 +802,7 @@ class Cluster(object):
|
|||
if response.status_code != 200:
|
||||
Log.error("Expecting a 200, got {{code}}", code=response.status_code)
|
||||
details = json2value(utf82unicode(response.content))
|
||||
if self.debug:
|
||||
Log.note("delete response {{response}}", response=details)
|
||||
self.debug and Log.note("delete response {{response}}", response=details)
|
||||
return response
|
||||
except Exception as e:
|
||||
Log.error("Problem with call to {{url}}", url=url, cause=e)
|
||||
|
@ -861,7 +860,7 @@ class Cluster(object):
|
|||
return self._version
|
||||
|
||||
def post(self, path, **kwargs):
|
||||
url = self.settings.host + ":" + text_type(self.settings.port) + path
|
||||
url = self.url / path # self.settings.host + ":" + text_type(self.settings.port) + path
|
||||
|
||||
try:
|
||||
heads = wrap(kwargs).headers
|
||||
|
@ -872,23 +871,26 @@ class Cluster(object):
|
|||
if data == None:
|
||||
pass
|
||||
elif isinstance(data, Mapping):
|
||||
kwargs[DATA_KEY] = unicode2utf8(value2json(data))
|
||||
data = kwargs[DATA_KEY] = unicode2utf8(value2json(data))
|
||||
elif isinstance(data, text_type):
|
||||
kwargs[DATA_KEY] = unicode2utf8(data)
|
||||
data = kwargs[DATA_KEY] = unicode2utf8(data)
|
||||
elif hasattr(data, str("__iter__")):
|
||||
pass # ASSUME THIS IS AN ITERATOR OVER BYTES
|
||||
else:
|
||||
Log.error("data must be utf8 encoded string")
|
||||
|
||||
if self.debug:
|
||||
sample = kwargs.get(DATA_KEY, b"")[:300]
|
||||
Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)
|
||||
if isinstance(data, binary_type):
|
||||
sample = kwargs.get(DATA_KEY, b"")[:300]
|
||||
Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)
|
||||
else:
|
||||
Log.note("{{url}}:\n\t<stream>", url=url)
|
||||
|
||||
if self.debug:
|
||||
Log.note("POST {{url}}", url=url)
|
||||
self.debug and Log.note("POST {{url}}", url=url)
|
||||
response = http.post(url, **kwargs)
|
||||
if response.status_code not in [200, 201]:
|
||||
Log.error(text_type(response.reason) + ": " + strings.limit(response.content.decode("latin1"), 100 if self.debug else 10000))
|
||||
if self.debug:
|
||||
Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
|
||||
self.debug and Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
|
||||
details = json2value(utf82unicode(response.content))
|
||||
if details.error:
|
||||
Log.error(convert.quote2string(details.error))
|
||||
|
@ -900,7 +902,7 @@ class Cluster(object):
|
|||
return details
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
if url[0:4] != "http":
|
||||
if url.scheme != "http":
|
||||
suggestion = " (did you forget \"http://\" prefix on the host name?)"
|
||||
else:
|
||||
suggestion = ""
|
||||
|
@ -909,7 +911,7 @@ class Cluster(object):
|
|||
Log.error(
|
||||
"Problem with call to {{url}}" + suggestion + "\n{{body|left(10000)}}",
|
||||
url=url,
|
||||
body=strings.limit(kwargs[DATA_KEY], 100 if self.debug else 10000),
|
||||
body=strings.limit(utf82unicode(kwargs[DATA_KEY]), 100 if self.debug else 10000),
|
||||
cause=e
|
||||
)
|
||||
else:
|
||||
|
@ -921,8 +923,7 @@ class Cluster(object):
|
|||
response = http.delete(url, **kwargs)
|
||||
if response.status_code not in [200]:
|
||||
Log.error(response.reason+": "+response.all_content)
|
||||
if self.debug:
|
||||
Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
|
||||
self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
|
||||
details = wrap(json2value(utf82unicode(response.all_content)))
|
||||
if details.error:
|
||||
Log.error(details.error)
|
||||
|
@ -933,13 +934,11 @@ class Cluster(object):
|
|||
def get(self, path, **kwargs):
|
||||
url = self.settings.host + ":" + text_type(self.settings.port) + path
|
||||
try:
|
||||
if self.debug:
|
||||
Log.note("GET {{url}}", url=url)
|
||||
self.debug and Log.note("GET {{url}}", url=url)
|
||||
response = http.get(url, **kwargs)
|
||||
if response.status_code not in [200]:
|
||||
Log.error(response.reason + ": " + response.all_content)
|
||||
if self.debug:
|
||||
Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
|
||||
self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
|
||||
details = wrap(json2value(utf82unicode(response.all_content)))
|
||||
if details.error:
|
||||
Log.error(details.error)
|
||||
|
@ -953,8 +952,7 @@ class Cluster(object):
|
|||
response = http.head(url, **kwargs)
|
||||
if response.status_code not in [200]:
|
||||
Log.error(response.reason+": "+response.all_content)
|
||||
if self.debug:
|
||||
Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
|
||||
self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
|
||||
if response.all_content:
|
||||
details = wrap(json2value(utf82unicode(response.all_content)))
|
||||
if details.error:
|
||||
|
@ -988,9 +986,8 @@ class Cluster(object):
|
|||
try:
|
||||
response = http.put(url, **kwargs)
|
||||
if response.status_code not in [200]:
|
||||
Log.error(response.reason + ": " + utf82unicode(response.all_content))
|
||||
if self.debug:
|
||||
Log.note("response: {{response}}", response=utf82unicode(response.all_content)[0:300:])
|
||||
Log.error(response.reason + ": " + utf82unicode(response.content))
|
||||
self.debug and Log.note("response: {{response}}", response=utf82unicode(response.content)[0:300:])
|
||||
|
||||
details = json2value(utf82unicode(response.content))
|
||||
if details.error:
|
||||
|
@ -1038,7 +1035,7 @@ def _scrub(r):
|
|||
return convert.value2number(r)
|
||||
elif isinstance(r, Mapping):
|
||||
if isinstance(r, Data):
|
||||
r = object.__getattribute__(r, "_dict")
|
||||
r = object.__getattribute__(r, SLOT)
|
||||
output = {}
|
||||
for k, v in r.items():
|
||||
v = _scrub(v)
|
||||
|
@ -1079,8 +1076,7 @@ class Alias(Features):
|
|||
kwargs=None
|
||||
):
|
||||
self.debug = debug
|
||||
if self.debug:
|
||||
Log.alert("Elasticsearch debugging on {{index|quote}} is on", index= kwargs.index)
|
||||
self.debug and Log.alert("Elasticsearch debugging on {{index|quote}} is on", index= kwargs.index)
|
||||
if alias == None:
|
||||
Log.error("Alias can not be None")
|
||||
self.settings = kwargs
|
||||
|
@ -1112,7 +1108,7 @@ class Alias(Features):
|
|||
|
||||
@property
|
||||
def url(self):
|
||||
return self.cluster.path.rstrip("/") + "/" + self.path.lstrip("/")
|
||||
return self.cluster.url / self.path
|
||||
|
||||
def get_snowflake(self, retry=True):
|
||||
if self.settings.explore_metadata:
|
||||
|
@ -1172,8 +1168,7 @@ class Alias(Features):
|
|||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
if self.debug:
|
||||
Log.note("Delete documents:\n{{query}}", query=query)
|
||||
self.debug and Log.note("Delete documents:\n{{query}}", query=query)
|
||||
|
||||
keep_trying = True
|
||||
while keep_trying:
|
||||
|
@ -1269,8 +1264,15 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
continue
|
||||
if not property.type:
|
||||
continue
|
||||
|
||||
|
||||
cardinality = 0 if not property.store and not name != '_id' else None
|
||||
|
||||
if property.fields:
|
||||
child_columns = parse_properties(index_name, column_name, property.fields)
|
||||
if cardinality is None:
|
||||
for cc in child_columns:
|
||||
cc.cardinality = None
|
||||
columns.extend(child_columns)
|
||||
|
||||
if property.type in es_type_to_json_type.keys():
|
||||
|
@ -1279,6 +1281,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_column=column_name,
|
||||
names={".": jx_name},
|
||||
nested_path=ROOT_PATH,
|
||||
cardinality=cardinality,
|
||||
es_type=property.type
|
||||
))
|
||||
if property.index_name and name != property.index_name:
|
||||
|
@ -1287,6 +1290,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_column=column_name,
|
||||
names={".": jx_name},
|
||||
nested_path=ROOT_PATH,
|
||||
cardinality=0 if property.store else None,
|
||||
es_type=property.type
|
||||
))
|
||||
elif property.enabled == None or property.enabled == False:
|
||||
|
@ -1295,10 +1299,11 @@ def parse_properties(parent_index_name, parent_name, esProperties):
|
|||
es_column=column_name,
|
||||
names={".": jx_name},
|
||||
nested_path=ROOT_PATH,
|
||||
cardinality=0 if property.store else None,
|
||||
es_type="source" if property.enabled == False else "object"
|
||||
))
|
||||
else:
|
||||
Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=query_path)
|
||||
Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=parent_name)
|
||||
|
||||
return columns
|
||||
|
||||
|
@ -1482,6 +1487,8 @@ def diff_schema(A, B):
|
|||
output =[]
|
||||
def _diff_schema(path, A, B):
|
||||
for k, av in A.items():
|
||||
if k == "_id" and path == ".":
|
||||
continue # DO NOT ADD _id TO ANY SCHEMA DIFF
|
||||
bv = B[k]
|
||||
if bv == None:
|
||||
output.append((concat_field(path, k), av))
|
||||
|
@ -1534,6 +1541,24 @@ DEFAULT_DYNAMIC_TEMPLATES = wrap([
|
|||
"mapping": {"type": "keyword", "store": True},
|
||||
"match_mapping_type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"default_long": {
|
||||
"mapping": {"type": "long", "store": True},
|
||||
"match_mapping_type": "long"
|
||||
}
|
||||
},
|
||||
{
|
||||
"default_double": {
|
||||
"mapping": {"type": "double", "store": True},
|
||||
"match_mapping_type": "double"
|
||||
}
|
||||
},
|
||||
{
|
||||
"default_integer": {
|
||||
"mapping": {"type": "integer", "store": True},
|
||||
"match_mapping_type": "integer"
|
||||
}
|
||||
}
|
||||
])
|
||||
|
||||
|
|
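A recurring change in the elasticsearch module above collapses the two-line `if self.debug: Log.note(...)` blocks into the short-circuit form `self.debug and Log.note(...)`, so the message is neither formatted nor emitted unless the flag is truthy. A minimal sketch of the idiom, with a plain function standing in for Log.note:

    # The flag and note() helper below are stand-ins for self.debug and Log.note.
    def note(template, **params):
        print(template.format(**params))

    DEBUG = False
    DEBUG and note("deleting index {index}", index="unittest20180101_000000")   # emits nothing

    DEBUG = True
    DEBUG and note("deleting index {index}", index="unittest20180101_000000")   # prints the message
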
|
@ -12,9 +12,12 @@ from __future__ import unicode_literals
|
|||
|
||||
import flask
|
||||
from flask import Response
|
||||
from mo_dots import coalesce
|
||||
|
||||
from mo_future import binary_type
|
||||
from mo_dots import coalesce
|
||||
from mo_files import File
|
||||
from mo_json import value2json
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import unicode2utf8
|
||||
from pyLibrary.env.big_data import ibytes2icompressed
|
||||
|
||||
TOO_SMALL_TO_COMPRESS = 510 # DO NOT COMPRESS DATA WITH LESS THAN THIS NUMBER OF BYTES
|
||||
|
@ -63,4 +66,53 @@ def cors_wrapper(func):
|
|||
return output
|
||||
|
||||
|
||||
def dockerflow(flask_app, backend_check):
|
||||
"""
|
||||
ADD ROUTING TO HANDLE DOCKERFLOW APP REQUIREMENTS
|
||||
(see https://github.com/mozilla-services/Dockerflow#containerized-app-requirements)
|
||||
:param flask_app: THE (Flask) APP
|
||||
:param backend_check: METHOD THAT WILL CHECK THE BACKEND IS WORKING AND RAISE AN EXCEPTION IF NOT
|
||||
:return:
|
||||
"""
|
||||
global VERSION_JSON
|
||||
|
||||
try:
|
||||
VERSION_JSON = File("version.json").read_bytes()
|
||||
|
||||
@cors_wrapper
|
||||
def version():
|
||||
return Response(
|
||||
VERSION_JSON,
|
||||
status=200,
|
||||
headers={
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
)
|
||||
|
||||
@cors_wrapper
|
||||
def heartbeat():
|
||||
try:
|
||||
backend_check()
|
||||
return Response(status=200)
|
||||
except Exception as e:
|
||||
Log.warning("heartbeat failure", cause=e)
|
||||
return Response(
|
||||
unicode2utf8(value2json(e)),
|
||||
status=500,
|
||||
headers={
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
)
|
||||
|
||||
@cors_wrapper
|
||||
def lbheartbeat():
|
||||
return Response(status=200)
|
||||
|
||||
flask_app.add_url_rule(str('/__version__'), None, version, defaults={}, methods=[str('GET'), str('POST')])
|
||||
flask_app.add_url_rule(str('/__heartbeat__'), None, heartbeat, defaults={}, methods=[str('GET'), str('POST')])
|
||||
flask_app.add_url_rule(str('/__lbheartbeat__'), None, lbheartbeat, defaults={}, methods=[str('GET'), str('POST')])
|
||||
except Exception as e:
|
||||
Log.error("Problem setting up listeners for dockerflow", cause=e)
|
||||
|
||||
|
||||
VERSION_JSON = None
|
||||
|
|
|
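dockerflow() above registers the three Dockerflow endpoints (/__version__, /__heartbeat__, /__lbheartbeat__) on an existing Flask app, serving version.json and running the supplied backend check inside the heartbeat. A hedged wiring sketch; the import path for the helper is an assumption, and a version.json file must exist in the working directory:

    # Hypothetical wiring; the module path for dockerflow() is assumed, not shown above.
    from flask import Flask
    from pyLibrary.env.flask_wrappers import dockerflow   # assumed location of the helper

    app = Flask(__name__)

    def backend_check():
        pass   # raise here if the backing store is unreachable; heartbeat then returns 500

    # Expects a version.json file beside the app, which /__version__ will serve verbatim.
    dockerflow(app, backend_check)   # adds /__version__, /__heartbeat__ and /__lbheartbeat__
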
@ -8,18 +8,17 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import mo_threads
|
||||
from mo_logs.exceptions import suppress_exception
|
||||
from pyLibrary.meta import cache
|
||||
from mo_threads import Process
|
||||
from pyLibrary.meta import cache
|
||||
|
||||
|
||||
@cache
|
||||
def get_git_revision():
|
||||
def get_revision():
|
||||
"""
|
||||
GET THE CURRENT GIT REVISION
|
||||
"""
|
||||
|
@ -36,13 +35,12 @@ def get_git_revision():
|
|||
with suppress_exception:
|
||||
proc.join()
|
||||
|
||||
|
||||
@cache
|
||||
def get_remote_revision(url, branch):
|
||||
"""
|
||||
GET REVISION OF A REMOTE BRANCH
|
||||
"""
|
||||
|
||||
mo_threads.DEBUG = True
|
||||
proc = Process("git remote revision", ["git", "ls-remote", url, "refs/heads/" + branch])
|
||||
|
||||
try:
|
||||
|
@ -58,5 +56,22 @@ def get_remote_revision(url, branch):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
@cache
|
||||
def get_branch():
|
||||
"""
|
||||
GET THE CURRENT GIT BRANCH
|
||||
"""
|
||||
proc = Process("git status", ["git", "status"])
|
||||
|
||||
try:
|
||||
while True:
|
||||
raw_line = proc.stdout.pop()
|
||||
line = raw_line.decode('utf8').strip()
|
||||
if line.startswith("On branch "):
|
||||
return line[10:]
|
||||
finally:
|
||||
try:
|
||||
proc.join()
|
||||
except Exception:
|
||||
pass
|
||||
|
|
|
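The hunk above renames get_git_revision to get_revision and adds get_branch(), which runs `git status` and returns the text after "On branch "; all of these helpers are wrapped in @cache so the subprocess runs at most once per process. A hypothetical usage sketch (the module path is an assumption, since the diff does not show the file name):

    # Hypothetical usage; the import path is assumed, not stated in the diff.
    from pyLibrary.env.git import get_revision, get_branch

    print("revision:", get_revision())   # current commit hash, cached after the first call
    print("branch:  ", get_branch())     # scraped from the "On branch ..." line of `git status`
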
@ -29,6 +29,7 @@ from requests import sessions, Response
|
|||
|
||||
from jx_python import jx
|
||||
from mo_dots import Data, coalesce, wrap, set_default, unwrap, Null
|
||||
from mo_files.url import URL
|
||||
from mo_future import text_type, PY2
|
||||
from mo_json import value2json, json2value
|
||||
from mo_logs import Log
|
||||
|
@ -54,14 +55,13 @@ DEFAULTS = {
|
|||
"verify": True,
|
||||
"timeout": 600,
|
||||
"zip": False,
|
||||
"retry": {"times": 1, "sleep": 0}
|
||||
"retry": {"times": 1, "sleep": 0, "http": False}
|
||||
}
|
||||
|
||||
_warning_sent = False
|
||||
request_count = 0
|
||||
|
||||
|
||||
def request(method, url, zip=None, retry=None, **kwargs):
|
||||
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
|
||||
"""
|
||||
JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
|
||||
DEMANDS data IS ONE OF:
|
||||
|
@ -81,14 +81,14 @@ def request(method, url, zip=None, retry=None, **kwargs):
|
|||
global _warning_sent
|
||||
global request_count
|
||||
|
||||
if not default_headers and not _warning_sent:
|
||||
_warning_sent = True
|
||||
if not _warning_sent and not default_headers:
|
||||
Log.warning(text_type(
|
||||
"The pyLibrary.env.http module was meant to add extra " +
|
||||
"default headers to all requests, specifically the 'Referer' " +
|
||||
"header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
|
||||
"function to set `pyLibrary.env.http.default_headers`"
|
||||
))
|
||||
_warning_sent = True
|
||||
|
||||
if isinstance(url, list):
|
||||
# TRY MANY URLS
|
||||
|
@ -111,42 +111,42 @@ def request(method, url, zip=None, retry=None, **kwargs):
|
|||
sess = Null
|
||||
else:
|
||||
sess = session = sessions.Session()
|
||||
session.headers.update(default_headers)
|
||||
|
||||
with closing(sess):
|
||||
if PY2 and isinstance(url, text_type):
|
||||
# httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
|
||||
url = url.encode('ascii')
|
||||
|
||||
if retry == None:
|
||||
retry = Data(times=1, sleep=0)
|
||||
elif isinstance(retry, Number):
|
||||
retry = Data(times=retry, sleep=1)
|
||||
else:
|
||||
retry = wrap(retry)
|
||||
try:
|
||||
set_default(kwargs, {"zip":zip, "retry": retry}, DEFAULTS)
|
||||
_to_ascii_dict(kwargs)
|
||||
|
||||
# HEADERS
|
||||
headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers))
|
||||
_to_ascii_dict(headers)
|
||||
del kwargs['headers']
|
||||
|
||||
# RETRY
|
||||
retry = wrap(kwargs['retry'])
|
||||
if isinstance(retry, Number):
|
||||
retry = set_default({"times":retry}, DEFAULTS['retry'])
|
||||
if isinstance(retry.sleep, Duration):
|
||||
retry.sleep = retry.sleep.seconds
|
||||
set_default(retry, {"times": 1, "sleep": 0})
|
||||
del kwargs['retry']
|
||||
|
||||
_to_ascii_dict(kwargs)
|
||||
set_default(kwargs, DEFAULTS)
|
||||
# JSON
|
||||
if 'json' in kwargs:
|
||||
kwargs['data'] = value2json(kwargs['json']).encode('utf8')
|
||||
del kwargs['json']
|
||||
|
||||
if 'json' in kwargs:
|
||||
kwargs['data'] = value2json(kwargs['json']).encode('utf8')
|
||||
del kwargs['json']
|
||||
|
||||
try:
|
||||
headers = kwargs['headers'] = unwrap(coalesce(kwargs.get('headers'), {}))
|
||||
# ZIP
|
||||
set_default(headers, {'Accept-Encoding': 'compress, gzip'})
|
||||
|
||||
if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
|
||||
compressed = convert.bytes2zip(kwargs['data'])
|
||||
headers['content-encoding'] = 'gzip'
|
||||
kwargs['data'] = compressed
|
||||
|
||||
_to_ascii_dict(headers)
|
||||
else:
|
||||
_to_ascii_dict(headers)
|
||||
del kwargs['zip']
|
||||
except Exception as e:
|
||||
Log.error(u"Request setup failure on {{url}}", url=url, cause=e)
|
||||
|
||||
|
@ -158,12 +158,13 @@ def request(method, url, zip=None, retry=None, **kwargs):
|
|||
try:
|
||||
DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
|
||||
request_count += 1
|
||||
|
||||
del kwargs['retry']
|
||||
del kwargs['zip']
|
||||
return session.request(method=method, url=url, **kwargs)
|
||||
return session.request(method=method, headers=headers, url=str(url), **kwargs)
|
||||
except Exception as e:
|
||||
errors.append(Except.wrap(e))
|
||||
e = Except.wrap(e)
|
||||
if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
|
||||
url = URL("http://" + str(url)[8:])
|
||||
Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
|
||||
errors.append(e)
|
||||
|
||||
if " Read timed out." in errors[0]:
|
||||
Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
|
||||
|
|
|
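In the reworked request() above, the retry option may be a bare number or a mapping and is filled out against DEFAULTS['retry'], which now carries an http flag used to retry over plain http after an "EOF occurred in violation of protocol" SSL error. A small self-contained sketch of just the normalization step (names mirror the diff; the values are illustrative):

    # Illustrative normalization, mirroring the retry handling above.
    DEFAULT_RETRY = {"times": 1, "sleep": 0, "http": False}

    def normalize_retry(retry):
        out = dict(DEFAULT_RETRY)
        if retry is None:
            return out
        if isinstance(retry, (int, float)):   # a bare count, e.g. retry=3
            out["times"] = retry
            return out
        out.update(retry)                     # a mapping overrides the defaults
        return out

    assert normalize_retry(None) == {"times": 1, "sleep": 0, "http": False}
    assert normalize_retry(3) == {"times": 3, "sleep": 0, "http": False}
    assert normalize_retry({"sleep": 5}) == {"times": 1, "sleep": 5, "http": False}
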
@ -214,7 +214,7 @@ class ModifiedGenericConsumer(GenericConsumer):
|
|||
while True:
|
||||
try:
|
||||
self.connection.drain_events(timeout=self.timeout)
|
||||
except socket_timeout, e:
|
||||
except socket_timeout as e:
|
||||
Log.warning("timeout! Restarting {{name}} pulse consumer.", name=self.exchange, cause=e)
|
||||
try:
|
||||
self.disconnect()
|
||||
|
|
|
@ -120,7 +120,7 @@ class RolloverIndex(object):
|
|||
self.cluster.delete_index(c.index)
|
||||
except Exception as e:
|
||||
Log.warning("could not delete index {{index}}", index=c.index, cause=e)
|
||||
for t, q in list(self.known_queues.items()):
|
||||
for t, q in items(self.known_queues):
|
||||
if unix2Date(t) + self.rollover_interval < Date.today() - self.rollover_max:
|
||||
with self.locker:
|
||||
del self.known_queues[t]
|
||||
|
@ -189,7 +189,7 @@ class RolloverIndex(object):
|
|||
queue = None
|
||||
pending = [] # FOR WHEN WE DO NOT HAVE QUEUE YET
|
||||
for key in keys:
|
||||
timer = Timer("Process {{key}}", param={"key": key}, debug=DEBUG)
|
||||
timer = Timer("Process {{key}}", param={"key": key}, silent=not DEBUG)
|
||||
try:
|
||||
with timer:
|
||||
for rownum, line in enumerate(source.read_lines(strip_extension(key))):
|
||||
|
|
|
@ -13,14 +13,13 @@ from __future__ import unicode_literals
|
|||
|
||||
from collections import Mapping
|
||||
|
||||
from jx_base import NESTED, OBJECT
|
||||
from jx_python.expressions import jx_expression_to_function
|
||||
from mo_dots import Data, unwrap
|
||||
from pyLibrary.env.elasticsearch import parse_properties, random_id
|
||||
|
||||
from mo_json import json2value
|
||||
from mo_json.encoder import UnicodeBuilder
|
||||
from mo_json.typed_encoder import typed_encode
|
||||
from mo_json.typed_encoder import typed_encode, OBJECT, NESTED
|
||||
|
||||
|
||||
class TypedInserter(object):
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
class Graph(object):
|
||||
def __init__(self, node_type=None):
|
||||
self.nodes = []
|
||||
self.edges = []
|
||||
self.node_type = node_type
|
||||
|
||||
def add_edge(self, edge):
|
||||
self.edges.append(edge)
|
||||
|
||||
def remove_children(self, node):
|
||||
self.edges = [e for e in self.edges if e[0] != node]
|
||||
|
||||
def get_children(self, node):
|
||||
#FIND THE REVISION
|
||||
#
|
||||
return [c for p, c in self.edges if p == node]
|
||||
|
||||
def get_parents(self, node):
|
||||
return [p for p, c in self.edges if c == node]
|
||||
|
||||
def get_edges(self, node):
|
||||
return [(p, c) for p, c in self.edges if p == node or c == node]
|
||||
|
||||
def get_family(self, node):
|
||||
"""
|
||||
RETURN ALL ADJACENT NODES
|
||||
"""
|
||||
return set([p if c == node else c for p, c in self.edges])
|
||||
|
|
@ -1,101 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from collections import deque
|
||||
from mo_math import INTERSECT
|
||||
from pyLibrary.graphs.paths import Step, Path
|
||||
from mo_dots import Data
|
||||
|
||||
|
||||
def dfs(graph, func, head, reverse=None):
|
||||
"""
|
||||
DEPTH FIRST SEARCH
|
||||
|
||||
IF func RETURNS FALSE, THEN PATH IS NO LONGER TAKEN
|
||||
|
||||
IT'S EXPECTED func TAKES 3 ARGUMENTS
|
||||
node - THE CURRENT NODE IN THE
|
||||
path - PATH FROM head TO node
|
||||
graph - THE WHOLE GRAPH
|
||||
"""
|
||||
todo = deque()
|
||||
todo.append(head)
|
||||
path = deque()
|
||||
done = set()
|
||||
while todo:
|
||||
node = todo.popleft()
|
||||
if node in done:
|
||||
path.pop()
|
||||
continue
|
||||
|
||||
done.add(node)
|
||||
path.append(node)
|
||||
result = func(node, path, graph)
|
||||
if result:
|
||||
if reverse:
|
||||
children = graph.get_parents(node)
|
||||
else:
|
||||
children = graph.get_children(node)
|
||||
todo.extend(children)
|
||||
|
||||
|
||||
def bfs(graph, func, head, reverse=None):
|
||||
"""
|
||||
BREADTH FIRST SEARCH
|
||||
|
||||
IF func RETURNS FALSE, THEN NO MORE PATHS DOWN THE BRANCH ARE TAKEN
|
||||
|
||||
IT'S EXPECTED func TAKES 3 ARGUMENTS
|
||||
node - THE CURRENT NODE IN THE
|
||||
path - PATH FROM head TO node
|
||||
graph - THE WHOLE GRAPH
|
||||
todo - WHAT'S IN THE QUEUE TO BE DONE
|
||||
"""
|
||||
|
||||
todo = deque() # LIST OF PATHS
|
||||
todo.append(Step(None, head))
|
||||
|
||||
while True:
|
||||
path = todo.popleft()
|
||||
keep_going = func(path.node, Path(path), graph, todo)
|
||||
if keep_going:
|
||||
todo.extend(Step(path, c) for c in graph.get_children(path.node))
|
||||
|
||||
|
||||
def dominator(graph, head):
|
||||
# WE WOULD NEED DOMINATORS IF WE DO NOT KNOW THE TOPOLOGICAL ORDERING
|
||||
# DOMINATORS ALLOW US TO USE A REFERENCE TEST RESULT: EVERYTHING BETWEEN
|
||||
# dominator(node) AND node CAN BE TREATED AS PARALLEL-APPLIED CHANGESETS
|
||||
#
|
||||
# INSTEAD OF DOMINATORS, WE COULD USE MANY PERF RESULTS, FROM EACH OF THE
|
||||
# PARENT BRANCHES, AND AS LONG AS THEY ALL ARE PART OF A LONG LINE OF
|
||||
# STATISTICALLY IDENTICAL PERF RESULTS, WE CAN ASSUME THEY ARE A DOMINATOR
|
||||
|
||||
visited = set()
|
||||
dom = Data(output=None)
|
||||
|
||||
def find_dominator(node, path, graph, todo):
|
||||
if dom.output:
|
||||
return False
|
||||
if not todo:
|
||||
dom.output = node
|
||||
return False
|
||||
if node in visited:
|
||||
common = INTERSECT(p[1::] for p in todo) # DO NOT INCLUDE head
|
||||
if node in common:
|
||||
dom.output = node #ALL REMAINING PATHS HAVE node IN COMMON TOO
|
||||
return False
|
||||
return True
|
||||
|
||||
bfs(graph, find_dominator, head)
|
||||
|
||||
return dom.output
|
|
@ -1,123 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from collections import namedtuple, deque
|
||||
from mo_logs import Log
|
||||
|
||||
|
||||
Step = namedtuple("Step", ["parent", "node"])
|
||||
|
||||
|
||||
class Path(list):
|
||||
"""
|
||||
USES Steps TO DEFINE A LIST
|
||||
Steps POINT TO parent, SO THIS CLASS HANDLES THE REVERSE NATURE
|
||||
"""
|
||||
def __init__(self, last_step):
|
||||
self.last = last_step
|
||||
self.list = None
|
||||
|
||||
def _build_list(self):
|
||||
output = deque()
|
||||
s = self.last
|
||||
while s:
|
||||
output.appendleft(s.node)
|
||||
s = s.parent
|
||||
self.list = list(output)
|
||||
|
||||
def __getitem__(self, index):
|
||||
if index < 0:
|
||||
return None
|
||||
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
|
||||
if index>=len(self.list):
|
||||
return None
|
||||
return self.list[index]
|
||||
|
||||
def __setitem__(self, i, y):
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
self.list[i]=y
|
||||
|
||||
def __iter__(self):
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
return self.list.__iter__()
|
||||
|
||||
def __contains__(self, item):
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
return item in self.list
|
||||
|
||||
def append(self, val):
|
||||
Log.error("not implemented")
|
||||
|
||||
def __str__(self):
|
||||
Log.error("not implemented")
|
||||
|
||||
def __len__(self):
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
return len(self.list)
|
||||
|
||||
def __getslice__(self, i, j):
|
||||
Log.error("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step]")
|
||||
|
||||
def copy(self):
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
return self.list.copy()
|
||||
|
||||
def remove(self, x):
|
||||
Log.error("not implemented")
|
||||
|
||||
def extend(self, values):
|
||||
Log.error("not implemented")
|
||||
|
||||
def pop(self):
|
||||
Log.error("not implemented")
|
||||
|
||||
def right(self, num=None):
|
||||
"""
|
||||
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:]
|
||||
"""
|
||||
if num == None:
|
||||
return self.last.node
|
||||
if num <= 0:
|
||||
return []
|
||||
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
return self.list[-num:]
|
||||
|
||||
def not_right(self, num):
|
||||
"""
|
||||
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
|
||||
"""
|
||||
if not self.list:
|
||||
self._build_list()
|
||||
|
||||
if num == None:
|
||||
return self.list[:-1:]
|
||||
if num <= 0:
|
||||
return []
|
||||
|
||||
return self.list[:-num:]
|
||||
|
||||
def last(self):
|
||||
"""
|
||||
RETURN LAST ELEMENT IN Path [-1]
|
||||
"""
|
||||
return self.last.node
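# Illustrative sketch (hypothetical values): Steps chain backwards through their
# parent pointers, and Path lazily flattens that chain into forward order.
#
#     a = Step(None, "a")
#     b = Step(a, "b")
#     c = Step(b, "c")
#     p = Path(c)
#     list(p)         # -> ["a", "b", "c"]
#     p.right(2)      # -> ["b", "c"]
#     p.not_right(1)  # -> ["a", "b"]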
|
||||
|
|
@ -11,6 +11,7 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import namedtuple
|
||||
from types import FunctionType
|
||||
|
||||
import mo_json
|
||||
|
@ -135,7 +136,10 @@ def wrap_function(cache_store, func_):
|
|||
using_self = False
|
||||
func = lambda self, *args: func_(*args)
|
||||
|
||||
def output(*args):
|
||||
def output(*args, **kwargs):
|
||||
if kwargs:
|
||||
Log.error("Sorry, caching only works with ordered parameter, not keyword arguments")
|
||||
|
||||
with cache_store.locker:
|
||||
if using_self:
|
||||
self = args[0]
|
||||
|
@ -152,7 +156,7 @@ def wrap_function(cache_store, func_):
|
|||
|
||||
if Random.int(100) == 0:
|
||||
# REMOVE OLD CACHE
|
||||
_cache = {k: v for k, v in _cache.items() if v[0]==None or v[0] > now}
|
||||
_cache = {k: v for k, v in _cache.items() if v.timeout == None or v.timeout > now}
|
||||
setattr(self, attr_name, _cache)
|
||||
|
||||
timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))
|
||||
|
@ -160,7 +164,7 @@ def wrap_function(cache_store, func_):
|
|||
if now >= timeout:
|
||||
value = func(self, *args)
|
||||
with cache_store.locker:
|
||||
_cache[args] = (now + cache_store.timeout, args, value, None)
|
||||
_cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
|
||||
return value
|
||||
|
||||
if value == None:
|
||||
|
@ -168,12 +172,12 @@ def wrap_function(cache_store, func_):
|
|||
try:
|
||||
value = func(self, *args)
|
||||
with cache_store.locker:
|
||||
_cache[args] = (now + cache_store.timeout, args, value, None)
|
||||
_cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
|
||||
return value
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
with cache_store.locker:
|
||||
_cache[args] = (now + cache_store.timeout, args, None, e)
|
||||
_cache[args] = CacheElement(now + cache_store.timeout, args, None, e)
|
||||
raise e
|
||||
else:
|
||||
raise exception
|
||||
|
@ -183,9 +187,10 @@ def wrap_function(cache_store, func_):
|
|||
return output
|
||||
|
||||
|
||||
CacheElement = namedtuple("CacheElement", ("timeout", "key", "value", "exception"))
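# Sketch of the per-object cache shape (an assumption drawn from the code above):
# the cache attribute maps the tuple of positional arguments to a CacheElement, e.g.
#
#     _cache[(42,)] = CacheElement(timeout=now + cache_store.timeout, key=(42,), value=result, exception=None)
#
# so a later call with the same positional arguments can return `value` (or re-raise
# `exception`) until `timeout` passes.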
|
||||
|
||||
|
||||
class _FakeLock():
|
||||
|
||||
|
||||
def __enter__(self):
|
||||
pass
|
||||
|
||||
|
@ -193,7 +198,6 @@ class _FakeLock():
|
|||
pass
|
||||
|
||||
|
||||
|
||||
def value2quote(value):
|
||||
# RETURN PRETTY PYTHON CODE FOR THE SAME VALUE
|
||||
if isinstance(value, text_type):
|
||||
@ -12,10 +12,15 @@ from __future__ import absolute_import
|
|||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from itertools import groupby
|
||||
from operator import itemgetter
|
||||
|
||||
from mo_future import text_type, PY3
|
||||
from mo_logs import Log
|
||||
from mo_logs.strings import expand_template
|
||||
|
||||
import pyLibrary.sql
|
||||
|
||||
|
||||
class SQL(text_type):
|
||||
"""
|
||||
|
@ -127,9 +132,14 @@ def sql_concat(list_):
|
|||
return SQL(" || ").join(sql_iso(l) for l in list_)
|
||||
|
||||
|
||||
def quote_set(list_):
|
||||
return sql_iso(sql_list(map(pyLibrary.sql.sqlite.quote_value, list_)))
|
||||
|
||||
|
||||
def sql_alias(value, alias):
|
||||
return SQL(value.template + " AS " + alias.template)
|
||||
|
||||
|
||||
def sql_coalesce(list_):
|
||||
return "COALESCE(" + SQL_COMMA.join(list_) + ")"
|
||||
|
||||
@ -15,23 +15,20 @@ from __future__ import unicode_literals
|
|||
import subprocess
|
||||
from collections import Mapping
|
||||
from datetime import datetime
|
||||
from zipfile import ZipFile
|
||||
|
||||
from pymysql import connect, InterfaceError, cursors
|
||||
|
||||
import mo_json
|
||||
from jx_python import jx
|
||||
from mo_dots import coalesce, wrap, listwrap, unwrap
|
||||
from mo_dots import coalesce, wrap, listwrap, unwrap, split_field
|
||||
from mo_files import File
|
||||
from mo_future import text_type, utf8_json_encoder, binary_type
|
||||
from mo_future import text_type, utf8_json_encoder, binary_type, transpose
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from mo_logs.exceptions import Except, suppress_exception
|
||||
from mo_logs.strings import expand_template, indent, outdent
|
||||
from mo_math import Math
|
||||
from mo_times import Date
|
||||
from pyLibrary.convert import zip2bytes
|
||||
from pyLibrary.env.big_data import ibytes2ilines
|
||||
from pyLibrary.sql import SQL, SQL_NULL, SQL_SELECT, SQL_LIMIT, SQL_WHERE, SQL_LEFT_JOIN, SQL_FROM, SQL_AND, sql_list, sql_iso, SQL_ASC, SQL_TRUE, SQL_ONE, SQL_DESC, SQL_IS_NULL, sql_alias
|
||||
from pyLibrary.sql.sqlite import join_column
|
||||
|
||||
|
@ -106,11 +103,12 @@ class MySQL(object):
|
|||
)
|
||||
except Exception as e:
|
||||
if self.settings.host.find("://") == -1:
|
||||
Log.error(u"Failure to connect to {{host}}:{{port}}",
|
||||
host=self.settings.host,
|
||||
port=self.settings.port,
|
||||
cause=e
|
||||
)
|
||||
Log.error(
|
||||
u"Failure to connect to {{host}}:{{port}}",
|
||||
host=self.settings.host,
|
||||
port=self.settings.port,
|
||||
cause=e
|
||||
)
|
||||
else:
|
||||
Log.error(u"Failure to connect. PROTOCOL PREFIX IS PROBABLY BAD", e)
|
||||
self.cursor = None
|
||||
|
@ -180,7 +178,8 @@ class MySQL(object):
|
|||
try:
|
||||
self.db.close()
|
||||
except Exception as e:
|
||||
if e.message.find("Already closed") >= 0:
|
||||
e = Except.wrap(e)
|
||||
if "Already closed" in e:
|
||||
return
|
||||
|
||||
Log.warning("can not close()", e)
|
||||
|
@ -262,8 +261,7 @@ class MySQL(object):
|
|||
if param:
|
||||
sql = expand_template(sql, quote_param(param))
|
||||
sql = self.preamble + outdent(sql)
|
||||
if self.debug:
|
||||
Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
|
||||
self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
|
||||
|
||||
self.cursor.execute(sql)
|
||||
if row_tuples:
|
||||
|
@ -301,13 +299,12 @@ class MySQL(object):
|
|||
if param:
|
||||
sql = expand_template(sql, quote_param(param))
|
||||
sql = self.preamble + outdent(sql)
|
||||
if self.debug:
|
||||
Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
|
||||
self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
|
||||
|
||||
self.cursor.execute(sql)
|
||||
grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
|
||||
# columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
|
||||
result = zip(*grid)
|
||||
result = transpose(*grid)
|
||||
|
||||
if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS
|
||||
self.cursor.close()
|
||||
|
@ -333,8 +330,7 @@ class MySQL(object):
|
|||
if param:
|
||||
sql = expand_template(sql, quote_param(param))
|
||||
sql = self.preamble + outdent(sql)
|
||||
if self.debug:
|
||||
Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
|
||||
self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
|
||||
self.cursor.execute(sql)
|
||||
|
||||
columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
|
||||
|
@ -362,85 +358,6 @@ class MySQL(object):
|
|||
if self.debug or len(self.backlog) >= MAX_BATCH_SIZE:
|
||||
self._execute_backlog()
|
||||
|
||||
@staticmethod
|
||||
@override
|
||||
def execute_sql(
|
||||
host,
|
||||
username,
|
||||
password,
|
||||
sql,
|
||||
schema=None,
|
||||
param=None,
|
||||
kwargs=None
|
||||
):
|
||||
"""EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?"""
|
||||
kwargs.schema = coalesce(kwargs.schema, kwargs.database)
|
||||
|
||||
if param:
|
||||
with MySQL(kwargs) as temp:
|
||||
sql = expand_template(sql, quote_param(param))
|
||||
|
||||
# We have no way to execute an entire SQL file in bulk, so we
|
||||
# have to shell out to the commandline client.
|
||||
args = [
|
||||
"mysql",
|
||||
"-h{0}".format(host),
|
||||
"-u{0}".format(username),
|
||||
"-p{0}".format(password)
|
||||
]
|
||||
if schema:
|
||||
args.append("{0}".format(schema))
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
args,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
bufsize=-1
|
||||
)
|
||||
if isinstance(sql, text_type):
|
||||
sql = sql.encode("utf8")
|
||||
(output, _) = proc.communicate(sql)
|
||||
except Exception as e:
|
||||
raise Log.error("Can not call \"mysql\"", e)
|
||||
|
||||
if proc.returncode:
|
||||
if len(sql) > 10000:
|
||||
sql = "<" + text_type(len(sql)) + " bytes of sql>"
|
||||
Log.error(
|
||||
"Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n",
|
||||
sql=indent(sql),
|
||||
return_code=proc.returncode,
|
||||
output=output
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@override
|
||||
def execute_file(
|
||||
filename,
|
||||
host,
|
||||
username,
|
||||
password,
|
||||
schema=None,
|
||||
param=None,
|
||||
ignore_errors=False,
|
||||
kwargs=None
|
||||
):
|
||||
# MySQLdb provides no way to execute an entire SQL file in bulk, so we
|
||||
# have to shell out to the commandline client.
|
||||
file = File(filename)
|
||||
if file.extension == 'zip':
|
||||
sql = file.read_zipfile()
|
||||
else:
|
||||
sql = File(filename).read()
|
||||
|
||||
if ignore_errors:
|
||||
with suppress_exception:
|
||||
MySQL.execute_sql(sql=sql, param=param, kwargs=kwargs)
|
||||
else:
|
||||
MySQL.execute_sql(sql=sql, param=param, kwargs=kwargs)
|
||||
|
||||
def _execute_backlog(self):
|
||||
if not self.backlog: return
|
||||
|
||||
|
@ -451,8 +368,7 @@ class MySQL(object):
|
|||
for b in backlog:
|
||||
sql = self.preamble + b
|
||||
try:
|
||||
if self.debug:
|
||||
Log.note("Execute SQL:\n{{sql|indent}}", sql=sql)
|
||||
self.debug and Log.note("Execute SQL:\n{{sql|indent}}", sql=sql)
|
||||
self.cursor.execute(b)
|
||||
except Exception as e:
|
||||
Log.error("Can not execute sql:\n{{sql}}", sql=sql, cause=e)
|
||||
|
@ -463,8 +379,7 @@ class MySQL(object):
|
|||
for i, g in jx.groupby(backlog, size=MAX_BATCH_SIZE):
|
||||
sql = self.preamble + ";\n".join(g)
|
||||
try:
|
||||
if self.debug:
|
||||
Log.note("Execute block of SQL:\n{{sql|indent}}", sql=sql)
|
||||
self.debug and Log.note("Execute block of SQL:\n{{sql|indent}}", sql=sql)
|
||||
self.cursor.execute(sql)
|
||||
self.cursor.close()
|
||||
self.cursor = self.db.cursor()
|
||||
|
@ -567,19 +482,93 @@ class MySQL(object):
|
|||
sort = jx.normalize_sort_parameters(sort)
|
||||
return sql_list([quote_column(s.field) + (SQL_DESC if s.sort == -1 else SQL_ASC) for s in sort])
|
||||
|
||||
@override
|
||||
def execute_sql(
|
||||
host,
|
||||
username,
|
||||
password,
|
||||
sql,
|
||||
schema=None,
|
||||
param=None,
|
||||
kwargs=None
|
||||
):
|
||||
"""EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?"""
|
||||
kwargs.schema = coalesce(kwargs.schema, kwargs.database)
|
||||
|
||||
if param:
|
||||
with MySQL(kwargs) as temp:
|
||||
sql = expand_template(sql, quote_param(param))
|
||||
|
||||
# We have no way to execute an entire SQL file in bulk, so we
|
||||
# have to shell out to the commandline client.
|
||||
args = [
|
||||
"mysql",
|
||||
"-h{0}".format(host),
|
||||
"-u{0}".format(username),
|
||||
"-p{0}".format(password)
|
||||
]
|
||||
if schema:
|
||||
args.append("{0}".format(schema))
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
args,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
bufsize=-1
|
||||
)
|
||||
if isinstance(sql, text_type):
|
||||
sql = sql.encode("utf8")
|
||||
(output, _) = proc.communicate(sql)
|
||||
except Exception as e:
|
||||
raise Log.error("Can not call \"mysql\"", e)
|
||||
|
||||
if proc.returncode:
|
||||
if len(sql) > 10000:
|
||||
sql = "<" + text_type(len(sql)) + " bytes of sql>"
|
||||
Log.error(
|
||||
"Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n",
|
||||
sql=indent(sql),
|
||||
return_code=proc.returncode,
|
||||
output=output
|
||||
)
|
||||
|
||||
@override
|
||||
def execute_file(
|
||||
filename,
|
||||
host,
|
||||
username,
|
||||
password,
|
||||
schema=None,
|
||||
param=None,
|
||||
ignore_errors=False,
|
||||
kwargs=None
|
||||
):
|
||||
# MySQLdb provides no way to execute an entire SQL file in bulk, so we
|
||||
# have to shell out to the commandline client.
|
||||
file = File(filename)
|
||||
if file.extension == 'zip':
|
||||
sql = file.read_zipfile()
|
||||
else:
|
||||
sql = File(filename).read()
|
||||
|
||||
if ignore_errors:
|
||||
with suppress_exception:
|
||||
execute_sql(sql=sql, kwargs=kwargs)
|
||||
else:
|
||||
execute_sql(sql=sql, kwargs=kwargs)
|
||||
|
||||
ESCAPE_DCT = {
|
||||
u"\\": u"\\\\",
|
||||
# u"\0": u"\\0",
|
||||
# u"\"": u'\\"',
|
||||
u"\0": u"\\0",
|
||||
u"\"": u'\\"',
|
||||
u"\'": u"''",
|
||||
# u"\b": u"\\b",
|
||||
# u"\f": u"\\f",
|
||||
# u"\n": u"\\n",
|
||||
# u"\r": u"\\r",
|
||||
# u"\t": u"\\t",
|
||||
# u"%": u"\\%",
|
||||
# u"_": u"\\_"
|
||||
u"\b": u"\\b",
|
||||
u"\f": u"\\f",
|
||||
u"\n": u"\\n",
|
||||
u"\r": u"\\r",
|
||||
u"\t": u"\\t"
|
||||
}
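# Sketch of the intended mapping (assumption: the string-quoting routine replaces each
# character found in ESCAPE_DCT before wrapping the result in quotes):
#
#     "O'Hara\n\t"  ->  "O''Hara\\n\\t"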
|
||||
|
||||
|
||||
|
@ -615,13 +604,12 @@ def quote_column(column_name, table=None):
|
|||
if column_name == None:
|
||||
Log.error("missing column_name")
|
||||
elif isinstance(column_name, text_type):
|
||||
if table:
|
||||
column_name = join_column(table, column_name)
|
||||
return SQL("`" + column_name.replace(".", "`.`") + "`") # MY SQL QUOTE OF COLUMN NAMES
|
||||
elif isinstance(column_name, binary_type):
|
||||
if table:
|
||||
column_name = join_column(table, column_name)
|
||||
return SQL("`" + column_name.decode('utf8').replace(".", "`.`") + "`")
|
||||
return join_column(table, column_name)
|
||||
else:
|
||||
return SQL("`" + '`.`'.join(split_field(column_name)) + "`") # MYSQL QUOTE OF COLUMN NAMES
|
||||
elif isinstance(column_name, binary_type):
|
||||
return quote_column(column_name.decode('utf8'), table)
|
||||
elif isinstance(column_name, list):
|
||||
if table:
|
||||
return sql_list(join_column(table, c) for c in column_name)
|
||||
|
@ -631,10 +619,6 @@ def quote_column(column_name, table=None):
|
|||
return SQL(sql_alias(column_name.value, quote_column(column_name.name)))
|
||||
|
||||
|
||||
def quote_list(value):
|
||||
return sql_iso(sql_list(map(quote_value, value)))
|
||||
|
||||
|
||||
def quote_sql(value, param=None):
|
||||
"""
|
||||
USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT
|
||||
|
@ -650,7 +634,7 @@ def quote_sql(value, param=None):
|
|||
elif isinstance(value, Mapping):
|
||||
return quote_value(json_encode(value))
|
||||
elif hasattr(value, '__iter__'):
|
||||
return sql_iso(sql_list(map(quote_value, value)))
|
||||
return quote_list(value)
|
||||
else:
|
||||
return text_type(value)
|
||||
except Exception as e:
|
||||
@ -17,7 +17,8 @@ import re
|
|||
import sys
|
||||
from collections import Mapping, namedtuple
|
||||
|
||||
from mo_dots import Data, coalesce, unwraplist
|
||||
from jx_base.expressions import jx_expression
|
||||
from mo_dots import Data, coalesce, unwraplist, Null
|
||||
from mo_files import File
|
||||
from mo_future import allocate_lock as _allocate_lock, text_type
|
||||
from mo_kwargs import override
|
||||
|
@ -25,21 +26,23 @@ from mo_logs import Log
|
|||
from mo_logs.exceptions import Except, extract_stack, ERROR, format_trace
|
||||
from mo_logs.strings import quote
|
||||
from mo_math.stats import percentile
|
||||
from mo_threads import Queue, Signal, Thread, Lock, Till
|
||||
from mo_threads import Queue, Thread, Lock, Till
|
||||
from mo_times import Date, Duration
|
||||
from mo_times.timer import Timer
|
||||
from pyLibrary import convert
|
||||
from pyLibrary.sql import DB, SQL, SQL_TRUE, SQL_FALSE, SQL_NULL, SQL_SELECT, sql_iso
|
||||
from pyLibrary.sql import DB, SQL, SQL_TRUE, SQL_FALSE, SQL_NULL, SQL_SELECT, sql_iso, sql_list
|
||||
|
||||
DEBUG = False
|
||||
TRACE = True
|
||||
|
||||
FORMAT_COMMAND = "Running command\n{{command|limit(100)|indent}}"
|
||||
DOUBLE_TRANSACTION_ERROR = "You can not query outside a transaction you have open already"
|
||||
TOO_LONG_TO_HOLD_TRANSACTION = 10
|
||||
|
||||
sqlite3 = None
|
||||
_load_extension_warning_sent = False
|
||||
_upgraded = False
|
||||
known_databases = {Null: None}
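# Hypothetical misuse that DOUBLE_TRANSACTION_ERROR guards against (sketch only;
# the filename is made up):
#
#     db = Sqlite(filename="metadata.sqlite")
#     with db.transaction() as t:
#         t.execute("INSERT INTO x VALUES (1)")
#         db.query("SELECT * FROM x")   # same thread, outside the open transaction -> error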
|
||||
|
||||
|
||||
class Sqlite(DB):
|
||||
|
@ -63,12 +66,18 @@ class Sqlite(DB):
|
|||
|
||||
self.settings = kwargs
|
||||
self.filename = File(filename).abspath
|
||||
if known_databases.get(self.filename):
|
||||
Log.error("Not allowed to create more than one Sqlite instance for {{file}}", file=self.filename)
|
||||
|
||||
# SETUP DATABASE
|
||||
DEBUG and Log.note("Sqlite version {{version}}", version=sqlite3.sqlite_version)
|
||||
try:
|
||||
if db == None:
|
||||
self.db = sqlite3.connect(coalesce(self.filename, ':memory:'), check_same_thread=False, isolation_level=None)
|
||||
self.db = sqlite3.connect(
|
||||
database=coalesce(self.filename, ":memory:"),
|
||||
check_same_thread=False,
|
||||
isolation_level=None
|
||||
)
|
||||
else:
|
||||
self.db = db
|
||||
except Exception as e:
|
||||
|
@ -76,7 +85,7 @@ class Sqlite(DB):
|
|||
load_functions and self._load_functions()
|
||||
|
||||
self.locker = Lock()
|
||||
self.available_transactions = []
|
||||
self.available_transactions = [] # LIST OF ALL THE TRANSACTIONS BEING MANAGED
|
||||
self.queue = Queue("sql commands") # HOLD (command, result, signal, stacktrace) TUPLES
|
||||
|
||||
self.get_trace = coalesce(get_trace, TRACE)
|
||||
|
@ -136,8 +145,17 @@ class Sqlite(DB):
|
|||
signal.acquire()
|
||||
result = Data()
|
||||
trace = extract_stack(1) if self.get_trace else None
|
||||
|
||||
if self.get_trace:
|
||||
current_thread = Thread.current()
|
||||
with self.locker:
|
||||
for t in self.available_transactions:
|
||||
if t.thread is current_thread:
|
||||
Log.error(DOUBLE_TRANSACTION_ERROR)
|
||||
|
||||
self.queue.add(CommandItem(command, result, signal, trace, None))
|
||||
signal.acquire()
|
||||
|
||||
if result.exception:
|
||||
Log.error("Problem with Sqlite call", cause=result.exception)
|
||||
return result
|
||||
|
@ -151,7 +169,7 @@ class Sqlite(DB):
|
|||
self.closed = True
|
||||
signal = _allocate_lock()
|
||||
signal.acquire()
|
||||
self.queue.add((COMMIT, None, signal, None))
|
||||
self.queue.add(CommandItem(COMMIT, None, signal, None, None))
|
||||
signal.acquire()
|
||||
self.worker.please_stop.go()
|
||||
return
|
||||
|
@ -189,44 +207,62 @@ class Sqlite(DB):
|
|||
|
||||
self.db.create_function("REGEXP", 2, regexp)
|
||||
|
||||
def show_warning(self):
|
||||
blocked = (self.delayed_queries+self.delayed_transactions)[0]
|
||||
def show_transactions_blocked_warning(self):
|
||||
blocker = self.last_command_item
|
||||
blocked = (self.delayed_queries+self.delayed_transactions)[0]
|
||||
|
||||
Log.warning(
|
||||
"Query for thread {{blocked_thread|quote}} at\n{{blocked_trace|indent}}is blocked by {{blocker_thread|quote}} at\n{{blocker_trace|indent}}this message brought to you by....",
|
||||
blocker_thread=blocker.thread.name,
|
||||
"Query on thread {{blocked_thread|json}} at\n"
|
||||
"{{blocked_trace|indent}}"
|
||||
"is blocked by {{blocker_thread|json}} at\n"
|
||||
"{{blocker_trace|indent}}"
|
||||
"this message brought to you by....",
|
||||
blocker_trace=format_trace(blocker.trace),
|
||||
blocked_thread=blocked.thread.name,
|
||||
blocked_trace=format_trace(blocked.trace)
|
||||
blocked_trace=format_trace(blocked.trace),
|
||||
blocker_thread=blocker.transaction.thread.name if blocker.transaction is not None else None,
|
||||
blocked_thread=blocked.transaction.thread.name if blocked.transaction is not None else None
|
||||
)
|
||||
|
||||
def _close_transaction(self, command_item):
|
||||
query, result, signal, trace, transaction = command_item
|
||||
|
||||
transaction.end_of_life = True
|
||||
DEBUG and Log.note(FORMAT_COMMAND, command=query)
|
||||
with self.locker:
|
||||
self.available_transactions.remove(transaction)
|
||||
assert transaction not in self.available_transactions
|
||||
|
||||
old_length = len(self.transaction_stack)
|
||||
old_trans = self.transaction_stack[-1]
|
||||
del self.transaction_stack[-1]
|
||||
|
||||
assert old_length - 1 == len(self.transaction_stack)
|
||||
assert old_trans
|
||||
assert old_trans not in self.transaction_stack
|
||||
if not self.transaction_stack:
|
||||
# NESTED TRANSACTIONS NOT ALLOWED IN sqlite3
|
||||
DEBUG and Log.note(FORMAT_COMMAND, command=query)
|
||||
self.db.execute(query)
|
||||
|
||||
# PUT delayed BACK ON THE QUEUE, IN THE ORDER FOUND, BUT WITH QUERIES FIRST
|
||||
if self.too_long is not None:
|
||||
with self.too_long.lock:
|
||||
self.too_long.job_queue.clear()
|
||||
self.too_long = None
|
||||
has_been_too_long = False
|
||||
with self.locker:
|
||||
if self.too_long is not None:
|
||||
self.too_long, too_long = None, self.too_long
|
||||
# WE ARE CHEATING HERE: WE REACH INTO THE Signal MEMBERS AND REMOVE WHAT WE ADDED TO THE INTERNAL job_queue
|
||||
with too_long.lock:
|
||||
has_been_too_long = bool(too_long)
|
||||
too_long.job_queue = None
|
||||
|
||||
if self.delayed_transactions:
|
||||
for c in reversed(self.delayed_transactions):
|
||||
self.queue.push(c)
|
||||
del self.delayed_transactions[:]
|
||||
if self.delayed_queries:
|
||||
for c in reversed(self.delayed_queries):
|
||||
self.queue.push(c)
|
||||
del self.delayed_queries[:]
|
||||
# PUT delayed BACK ON THE QUEUE, IN THE ORDER FOUND, BUT WITH QUERIES FIRST
|
||||
if self.delayed_transactions:
|
||||
for c in reversed(self.delayed_transactions):
|
||||
self.queue.push(c)
|
||||
del self.delayed_transactions[:]
|
||||
if self.delayed_queries:
|
||||
for c in reversed(self.delayed_queries):
|
||||
self.queue.push(c)
|
||||
del self.delayed_queries[:]
|
||||
if has_been_too_long:
|
||||
Log.note("Transaction blockage cleared")
|
||||
|
||||
def _worker(self, please_stop):
|
||||
try:
|
||||
|
@ -235,11 +271,14 @@ class Sqlite(DB):
|
|||
command_item = self.queue.pop(till=please_stop)
|
||||
if command_item is None:
|
||||
break
|
||||
self._process_command_item(command_item)
|
||||
try:
|
||||
self._process_command_item(command_item)
|
||||
except Exception as e:
|
||||
Log.warning("worker can not execute command", cause=e)
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
if not please_stop:
|
||||
Log.warning("Problem with sql thread", cause=e)
|
||||
Log.warning("Problem with sql", cause=e)
|
||||
finally:
|
||||
self.closed = True
|
||||
DEBUG and Log.note("Database is closed")
|
||||
|
@ -248,31 +287,34 @@ class Sqlite(DB):
|
|||
def _process_command_item(self, command_item):
|
||||
query, result, signal, trace, transaction = command_item
|
||||
|
||||
with Timer("SQL Timing", debug=DEBUG):
|
||||
with Timer("SQL Timing", silent=not DEBUG):
|
||||
if transaction is None:
|
||||
# THIS IS A TRANSACTIONLESS QUERY, DELAY IT IF THERE IS A CURRENT TRANSACTION
|
||||
if self.transaction_stack:
|
||||
if self.too_long is None:
|
||||
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
|
||||
self.too_long.on_go(self.show_warning)
|
||||
self.delayed_queries.append(command_item)
|
||||
with self.locker:
|
||||
if self.too_long is None:
|
||||
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
|
||||
self.too_long.on_go(self.show_transactions_blocked_warning)
|
||||
self.delayed_queries.append(command_item)
|
||||
return
|
||||
elif self.transaction_stack and self.transaction_stack[-1] not in [transaction, transaction.parent]:
|
||||
# THIS TRANSACTION IS NOT THE CURRENT TRANSACTION, DELAY IT
|
||||
if self.too_long is None:
|
||||
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
|
||||
self.too_long.on_go(self.show_warning)
|
||||
self.delayed_transactions.append(command_item)
|
||||
with self.locker:
|
||||
if self.too_long is None:
|
||||
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
|
||||
self.too_long.on_go(self.show_transactions_blocked_warning)
|
||||
self.delayed_transactions.append(command_item)
|
||||
return
|
||||
else:
|
||||
# ENSURE THE CURRENT TRANSACTION IS UP TO DATE FOR THIS query
|
||||
if not self.transaction_stack:
|
||||
# sqlite3 ALLOWS ONLY ONE TRANSACTION AT A TIME
|
||||
DEBUG and Log.note(FORMAT_COMMAND, command=BEGIN)
|
||||
self.db.execute(BEGIN)
|
||||
self.transaction_stack.append(transaction)
|
||||
elif transaction != self.transaction_stack[-1]:
|
||||
elif transaction is not self.transaction_stack[-1]:
|
||||
self.transaction_stack.append(transaction)
|
||||
elif transaction.exception:
|
||||
elif transaction.exception and query is not ROLLBACK:
|
||||
result.exception = Except(
|
||||
type=ERROR,
|
||||
template="Not allowed to continue using a transaction that failed",
|
||||
|
@ -374,10 +416,12 @@ class Transaction(object):
|
|||
|
||||
def do_all(self):
|
||||
# ENSURE PARENT TRANSACTION IS UP TO DATE
|
||||
if self.parent:
|
||||
self.parent.do_all()
|
||||
|
||||
c = None
|
||||
try:
|
||||
if self.parent == self:
|
||||
Log.warning("Transactions parent is equal to itself.")
|
||||
if self.parent:
|
||||
self.parent.do_all()
|
||||
# GET THE REMAINING COMMANDS
|
||||
with self.locker:
|
||||
todo = self.todo[self.complete:]
|
||||
|
@ -387,8 +431,6 @@ class Transaction(object):
|
|||
for c in todo:
|
||||
DEBUG and Log.note(FORMAT_COMMAND, command=c.command)
|
||||
self.db.db.execute(c.command)
|
||||
if c.command in [COMMIT, ROLLBACK]:
|
||||
Log.error("logic error")
|
||||
except Exception as e:
|
||||
Log.error("problem running commands", current=c, cause=e)
|
||||
|
||||
|
@ -453,6 +495,9 @@ def quote_value(value):
|
|||
return SQL(text_type(value))
|
||||
|
||||
|
||||
def quote_list(list):
|
||||
return sql_iso(sql_list(map(quote_value, list)))
|
||||
|
||||
def join_column(a, b):
|
||||
a = quote_column(a)
|
||||
b = quote_column(b)
|
||||
|
|
|
@ -12,57 +12,58 @@ from __future__ import division
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import mo_json
|
||||
from mo_files import File
|
||||
from mo_logs import Log
|
||||
from mo_dots import Data
|
||||
from mo_dots import unwrap, wrap
|
||||
from pyLibrary import convert
|
||||
from pyLibrary.env.elasticsearch import Index, Cluster
|
||||
from mo_kwargs import override
|
||||
from jx_python import jx
|
||||
from mo_dots import Data, Null, unwrap, wrap
|
||||
from mo_files import File
|
||||
from mo_kwargs import override
|
||||
from mo_logs import Log
|
||||
from pyLibrary.env.elasticsearch import Cluster
|
||||
|
||||
|
||||
def make_test_instance(name, settings):
|
||||
if settings.filename:
|
||||
File(settings.filename).delete()
|
||||
return open_test_instance(name, settings)
|
||||
@override
|
||||
def make_test_instance(name, filename=None, kwargs=None):
|
||||
if filename != None:
|
||||
File(filename).delete()
|
||||
return open_test_instance(kwargs)
|
||||
|
||||
|
||||
def open_test_instance(name, settings):
|
||||
if settings.filename:
|
||||
@override
|
||||
def open_test_instance(name, filename=None, es=None, kwargs=None):
|
||||
if filename != None:
|
||||
Log.note(
|
||||
"Using {{filename}} as {{type}}",
|
||||
filename=settings.filename,
|
||||
filename=filename,
|
||||
type=name
|
||||
)
|
||||
return FakeES(settings)
|
||||
return FakeES(filename=filename)
|
||||
else:
|
||||
Log.note(
|
||||
"Using ES cluster at {{host}} as {{type}}",
|
||||
host=settings.host,
|
||||
host=es.host,
|
||||
type=name
|
||||
)
|
||||
cluster = Cluster(settings)
|
||||
cluster = Cluster(es)
|
||||
try:
|
||||
old_index = cluster.get_index(kwargs=settings)
|
||||
old_index = cluster.get_index(es)
|
||||
cluster.delete_index(old_index.settings.index)
|
||||
except Exception as e:
|
||||
if "Can not find index" not in e:
|
||||
Log.error("unexpected", cause=e)
|
||||
|
||||
es = cluster.create_index(limit_replicas=True, limit_replicas_warning=False, kwargs=settings)
|
||||
es.delete_all_but_self()
|
||||
es.add_alias(settings.index)
|
||||
return es
|
||||
output = cluster.create_index(limit_replicas=True, limit_replicas_warning=False, kwargs=es)
|
||||
output.delete_all_but_self()
|
||||
output.add_alias(es.index)
|
||||
return output
|
||||
|
||||
|
||||
class FakeES():
|
||||
@override
|
||||
def __init__(self, filename, host="fake", index="fake", kwargs=None):
|
||||
self.settings = kwargs
|
||||
self.filename = filename
|
||||
self.file = File(filename)
|
||||
self.cluster= Null
|
||||
try:
|
||||
self.data = mo_json.json2value(File(self.filename).read())
|
||||
self.data = mo_json.json2value(self.file.read())
|
||||
except Exception as e:
|
||||
self.data = Data()
|
||||
|
||||
|
@ -85,11 +86,8 @@ class FakeES():
|
|||
}
|
||||
|
||||
unwrap(self.data).update(records)
|
||||
|
||||
data_as_json = mo_json.value2json(self.data, pretty=True)
|
||||
|
||||
File(self.filename).write(data_as_json)
|
||||
Log.note("{{num}} documents added", num= len(records))
|
||||
self.refresh()
|
||||
Log.note("{{num}} documents added", num=len(records))
|
||||
|
||||
def add(self, record):
|
||||
if isinstance(record, list):
|
||||
|
@ -97,9 +95,14 @@ class FakeES():
|
|||
return self.extend([record])
|
||||
|
||||
def delete_record(self, filter):
|
||||
f = convert.esfilter2where(filter)
|
||||
f = esfilter2where(filter)
|
||||
self.data = wrap({k: v for k, v in self.data.items() if not f(v)})
|
||||
|
||||
def refresh(self, *args, **kwargs):
|
||||
data_as_json = mo_json.value2json(self.data, pretty=True)
|
||||
self.file.write(data_as_json)
|
||||
|
||||
|
||||
def set_refresh_interval(self, seconds):
|
||||
pass
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ from flask import Flask, Response
|
|||
|
||||
from mo_dots import listwrap, coalesce, unwraplist
|
||||
from mo_json import value2json, json2value
|
||||
from mo_logs import Log, constants, startup
|
||||
from mo_logs import Log, constants, startup, Except
|
||||
from mo_logs.strings import utf82unicode, unicode2utf8
|
||||
from mo_times import Timer, Date
|
||||
from pyLibrary.env.flask_wrappers import cors_wrapper
|
||||
|
@ -26,6 +26,8 @@ from tuid.util import map_to_array
|
|||
|
||||
OVERVIEW = None
|
||||
QUERY_SIZE_LIMIT = 10 * 1000 * 1000
|
||||
EXPECTING_QUERY = b"expecting query\r\n"
|
||||
TOO_BUSY = 10
|
||||
|
||||
class TUIDApp(Flask):
|
||||
|
||||
|
@ -51,7 +53,7 @@ def tuid_endpoint(path):
|
|||
if flask.request.headers.get("content-length", "") in ["", "0"]:
|
||||
# ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
|
||||
return Response(
|
||||
unicode2utf8("expecting query"),
|
||||
EXPECTING_QUERY,
|
||||
status=400,
|
||||
headers={
|
||||
"Content-Type": "text/html"
|
||||
|
@ -85,19 +87,31 @@ def tuid_endpoint(path):
|
|||
|
||||
rev = None
|
||||
paths = None
|
||||
branch_name = None
|
||||
for a in ands:
|
||||
rev = coalesce(rev, a.eq.revision)
|
||||
paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
|
||||
branch_name = coalesce(rev, a.eq.branch)
|
||||
|
||||
branch_name = coalesce(branch_name, a.eq.branch)
|
||||
paths = listwrap(paths)
|
||||
if len(paths) <= 0:
|
||||
Log.warning("Can't find file paths found in request: {{request}}", request=request_body)
|
||||
response = [("Error in app.py - no paths found", [])]
|
||||
|
||||
if len(paths) == 0:
|
||||
response, completed = [], True
|
||||
elif service.conn.pending_transactions > TOO_BUSY: # CHECK IF service IS VERY BUSY
|
||||
# TODO: BE SURE TO UPDATE STATS TOO
|
||||
Log.note("Too many open transactions")
|
||||
response, completed = [], False
|
||||
else:
|
||||
# RETURN TUIDS
|
||||
with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
|
||||
response = service.get_tuids_from_files(revision=rev, files=paths, going_forward=True, repo=branch_name)
|
||||
response, completed = service.get_tuids_from_files(
|
||||
revision=rev, files=paths, going_forward=True, repo=branch_name
|
||||
)
|
||||
|
||||
if not completed:
|
||||
Log.note(
|
||||
"Request for {{num}} files is incomplete for revision {{rev}}.",
|
||||
num=len(paths), rev=rev
|
||||
)
|
||||
|
||||
if query.meta.format == 'list':
|
||||
formatter = _stream_list
|
||||
|
@ -106,12 +120,13 @@ def tuid_endpoint(path):
|
|||
|
||||
return Response(
|
||||
formatter(response),
|
||||
status=200,
|
||||
status=200 if completed else 202,
|
||||
headers={
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
e = Except.wrap(e)
|
||||
Log.warning("could not handle request", cause=e)
|
||||
return Response(
|
||||
unicode2utf8(value2json(e, pretty=True)),
|
||||
|
@ -130,6 +145,10 @@ def _stream_table(files):
|
|||
|
||||
|
||||
def _stream_list(files):
|
||||
if not files:
|
||||
yield b'{"format":"list", "data":[]}'
|
||||
return
|
||||
|
||||
sep = b'{"format":"list", "data":['
|
||||
for f, pairs in files:
|
||||
yield sep
|
||||
|
@ -168,7 +187,7 @@ if __name__ in ("__main__",):
|
|||
Log.start(config.debug)
|
||||
|
||||
service = TUIDService(config.tuid)
|
||||
Log.note("Started TUID Service.")
|
||||
Log.note("Started TUID Service")
|
||||
except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
|
||||
try:
|
||||
Log.error("Serious problem with TUID service construction! Shutdown!", cause=e)
|
||||
|
@ -178,7 +197,7 @@ if __name__ in ("__main__",):
|
|||
if config.flask:
|
||||
if config.flask.port and config.args.process_num:
|
||||
config.flask.port += config.args.process_num
|
||||
Log.note("Running Service.")
|
||||
Log.note("Running Flask...")
|
||||
flask_app.run(**config.flask)
|
||||
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ from mo_times import Timer, Date
|
|||
from pyLibrary import aws
|
||||
from pyLibrary.env import http
|
||||
from pyLibrary.sql import sql_iso, sql_list
|
||||
from pyLibrary.sql.sqlite import Sqlite, quote_value
|
||||
from pyLibrary.sql.sqlite import Sqlite, quote_value, quote_list
|
||||
|
||||
DEBUG = True
|
||||
SLEEP_ON_ERROR = 30
|
||||
|
@ -84,7 +84,7 @@ class TuidClient(object):
|
|||
):
|
||||
response = self.db.query(
|
||||
"SELECT file, tuids FROM tuid WHERE revision=" + quote_value(revision) +
|
||||
" AND file IN " + sql_iso(sql_list(map(quote_value, files)))
|
||||
" AND file IN " + quote_list(files)
|
||||
)
|
||||
found = {file: json2value(tuids) for file, tuids in response.data}
|
||||
|
||||
|
@ -124,7 +124,7 @@ class TuidClient(object):
|
|||
|
||||
with self.db.transaction() as transaction:
|
||||
command = "INSERT INTO tuid (revision, file, tuids) VALUES " + sql_list(
|
||||
sql_iso(sql_list(map(quote_value, (revision, r.path, value2json(r.tuids)))))
|
||||
quote_list((revision, r.path, value2json(r.tuids)))
|
||||
for r in new_response.data
|
||||
if r.tuids != None
|
||||
)
|
||||
|
|
|
@ -0,0 +1,764 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
|
||||
# Use import as follows to prevent
|
||||
# circular dependency conflict for
|
||||
# TUIDService, which makes use of the
|
||||
# Clogger
|
||||
import tuid.service
|
||||
from jx_python import jx
|
||||
from mo_dots import Null, coalesce
|
||||
from mo_hg.hg_mozilla_org import HgMozillaOrg
|
||||
from mo_logs import Log
|
||||
from mo_threads import Till, Thread, Lock, Queue, Signal
|
||||
from mo_times.durations import DAY
|
||||
from pyLibrary.env import http
|
||||
from pyLibrary.sql import sql_list, quote_set
|
||||
from tuid import sql
|
||||
|
||||
RETRY = {"times": 3, "sleep": 5}
|
||||
SQL_CSET_BATCH_SIZE = 500
|
||||
CSET_TIP_WAIT_TIME = 5 * 60 # seconds
|
||||
CSET_BACKFILL_WAIT_TIME = 1 * 60 # seconds
|
||||
CSET_MAINTENANCE_WAIT_TIME = 30 * 60 # seconds
|
||||
CSET_DELETION_WAIT_TIME = 1 * 60 # seconds
|
||||
TUID_EXISTENCE_WAIT_TIME = 1 * 60 # seconds
|
||||
TIME_TO_KEEP_ANNOTATIONS = 5 * DAY
|
||||
MAX_TIPFILL_CLOGS = 60 # changeset logs
|
||||
MAX_BACKFILL_CLOGS = 200 # changeset logs
|
||||
CHANGESETS_PER_CLOG = 20 # changesets
|
||||
BACKFILL_REVNUM_TIMEOUT = int(MAX_BACKFILL_CLOGS * 2.5) # Assume 2.5 seconds per clog
|
||||
MINIMUM_PERMANENT_CSETS = 1000 # changesets
|
||||
MAXIMUM_NONPERMANENT_CSETS = 20000 # changesets
|
||||
SIGNAL_MAINTENACE_CSETS = MAXIMUM_NONPERMANENT_CSETS + (0.1 * MAXIMUM_NONPERMANENT_CSETS)
|
||||
UPDATE_VERY_OLD_FRONTIERS = False
|
||||
|
||||
|
||||
class Clogger:
|
||||
def __init__(self, conn=None, tuid_service=None, kwargs=None):
|
||||
try:
|
||||
self.config = kwargs
|
||||
|
||||
self.conn = conn if conn else sql.Sql(self.config.database.name)
|
||||
self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null
|
||||
|
||||
self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
|
||||
database=None, hg=None, kwargs=self.config, conn=self.conn, clogger=self
|
||||
)
|
||||
self.rev_locker = Lock()
|
||||
self.working_locker = Lock()
|
||||
|
||||
self.init_db()
|
||||
self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
|
||||
self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
|
||||
self.deletions_todo = Queue(name="Clogger.deletions_todo")
|
||||
self.maintenance_signal = Signal(name="Clogger.maintenance_signal")
|
||||
self.config = self.config.tuid
|
||||
|
||||
self.disable_backfilling = False
|
||||
self.disable_tipfilling = False
|
||||
self.disable_deletion = False
|
||||
self.disable_maintenance = False
|
||||
|
||||
# Make sure we are filled before allowing queries
|
||||
numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
|
||||
if numrevs < MINIMUM_PERMANENT_CSETS:
|
||||
Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
|
||||
oldest_rev = 'tip'
|
||||
with self.conn.transaction() as t:
|
||||
tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
|
||||
if tmp:
|
||||
oldest_rev = tmp
|
||||
self._fill_in_range(
|
||||
MINIMUM_PERMANENT_CSETS - numrevs,
|
||||
oldest_rev,
|
||||
timestamp=False
|
||||
)
|
||||
|
||||
Log.note(
|
||||
"Table is filled with atleast {{minim}} entries. Starting workers...",
|
||||
minim=MINIMUM_PERMANENT_CSETS
|
||||
)
|
||||
|
||||
Thread.run('clogger-tip', self.fill_forward_continuous)
|
||||
Thread.run('clogger-backfill', self.fill_backward_with_list)
|
||||
Thread.run('clogger-maintenance', self.csetLog_maintenance)
|
||||
Thread.run('clogger-deleter', self.csetLog_deleter)
|
||||
|
||||
Log.note("Started clogger workers.")
|
||||
except Exception as e:
|
||||
Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
|
||||
|
||||
|
||||
def init_db(self):
|
||||
with self.conn.transaction() as t:
|
||||
t.execute('''
|
||||
CREATE TABLE IF NOT EXISTS csetLog (
|
||||
revnum INTEGER PRIMARY KEY,
|
||||
revision CHAR(12) NOT NULL,
|
||||
timestamp INTEGER
|
||||
);''')
|
||||
|
||||
|
||||
def revnum(self):
|
||||
"""
|
||||
:return: max revnum that was added
|
||||
"""
|
||||
return coalesce(self.conn.get_one("SELECT max(revnum) as revnum FROM csetLog")[0], 0)
|
||||
|
||||
|
||||
def get_tip(self, transaction):
|
||||
return transaction.get_one(
|
||||
"SELECT max(revnum) as revnum, revision FROM csetLog"
|
||||
)
|
||||
|
||||
|
||||
def get_tail(self, transaction):
|
||||
return transaction.get_one(
|
||||
"SELECT min(revnum) as revnum, revision FROM csetLog"
|
||||
)
|
||||
|
||||
|
||||
def _get_clog(self, clog_url):
|
||||
try:
|
||||
Log.note("Searching through changelog {{url}}", url=clog_url)
|
||||
clog_obj = http.get_json(clog_url, retry=RETRY)
|
||||
return clog_obj
|
||||
except Exception as e:
|
||||
Log.error(
|
||||
"Unexpected error getting changset-log for {{url}}: {{error}}",
|
||||
url=clog_url,
|
||||
error=e
|
||||
)
|
||||
|
||||
|
||||
def _get_one_revision(self, transaction, cset_entry):
|
||||
# Returns a single revision if it exists
|
||||
_, rev, _ = cset_entry
|
||||
return transaction.get_one("SELECT revision FROM csetLog WHERE revision=?", (rev,))
|
||||
|
||||
|
||||
def _get_one_revnum(self, transaction, rev):
|
||||
# Returns a single revnum if it exists
|
||||
return transaction.get_one("SELECT revnum FROM csetLog WHERE revision=?", (rev,))
|
||||
|
||||
|
||||
def _get_revnum_range(self, transaction, revnum1, revnum2):
|
||||
# Returns an inclusive range of revision numbers
|
||||
high_num = max(revnum1, revnum2)
|
||||
low_num = min(revnum1, revnum2)
|
||||
|
||||
return transaction.query(
|
||||
"SELECT revnum, revision FROM csetLog WHERE "
|
||||
"revnum >= " + str(low_num) + " AND revnum <= " + str(high_num)
|
||||
).data
|
||||
|
||||
|
||||
def recompute_table_revnums(self):
|
||||
'''
|
||||
Recomputes the revnums for the csetLog table
|
||||
by creating a new table, and copying csetLog to
|
||||
it. The INTEGER PRIMARY KEY in the temp table auto increments
|
||||
as rows are added.
|
||||
|
||||
IMPORTANT: Only call this after acquiring the
|
||||
lock `self.working_locker`.
|
||||
:return:
|
||||
'''
|
||||
with self.conn.transaction() as t:
|
||||
t.execute('''
|
||||
CREATE TABLE temp (
|
||||
revnum INTEGER PRIMARY KEY,
|
||||
revision CHAR(12) NOT NULL,
|
||||
timestamp INTEGER
|
||||
);''')
|
||||
|
||||
t.execute(
|
||||
"INSERT INTO temp (revision, timestamp) "
|
||||
"SELECT revision, timestamp FROM csetlog ORDER BY revnum ASC"
|
||||
)
|
||||
|
||||
t.execute("DROP TABLE csetLog;")
|
||||
t.execute("ALTER TABLE temp RENAME TO csetLog;")
|
||||
|
||||
|
||||
def check_for_maintenance(self):
|
||||
'''
|
||||
Returns True if the maintenance worker should be run now,
|
||||
and False otherwise.
|
||||
:return:
|
||||
'''
|
||||
numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
|
||||
if numrevs >= SIGNAL_MAINTENACE_CSETS:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def add_cset_entries(self, ordered_rev_list, timestamp=False, number_forward=True):
|
||||
'''
|
||||
Adds a list of revisions to the table. Assumes ordered_rev_list is ordered
the same way changesets are found in the changelog. Going forwards or backwards
is dealt with by flipping the list.
:param ordered_rev_list: order given from changeset log searching.
:param timestamp: If False, records are kept indefinitely,
but if holes exist (delete, None, delete, None),
those deletes with Nones around them
will not be deleted.
:param number_forward: If True, this function will number the revision list
by going forward from max(revnum), else it'll go backwards
from revnum, then add X to all revnums and self.next_revnum,
where X is the length of ordered_rev_list.
:return:
|
||||
'''
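# Worked example (hypothetical revnums) of the numbering described above:
# going forward with current max(revnum) == 7, three incoming revisions get
# revnums 8, 9, 10 (the incoming list is reversed first because changelogs are
# newest-first); going backward with current min(revnum) == 3 they get 2, 1, 0,
# and recompute_table_revnums() later renumbers the whole table from 1.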
|
||||
with self.conn.transaction() as t:
|
||||
current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
|
||||
current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
|
||||
if not current_min or not current_max:
|
||||
current_min = 0
|
||||
current_max = 0
|
||||
|
||||
direction = -1
|
||||
start = current_min - 1
|
||||
if number_forward:
|
||||
direction = 1
|
||||
start = current_max + 1
|
||||
ordered_rev_list = ordered_rev_list[::-1]
|
||||
|
||||
insert_list = [
|
||||
(
|
||||
start + direction * count,
|
||||
rev,
|
||||
int(time.time()) if timestamp else -1
|
||||
)
|
||||
for count, rev in enumerate(ordered_rev_list)
|
||||
]
|
||||
|
||||
# In case of overlapping requests
|
||||
fmt_insert_list = []
|
||||
for cset_entry in insert_list:
|
||||
tmp = self._get_one_revision(t, cset_entry)
|
||||
if not tmp:
|
||||
fmt_insert_list.append(cset_entry)
|
||||
|
||||
for _, tmp_insert_list in jx.groupby(fmt_insert_list, size=SQL_CSET_BATCH_SIZE):
|
||||
t.execute(
|
||||
"INSERT INTO csetLog (revnum, revision, timestamp)" +
|
||||
" VALUES " +
|
||||
sql_list(
|
||||
quote_set((revnum, revision, timestamp))
|
||||
for revnum, revision, timestamp in tmp_insert_list
|
||||
)
|
||||
)
|
||||
|
||||
# Move the revision numbers forward if needed
|
||||
self.recompute_table_revnums()
|
||||
|
||||
# Start a maintenance run if needed
|
||||
if self.check_for_maintenance():
|
||||
self.maintenance_signal.go()
|
||||
|
||||
|
||||
def _fill_in_range(self, parent_cset, child_cset, timestamp=False, number_forward=True):
|
||||
'''
|
||||
Fills cset logs in a certain range. 'parent_cset' can be an int, in which case
we get that many changesets instead. When parent_cset is an int we consider
that we are going backwards (number_forward is False), we ignore the first
changeset of the first log, and we ignore the setting for number_forward.
Otherwise, we continue until we find the given 'parent_cset'.
|
||||
:param parent_cset:
|
||||
:param child_cset:
|
||||
:param timestamp:
|
||||
:param number_forward:
|
||||
:return:
|
||||
'''
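# Hypothetical calls matching the two modes described above ('abc123def456' is a
# made-up changeset id):
#
#     self._fill_in_range(1000, 'tip', timestamp=False)        # backfill roughly 1000 csets starting from tip
#     self._fill_in_range('abc123def456', oldest_rev, timestamp=True, number_forward=False)
#                                                              # backfill until parent 'abc123def456' is found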
|
||||
csets_to_add = []
|
||||
found_parent = False
|
||||
find_parent = False
|
||||
if type(parent_cset) != int:
|
||||
find_parent = True
|
||||
elif parent_cset >= MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG:
|
||||
Log.warning(
|
||||
"Requested number of new changesets {{num}} is too high. "
|
||||
"Max number that can be requested is {{maxnum}}.",
|
||||
num=parent_cset,
|
||||
maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
|
||||
)
|
||||
return None
|
||||
|
||||
csets_found = 0
|
||||
clogs_seen = 0
|
||||
final_rev = child_cset
|
||||
while not found_parent and clogs_seen < MAX_BACKFILL_CLOGS:
|
||||
clog_url = self.tuid_service.hg_url / self.config.hg.branch / 'json-log' / final_rev
|
||||
clog_obj = self._get_clog(clog_url)
|
||||
clog_csets_list = list(clog_obj['changesets'])
|
||||
for clog_cset in clog_csets_list[:-1]:
|
||||
if not number_forward and csets_found <= 0:
|
||||
# Skip this entry; it already exists
|
||||
csets_found += 1
|
||||
continue
|
||||
|
||||
nodes_cset = clog_cset['node'][:12]
|
||||
if find_parent:
|
||||
if nodes_cset == parent_cset:
|
||||
found_parent = True
|
||||
if not number_forward:
|
||||
# When going forward this entry is
|
||||
# the given parent
|
||||
csets_to_add.append(nodes_cset)
|
||||
break
|
||||
else:
|
||||
if csets_found + 1 > parent_cset:
|
||||
found_parent = True
|
||||
if not number_forward:
|
||||
# When going forward this entry is
|
||||
# the given parent (which is supposed
|
||||
# to already exist)
|
||||
csets_to_add.append(nodes_cset)
|
||||
break
|
||||
csets_found += 1
|
||||
csets_to_add.append(nodes_cset)
|
||||
if found_parent:
|
||||
break
|
||||
|
||||
clogs_seen += 1
|
||||
final_rev = clog_csets_list[-1]['node'][:12]
|
||||
|
||||
if found_parent:
|
||||
self.add_cset_entries(csets_to_add, timestamp=timestamp, number_forward=number_forward)
|
||||
else:
|
||||
Log.warning(
|
||||
"Couldn't find the end of the request for {{request}}. "
|
||||
"Max number that can be requested through _fill_in_range is {{maxnum}}.",
|
||||
request={
|
||||
'parent_cset': parent_cset,
|
||||
'child_cset':child_cset,
|
||||
'number_forward': number_forward
|
||||
},
|
||||
maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
|
||||
)
|
||||
return None
|
||||
return csets_to_add
|
||||
|
||||
|
||||
def fill_backward_with_list(self, please_stop=None):
|
||||
'''
|
||||
Expects requests of the tuple form: (parent_cset, timestamp)
|
||||
parent_cset can be an int X to go back by X changesets, or
|
||||
a string to search for going backwards in time. If timestamp
|
||||
is false, no timestamps will be added to the entries.
|
||||
:param please_stop:
|
||||
:return:
|
||||
'''
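# Hypothetical requests that this worker understands:
#
#     self.csets_todo_backwards.add((1000, False))            # go back 1000 more changesets, no timestamps
#     self.csets_todo_backwards.add(('abc123def456', True))   # go back to a specific (made-up) changeset, timestamped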
|
||||
while not please_stop:
|
||||
try:
|
||||
request = self.csets_todo_backwards.pop(till=please_stop)
|
||||
if please_stop:
|
||||
break
|
||||
|
||||
# If backfilling is disabled, all requests
|
||||
# are ignored.
|
||||
if self.disable_backfilling:
|
||||
Till(till=CSET_BACKFILL_WAIT_TIME).wait()
|
||||
continue
|
||||
|
||||
if request:
|
||||
parent_cset, timestamp = request
|
||||
else:
|
||||
continue
|
||||
|
||||
with self.working_locker:
|
||||
with self.conn.transaction() as t:
|
||||
parent_revnum = self._get_one_revnum(t, parent_cset)
|
||||
if parent_revnum:
|
||||
continue
|
||||
|
||||
with self.conn.transaction() as t:
|
||||
_, oldest_revision = self.get_tail(t)
|
||||
|
||||
self._fill_in_range(
|
||||
parent_cset,
|
||||
oldest_revision,
|
||||
timestamp=timestamp,
|
||||
number_forward=False
|
||||
)
|
||||
Log.note("Finished {{cset}}", cset=parent_cset)
|
||||
except Exception as e:
|
||||
Log.warning("Unknown error occurred during backfill: ", cause=e)
|
||||
|
||||
|
||||
def update_tip(self):
|
||||
'''
|
||||
Returns False if the tip is already at the newest, or True
|
||||
if an update has taken place.
|
||||
:return:
|
||||
'''
|
||||
clog_obj = self._get_clog(self.tuid_service.hg_url / self.config.hg.branch / 'json-log' / 'tip')
|
||||
|
||||
# Get current tip in DB
|
||||
with self.conn.transaction() as t:
|
||||
_, newest_known_rev = self.get_tip(t)
|
||||
|
||||
# If we are still at the newest, wait for CSET_TIP_WAIT_TIME seconds
|
||||
# before checking again.
|
||||
first_clog_entry = clog_obj['changesets'][0]['node'][:12]
|
||||
if newest_known_rev == first_clog_entry:
|
||||
return False
|
||||
|
||||
csets_to_gather = None
|
||||
if not newest_known_rev:
|
||||
Log.note(
|
||||
"No revisions found in table, adding {{minim}} entries...",
|
||||
minim=MINIMUM_PERMANENT_CSETS
|
||||
)
|
||||
csets_to_gather = MINIMUM_PERMANENT_CSETS
|
||||
|
||||
found_newest_known = False
|
||||
csets_to_add = []
|
||||
csets_found = 0
|
||||
clogs_seen = 0
|
||||
Log.note("Found new revisions. Updating csetLog tip to {{rev}}...", rev=first_clog_entry)
|
||||
while not found_newest_known and clogs_seen < MAX_TIPFILL_CLOGS:
|
||||
clog_csets_list = list(clog_obj['changesets'])
|
||||
for clog_cset in clog_csets_list[:-1]:
|
||||
nodes_cset = clog_cset['node'][:12]
|
||||
if not csets_to_gather:
|
||||
if nodes_cset == newest_known_rev:
|
||||
found_newest_known = True
|
||||
break
|
||||
else:
|
||||
if csets_found >= csets_to_gather:
|
||||
found_newest_known = True
|
||||
break
|
||||
csets_found += 1
|
||||
csets_to_add.append(nodes_cset)
|
||||
if not found_newest_known:
|
||||
# Get the next page
|
||||
clogs_seen += 1
|
||||
final_rev = clog_csets_list[-1]['node'][:12]
|
||||
clog_url = self.tuid_service.hg_url / self.config.hg.branch / 'json-log' / final_rev
|
||||
clog_obj = self._get_clog(clog_url)
|
||||
|
||||
if clogs_seen >= MAX_TIPFILL_CLOGS:
|
||||
Log.error(
|
||||
"Too many changesets, can't find last tip or the number is too high: {{rev}}. "
|
||||
"Maximum possible to request is {{maxnum}}",
|
||||
rev=coalesce(newest_known_rev, csets_to_gather),
|
||||
maxnum=MAX_TIPFILL_CLOGS * CHANGESETS_PER_CLOG
|
||||
)
|
||||
return False
|
||||
|
||||
with self.working_locker:
|
||||
Log.note("Adding {{csets}}", csets=csets_to_add)
|
||||
self.add_cset_entries(csets_to_add, timestamp=False)
|
||||
return True
|
||||
|
||||
|
||||
def fill_forward_continuous(self, please_stop=None):
|
||||
while not please_stop:
|
||||
try:
|
||||
waiting_a_bit = False
|
||||
if self.disable_tipfilling:
|
||||
waiting_a_bit = True
|
||||
|
||||
if not waiting_a_bit:
|
||||
# If an update was done, check if there are
|
||||
# more changesets that have arrived just in case,
|
||||
# otherwise, we wait.
|
||||
did_an_update = self.update_tip()
|
||||
if not did_an_update:
|
||||
waiting_a_bit = True
|
||||
|
||||
if waiting_a_bit:
|
||||
(please_stop | Till(seconds=CSET_TIP_WAIT_TIME)).wait()
|
||||
continue
|
||||
except Exception as e:
|
||||
Log.warning("Unknown error occurred during tip maintenance:", cause=e)
|
||||
|
||||
|
||||
    def csetLog_maintenance(self, please_stop=None):
        '''
        Deletes old csetLog entries, and timestamps revisions once they
        fall outside the permanent-storage window so that they can be
        deleted later.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                # Wait until something signals the maintenance cycle
                # to begin (or end).
                (self.maintenance_signal | please_stop).wait()

                if please_stop:
                    break
                if self.disable_maintenance:
                    continue

                # Reset signal so we don't request
                # maintenance infinitely.
                with self.maintenance_signal.lock:
                    self.maintenance_signal._go = False

                with self.working_locker:
                    all_data = None
                    with self.conn.transaction() as t:
                        all_data = sorted(
                            t.get("SELECT revnum, revision, timestamp FROM csetLog"),
                            key=lambda x: int(x[0])
                        )

                    # Restore maximum permanents (if overflowing)
                    new_data = []
                    modified = False
                    for count, (revnum, revision, timestamp) in enumerate(all_data[::-1]):
                        if count < MINIMUM_PERMANENT_CSETS:
                            if timestamp != -1:
                                modified = True
                                new_data.append((revnum, revision, -1))
                            else:
                                new_data.append((revnum, revision, timestamp))
                        elif type(timestamp) != int or timestamp == -1:
                            modified = True
                            new_data.append((revnum, revision, int(time.time())))
                        else:
                            new_data.append((revnum, revision, timestamp))

                    # Delete annotations at revisions with timestamps
                    # that are too old. The csetLog entries will have
                    # their timestamps reset here.
                    new_data1 = []
                    annrevs_to_del = []
                    current_time = time.time()
                    for count, (revnum, revision, timestamp) in enumerate(new_data[::-1]):
                        new_timestamp = timestamp
                        if timestamp != -1:
                            if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                                modified = True
                                new_timestamp = current_time
                                annrevs_to_del.append(revision)
                        new_data1.append((revnum, revision, new_timestamp))

                    if len(annrevs_to_del) > 0:
                        # Delete any latestFileMod and annotation entries
                        # that are too old.
                        Log.note(
                            "Deleting annotations and latestFileMod entries for revisions "
                            "older than {{oldest}}: {{revisions}}",
                            oldest=TIME_TO_KEEP_ANNOTATIONS,
                            revisions=annrevs_to_del
                        )
                        with self.conn.transaction() as t:
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )
                            t.execute(
                                "DELETE FROM annotations WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )

                    # Delete any overflowing entries
                    new_data2 = new_data1
                    reved_all_data = all_data[::-1]
                    deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                    delete_overflowing_revstart = None
                    if len(deleted_data) > 0:
                        _, delete_overflowing_revstart, _ = deleted_data[0]
                        new_data2 = set(all_data) - set(deleted_data)

                        # Update old frontiers if requested, otherwise
                        # they will all get deleted by the csetLog_deleter
                        # worker
                        if UPDATE_VERY_OLD_FRONTIERS:
                            _, max_revision, _ = all_data[-1]
                            for _, revision, _ in deleted_data:
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision,)
                                    )
                                if old_files is None or len(old_files) <= 0:
                                    continue

                                self.tuid_service.get_tuids_from_files(
                                    old_files,
                                    max_revision,
                                    going_forward=True,
                                )

                                still_exist = True
                                while still_exist and not please_stop:
                                    Till(seconds=TUID_EXISTENCE_WAIT_TIME).wait()
                                    with self.conn.transaction() as t:
                                        old_files = t.get(
                                            "SELECT file FROM latestFileMod WHERE revision=?",
                                            (revision,)
                                        )
                                    if old_files is None or len(old_files) <= 0:
                                        still_exist = False

                    # Update table and schedule a deletion
                    if modified:
                        with self.conn.transaction() as t:
                            t.execute(
                                "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES " +
                                sql_list(
                                    quote_set(cset_entry)
                                    for cset_entry in new_data2
                                )
                            )
                    if not deleted_data:
                        continue

                    Log.note("Scheduling {{num_csets}} for deletion", num_csets=len(deleted_data))
                    self.deletions_todo.add(delete_overflowing_revstart)
            except Exception as e:
                Log.warning("Unexpected error occurred while maintaining csetLog, continuing to try:", cause=e)
        return

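To make the retention policy above easier to follow, here is a minimal, self-contained sketch of the same bookkeeping. The constant names and the -1 "permanent" sentinel come from the code above; the values and the helper function itself are illustrative only and are not part of this commit.

import time

# Illustrative values only; the real constants are defined elsewhere in the clogger module.
MINIMUM_PERMANENT_CSETS = 1000
MAXIMUM_NONPERMANENT_CSETS = 100000


def plan_csetlog_maintenance(rows, now=None):
    # rows: [(revnum, revision, timestamp)] ordered oldest -> newest,
    # where timestamp == -1 marks a "permanent" entry.
    now = int(now if now is not None else time.time())
    updated = []
    for age, (revnum, revision, timestamp) in enumerate(rows[::-1]):
        if age < MINIMUM_PERMANENT_CSETS:
            updated.append((revnum, revision, -1))           # newest entries stay permanent
        elif not isinstance(timestamp, int) or timestamp == -1:
            updated.append((revnum, revision, now))          # start the deletion clock
        else:
            updated.append((revnum, revision, timestamp))    # clock already running
    overflow = rows[::-1][MAXIMUM_NONPERMANENT_CSETS:]       # oldest entries, to delete outright
    return updated[::-1], overflow
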
    def csetLog_deleter(self, please_stop=None):
        '''
        Deletes changesets from the csetLog table, along with any
        annotation table entries whose revisions match those changesets.
        Accepts deletion requests from self.deletions_todo.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.deletions_todo.pop(till=please_stop)
                if please_stop:
                    break

                # If deletion is disabled, ignore the current
                # request - it will need to be re-requested.
                if self.disable_deletion:
                    Till(seconds=CSET_DELETION_WAIT_TIME).wait()
                    continue

                with self.working_locker:
                    first_cset = request

                    # Since we are deleting and moving stuff around in the
                    # TUID tables, we need everything to be contained in
                    # one transaction with no interruptions.
                    with self.conn.transaction() as t:
                        revnum = self._get_one_revnum(t, first_cset)[0]
                        csets_to_del = t.get(
                            "SELECT revnum, revision FROM csetLog WHERE revnum <= ?", (revnum,)
                        )
                        csets_to_del = [cset for _, cset in csets_to_del]
                        existing_frontiers = t.query(
                            "SELECT revision FROM latestFileMod WHERE revision IN " +
                            quote_set(csets_to_del)
                        ).data

                        existing_frontiers = [row[0] for row in existing_frontiers]
                        Log.note(
                            "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                            csets=csets_to_del
                        )

                        if len(existing_frontiers) > 0:
                            # This handles files which no longer exist in
                            # the main branch.
                            Log.note(
                                "Deleting existing frontiers for revisions: {{revisions}}",
                                revisions=existing_frontiers
                            )
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(existing_frontiers)
                            )

                        Log.note("Deleting annotations...")
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                        Log.note(
                            "Deleting {{num_entries}} csetLog entries...",
                            num_entries=len(csets_to_del)
                        )
                        t.execute(
                            "DELETE FROM csetLog WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                    # Recalculate the revnums
                    self.recompute_table_revnums()
            except Exception as e:
                Log.warning("Unexpected error occurred while deleting from csetLog:", cause=e)
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
        return

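For context, the hand-off between the maintenance worker and the deleter above is just a queue of starting revisions. The queue in the code uses .add() / .pop(till=...); the sketch below uses the standard library only to illustrate the same producer/consumer flow, and every name in it is hypothetical.

# Minimal stand-in for the maintenance -> deleter hand-off shown above.
from queue import Queue

deletions_todo = Queue()


def request_deletion(overflow_rows):
    # The maintenance worker enqueues only the newest overflowing revision;
    # the deleter then removes that revision and everything older than it.
    if overflow_rows:
        _, newest_overflowing, _ = overflow_rows[0]
        deletions_todo.put(newest_overflowing)


def run_deleter(delete_up_to):
    # delete_up_to stands in for the "DELETE ... WHERE revnum <= ?" work above.
    first_cset = deletions_todo.get()
    delete_up_to(first_cset)
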
    def get_old_cset_revnum(self, revision):
        self.csets_todo_backwards.add((revision, True))

        revnum = None
        timeout = Till(seconds=BACKFILL_REVNUM_TIMEOUT)
        while not timeout:
            with self.conn.transaction() as t:
                revnum = self._get_one_revnum(t, revision)

            if revnum and revnum[0] >= 0:
                break
            elif revnum and revnum[0] < 0:
                Log.note("Waiting for table to recompute...")
            else:
                Log.note("Waiting for backfill to complete...")
            Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()

        if timeout:
            Log.error(
                "Cannot find revision {{rev}} after waiting {{timeout}} seconds",
                rev=revision,
                timeout=BACKFILL_REVNUM_TIMEOUT
            )
        return revnum

    def get_revnnums_from_range(self, revision1, revision2):
        with self.conn.transaction() as t:
            revnum1 = self._get_one_revnum(t, revision1)
            revnum2 = self._get_one_revnum(t, revision2)
        if not revnum1 or not revnum2:
            did_an_update = self.update_tip()
            if did_an_update:
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum1:
                revnum1 = self.get_old_cset_revnum(revision1)
                # Refresh the second entry
                with self.conn.transaction() as t:
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum2:
                revnum2 = self.get_old_cset_revnum(revision2)

                # The first revnum might change also
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)

        with self.conn.transaction() as t:
            result = self._get_revnum_range(t, revnum1[0], revnum2[0])
        return sorted(
            result,
            key=lambda x: int(x[0])
        )
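A hypothetical call site for the helper above, assuming a Clogger instance has already been constructed elsewhere; the revision strings are made up, and the exact row columns come from _get_revnum_range.

def print_changesets_between(clogger, old_rev, new_rev):
    # clogger is an already-constructed Clogger instance (hypothetical wiring).
    # Example made-up 12-character changeset IDs: "d63ed14ed622", "9e3ef2b6a889".
    for row in clogger.get_revnnums_from_range(old_rev, new_rev):
        print(row)   # rows are ordered by revnum; remaining columns come from _get_revnum_range
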
@@ -0,0 +1,50 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import division

from mo_logs import Log
from mo_threads import Till, Lock, Thread
from mo_times.durations import MINUTE

DAEMON_WAIT_FOR_PC = 5 * MINUTE  # Time until a percent complete log message is emitted.

class PercentCompleteLogger:

    def __init__(self):
        self.total_locker = Lock()
        self.total_files_requested = 0
        self.total_tuids_mapped = 0
        Thread.run("pc-daemon", self.run_daemon)


    def update_totals(self, num_files_req, num_tuids_mapped):
        with self.total_locker:
            self.total_files_requested += num_files_req
            self.total_tuids_mapped += num_tuids_mapped


    def reset_totals(self):
        with self.total_locker:
            self.total_files_requested = 0
            self.total_tuids_mapped = 0


    def run_daemon(self, please_stop=None):
        while not please_stop:
            try:
                with self.total_locker:
                    requested = self.total_files_requested
                    if requested != 0:
                        mapped = self.total_tuids_mapped
                        Log.note(
                            "Percent complete {{mapped}}/{{requested}} = {{percent|percent(0)}}",
                            requested=requested,
                            mapped=mapped,
                            percent=mapped / requested
                        )
                (Till(seconds=DAEMON_WAIT_FOR_PC.seconds) | please_stop).wait()
            except Exception as e:
                Log.warning("Unexpected error in pc-daemon: {{cause}}", cause=e)
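A quick usage sketch for the class above. The handler, its arguments, and the map_tuids callable are hypothetical; only update_totals and the "pc-daemon" thread come from the class itself.

# Hypothetical wiring: a request handler reports its progress to the shared logger.
pc_logger = PercentCompleteLogger()    # also starts the "pc-daemon" reporting thread

def handle_request(files, map_tuids):
    # Record how many files were requested up front...
    pc_logger.update_totals(num_files_req=len(files), num_tuids_mapped=0)
    results = map_tuids(files)
    # ...and how many were mapped, so the daemon can log a running percentage.
    pc_logger.update_totals(num_files_req=0, num_tuids_mapped=len(results))
    return results
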
Diff not shown because it is too large.
Some files were not shown because too many files have changed in this diff.