Kyle Lahnakoski 2018-09-20 14:21:52 -04:00
Parent 4040b0254c
Commit 0ddc8ea11a
101 changed files with 3944 additions and 2386 deletions

48
vendor/jx_base/__init__.py vendored

@ -11,50 +11,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from uuid import uuid4
from mo_dots import NullType, Data, FlatList, wrap, coalesce, listwrap
from mo_future import text_type, none_type, PY2
from mo_dots import wrap, coalesce, listwrap
from mo_future import text_type
from mo_json import value2json
from mo_logs import Log
from mo_logs.strings import expand_template, quote
from mo_times import Date
IS_NULL = '0'
BOOLEAN = 'boolean'
INTEGER = 'integer'
NUMBER = 'number'
STRING = 'string'
OBJECT = 'object'
NESTED = "nested"
EXISTS = "exists"
JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
STRUCT = [EXISTS, OBJECT, NESTED]
python_type_to_json_type = {
int: NUMBER,
text_type: STRING,
float: NUMBER,
None: OBJECT,
bool: BOOLEAN,
NullType: OBJECT,
none_type: OBJECT,
Data: OBJECT,
dict: OBJECT,
object: OBJECT,
Mapping: OBJECT,
list: NESTED,
FlatList: NESTED,
Date: NUMBER
}
if PY2:
python_type_to_json_type[str]=STRING
python_type_to_json_type[long]=NUMBER
def generateGuid():
"""Gets a random GUID.
@ -65,9 +29,8 @@ def generateGuid():
a=GenerateGuid()
import uuid
print a
print uuid.UUID(a).hex
print(a)
print(uuid.UUID(a).hex)
"""
return text_type(uuid4())
@ -234,11 +197,10 @@ class TableDesc(DataClass(
)):
@property
def columns(self):
Log.error("not implemented")
raise NotImplementedError()
# return singlton.get_columns(table_name=self.name)
Column = DataClass(
"Column",
[

19
vendor/jx_base/container.py vendored

@ -52,7 +52,6 @@ class Container(object):
METADATA FOR A Container IS CALLED A Namespace
"""
__slots__ = ["data", "namespaces"]
@classmethod
def new_instance(type, frum, schema=None):
@ -100,40 +99,40 @@ class Container(object):
def query(self, query):
if query.frum != self:
Log.error("not expected")
Log.error("Not implemented")
raise NotImplementedError()
def filter(self, where):
return self.where(where)
def where(self, where):
_ = where
Log.error("not implemented")
raise NotImplementedError()
def sort(self, sort):
_ = sort
Log.error("not implemented")
raise NotImplementedError()
def select(self, select):
_ = select
Log.error("not implemented")
raise NotImplementedError()
def window(self, window):
Log.error("not implemented")
raise NotImplementedError()
def having(self, having):
_ = having
Log.error("not implemented")
raise NotImplementedError()
def format(self, format):
_ = format
Log.error("not implemented")
raise NotImplementedError()
def get_columns(self, table_name):
"""
USE THE frum TO DETERMINE THE COLUMNS
"""
Log.error("Not implemented")
raise NotImplementedError()
@property
def schema(self):
Log.error("Not implemented")
raise NotImplementedError()
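Across this commit, stub methods that previously reported Log.error("not implemented") now raise the built-in NotImplementedError. A minimal sketch of the pattern, with a hypothetical InMemoryContainer subclass (not part of this repository) standing in for a concrete implementation:

class Container(object):
    def where(self, where):
        # abstract: concrete containers must override this
        raise NotImplementedError()

class InMemoryContainer(Container):   # hypothetical, for illustration only
    def __init__(self, rows):
        self.rows = rows

    def where(self, where):
        return [row for row in self.rows if where(row)]

try:
    Container().where(lambda row: True)
except NotImplementedError:
    pass  # callers catch a standard, well-known exception type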

5
vendor/jx_base/dimensions.py vendored

@ -14,6 +14,7 @@ from __future__ import unicode_literals
from collections import Mapping
import mo_dots as dot
from jx_base.domains import Domain, ALGEBRAIC, KNOWN
from mo_dots import Null, coalesce, join_field, split_field, Data
from mo_dots import wrap, listwrap
from mo_dots.lists import FlatList
@ -21,8 +22,6 @@ from mo_logs import Log
from mo_math import SUM
from mo_times.timer import Timer
from jx_base.domains import Domain, ALGEBRAIC, KNOWN
DEFAULT_QUERY_LIMIT = 20
@ -138,7 +137,7 @@ class Dimension(object):
def edges2value(*values):
if isinstance(fields, Mapping):
output = Data()
for e, v in zip(edges, values):
for e, v in transpose(edges, values):
output[e.name] = v
return output
else:
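Several hunks in this commit replace zip(...) with transpose(...) imported from mo_future. A minimal sketch of what such a helper presumably does (an assumption, not code from this commit): under Python 3, zip() returns a lazy iterator, so an eager, indexable transpose restores the Python 2 behaviour these call sites rely on.

def transpose(*args):
    # assumed behaviour: eager transposition, like Python 2's zip()
    return list(zip(*args))

edges = ["product", "platform"]
values = ["firefox", "linux"]
pairs = transpose(edges, values)   # [("product", "firefox"), ("platform", "linux")]
names, parts = transpose(*pairs)   # ("product", "platform"), ("firefox", "linux")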

2
vendor/jx_base/domains.py vendored

@ -93,7 +93,7 @@ class Domain(object):
return output
def getDomain(self):
Log.error("Not implemented")
raise NotImplementedError()
def verify_attributes_not_null(self, attribute_names):
for name in attribute_names:

13
vendor/jx_base/expressions.py vendored

@ -16,11 +16,11 @@ from collections import Mapping
from decimal import Decimal
import mo_json
from jx_base import OBJECT, python_type_to_json_type, BOOLEAN, NUMBER, INTEGER, STRING, IS_NULL
from jx_base.queries import is_variable_name, get_property_name
from mo_dots import coalesce, wrap, Null, split_field
from mo_future import text_type, utf8_json_encoder, get_function_name, zip_longest
from mo_json import scrub
from mo_json.typed_encoder import IS_NULL, OBJECT, BOOLEAN, python_type_to_json_type, NUMBER, INTEGER, STRING
from mo_logs import Log, Except
from mo_math import Math, MAX, MIN, UNION
from mo_times.dates import Date, unicode2Date
@ -80,7 +80,7 @@ def _jx_expression(expr):
elif isinstance(expr, text_type):
return Variable(expr)
elif isinstance(expr, (list, tuple)):
return TupleOp("tuple", map(jx_expression, expr)) # FORMALIZE
return TupleOp("tuple", list(map(jx_expression, expr))) # FORMALIZE
expr = wrap(expr)
try:
@ -874,6 +874,11 @@ class InequalityOp(Expression):
else:
return {self.op: [self.lhs.__data__(), self.rhs.__data__()]}
def __eq__(self, other):
if not isinstance(other, InequalityOp):
return False
return self.op == other.op and self.lhs == other.lhs and self.rhs == other.rhs
def vars(self):
return self.lhs.vars() | self.rhs.vars()
@ -2558,7 +2563,9 @@ class InOp(Expression):
def partial_eval(self):
value = self.value.partial_eval()
superset = self.superset.partial_eval()
if isinstance(value, Literal) and isinstance(superset, Literal):
if superset is NULL:
return FALSE
elif isinstance(value, Literal) and isinstance(superset, Literal):
return Literal(None, self())
else:
return self

1
vendor/jx_base/namespace.py vendored

@ -28,7 +28,6 @@ class Namespace(object):
def get_schema(self, name):
raise NotImplementedError()
def convert(self, expr):
raise NotImplementedError()

3
vendor/jx_base/query.py vendored

@ -15,7 +15,6 @@ from collections import Mapping
from copy import copy
import jx_base
from jx_base import STRUCT
from jx_base.dimensions import Dimension
from jx_base.domains import Domain, SetDomain, DefaultDomain
from jx_base.expressions import jx_expression, Expression, Variable, LeavesOp, ScriptOp, OffsetOp, TRUE, FALSE
@ -25,7 +24,7 @@ from mo_dots import coalesce, Null, set_default, unwraplist, literal_field
from mo_dots import wrap, unwrap, listwrap
from mo_dots.lists import FlatList
from mo_future import text_type
from mo_json.typed_encoder import untype_path
from mo_json.typed_encoder import untype_path, STRUCT
from mo_logs import Log
from mo_math import AND, UNION, Math

3
vendor/jx_base/schema.py vendored

@ -13,9 +13,8 @@ from __future__ import unicode_literals
from copy import copy
from jx_base import STRUCT, NESTED, OBJECT, EXISTS
from mo_dots import Null, startswith_field, set_default, wrap
from mo_json.typed_encoder import unnest_path, untype_path
from mo_json.typed_encoder import unnest_path, untype_path, STRUCT, EXISTS, OBJECT, NESTED
from mo_logs import Log

8
vendor/jx_elasticsearch/__init__.py vendored

@ -11,15 +11,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_future import text_type
from jx_base.container import type2container
from mo_files.url import URL
from mo_kwargs import override
from mo_logs import Log
from mo_logs.url import URL
from pyLibrary.env import http
DEBUG = False
known_hosts = {}
@ -30,7 +27,6 @@ def new_instance(
host,
index,
type=None,
alias=None,
name=None,
port=9200,
read_only=True,
@ -46,7 +42,7 @@ def new_instance(
url = URL(host)
url.port = port
status = http.get_json(text_type(url), stream=False)
status = http.get_json(url, stream=False)
version = status.version.number
if version.startswith("1."):
from jx_elasticsearch.es14 import ES14

4
vendor/jx_elasticsearch/es09/expressions.py vendored

@ -172,9 +172,9 @@ class _MVEL(object):
if len(split_field(self.fromData.name)) == 1 and fields:
if isinstance(fields, Mapping):
# CONVERT UNORDERED FIELD DEFS
jx_fields, es_fields = zip(*[(k, fields[k]) for k in sorted(fields.keys())])
jx_fields, es_fields = transpose(*[(k, fields[k]) for k in sorted(fields.keys())])
else:
jx_fields, es_fields = zip(*[(i, e) for i, e in enumerate(fields)])
jx_fields, es_fields = transpose(*[(i, e) for i, e in enumerate(fields)])
# NO LOOPS BECAUSE QUERY IS SHALLOW
# DOMAIN IS FROM A DIMENSION, USE ITS FIELD DEFS TO PULL

4
vendor/jx_elasticsearch/es09/setop.py vendored

@ -175,7 +175,7 @@ def es_setop(es, mvel, query):
if not data_list:
cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
else:
output = zip(*data_list)
output = transpose(*data_list)
cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})
return Data(
@ -221,7 +221,7 @@ def es_deepop(es, mvel, query):
data = es_post(es, FromES, query.limit)
rows = unpack_terms(data.facets.mvel, query.edges)
terms = zip(*rows)
terms = transpose(*rows)
# NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
edges = query.edges

3
vendor/jx_elasticsearch/es14/__init__.py vendored

@ -52,7 +52,6 @@ class ES14(Container):
host,
index,
type=None,
alias=None,
name=None,
port=9200,
read_only=True,
@ -161,7 +160,7 @@ class ES14(Container):
except Exception as e:
e = Except.wrap(e)
if "Data too large, data for" in e:
http.post(self.es.cluster.path+"/_cache/clear")
http.post(self.es.cluster.url / "_cache/clear")
Log.error("Problem (Tried to clear Elasticsearch cache)", e)
Log.error("problem", e)

3
vendor/jx_elasticsearch/es14/aggs.py vendored

@ -11,7 +11,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base import EXISTS
from jx_base.domains import SetDomain
from jx_base.expressions import TupleOp, NULL
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
@ -24,6 +23,7 @@ from jx_python import jx
from jx_python.expressions import jx_expression_to_function
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import EXISTS
from mo_json.typed_encoder import encode_property
from mo_logs import Log
from mo_math import Math, MAX, UNION
@ -175,6 +175,7 @@ def es_aggsop(es, frum, query):
es_query.aggs[key].percentiles.field = columns[0].es_column
es_query.aggs[key].percentiles.percents += [percent]
es_query.aggs[key].percentiles.compression = 2
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
elif s.aggregate == "cardinality":
canonical_names = []

12
vendor/jx_elasticsearch/es14/decoders.py vendored

@ -13,7 +13,6 @@ from __future__ import unicode_literals
from collections import Mapping
from jx_base import STRING, NUMBER, BOOLEAN
from jx_base.dimensions import Dimension
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
from jx_base.expressions import TupleOp, TRUE
@ -22,6 +21,7 @@ from jx_elasticsearch.es14.expressions import Variable, NotOp, InOp, Literal, An
from jx_python import jx
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import STRING, NUMBER, BOOLEAN
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_logs.strings import quote, expand_template
@ -123,13 +123,13 @@ class AggsDecoder(object):
pass
def get_value_from_row(self, row):
Log.error("Not implemented")
raise NotImplementedError()
def get_value(self, index):
Log.error("Not implemented")
raise NotImplementedError()
def get_index(self, row):
Log.error("Not implemented")
raise NotImplementedError()
@property
def num_columns(self):
@ -159,7 +159,7 @@ class SetDecoder(AggsDecoder):
domain = self.domain
domain_key = domain.key
include, text_include = zip(*(
include, text_include = transpose(*(
(
float(v) if isinstance(v, (int, float)) else v,
text_type(float(v)) if isinstance(v, (int, float)) else v
@ -497,7 +497,7 @@ class ObjectDecoder(AggsDecoder):
prefix = edge.value.var
flatter = lambda k: relative_field(k, prefix)
self.put, self.fields = zip(*[
self.put, self.fields = transpose(*[
(flatter(untype_path(c.names["."])), c.es_column)
for c in query.frum.schema.leaves(prefix)
])

2
vendor/jx_elasticsearch/es14/deep.py vendored

@ -11,7 +11,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base import NESTED
from jx_base.expressions import NULL
from jx_base.query import DEFAULT_LIMIT
from jx_elasticsearch import post as es_post
@ -20,6 +19,7 @@ from jx_elasticsearch.es14.setop import format_dispatch, get_pull_function, get_
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template
from jx_python.expressions import compile_expression, jx_expression_to_function
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, EXISTS_TYPE
from mo_logs import Log
from mo_threads import Thread

4
vendor/jx_elasticsearch/es14/expressions.py vendored

@ -13,15 +13,15 @@ from __future__ import unicode_literals
import itertools
from jx_base import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, ScriptOp, \
WhenOp, InequalityOp, extend, Literal, NullOp, TrueOp, FalseOp, DivOp, FloorOp, \
EqOp, NeOp, NotOp, LengthOp, NumberOp, StringOp, CountOp, MultiOp, RegExpOp, CoalesceOp, MissingOp, ExistsOp, \
PrefixOp, NotLeftOp, InOp, CaseOp, AndOp, \
ConcatOp, IsNumberOp, Expression, BasicIndexOfOp, MaxOp, MinOp, BasicEqOp, BooleanOp, IntegerOp, BasicSubstringOp, ZERO, NULL, FirstOp, FALSE, TRUE, SuffixOp, simplified, ONE, LeftOp
ConcatOp, IsNumberOp, Expression, BasicIndexOfOp, MaxOp, MinOp, BasicEqOp, BooleanOp, IntegerOp, BasicSubstringOp, ZERO, NULL, FirstOp, FALSE, TRUE, SuffixOp, simplified, ONE
from jx_elasticsearch.es14.util import es_not, es_script, es_or, es_and, es_missing
from mo_dots import coalesce, wrap, Null, set_default, literal_field
from mo_future import text_type
from mo_json.typed_encoder import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
from mo_logs import Log, suppress_exception
from mo_logs.strings import expand_template, quote
from mo_math import MAX, OR

6
vendor/jx_elasticsearch/es14/setop.py vendored

@ -13,18 +13,18 @@ from __future__ import unicode_literals
from collections import Mapping
from jx_base import NESTED
from jx_base.domains import ALGEBRAIC
from jx_base.expressions import IDENTITY
from jx_base.query import DEFAULT_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es14.expressions import Variable, LeavesOp
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_not, es_script
from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
from mo_dots.lists import FlatList
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, unnest_path, untyped
from mo_logs import Log
from mo_math import AND
@ -328,7 +328,7 @@ def format_cube(T, select, query=None):
data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
)
cols = zip(*unwrap(table.data))
cols = transpose(*unwrap(table.data))
return Cube(
select,
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}],

8
vendor/jx_elasticsearch/es14/util.py vendored

@ -11,13 +11,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_future import text_type
from mo_logs import Log
from jx_base import STRING, BOOLEAN, NUMBER, OBJECT
from jx_elasticsearch.es14.expressions import Variable
from mo_dots import wrap
from mo_future import text_type
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
from mo_logs import Log
def es_query_template(path):

80
vendor/jx_elasticsearch/es52/__init__.py vendored

@ -11,13 +11,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import Mapping
from jx_base import container
from jx_base.container import Container
from jx_base.dimensions import Dimension
from jx_base.expressions import jx_expression
from jx_base.queries import is_variable_name
from jx_base.query import QueryOp
from jx_elasticsearch.es52.aggs import es_aggsop, is_aggsop
from jx_elasticsearch.es52.deep import is_deepop, es_deepop
@ -25,9 +22,9 @@ from jx_elasticsearch.es52.setop import is_setop, es_setop
from jx_elasticsearch.es52.util import aggregates
from jx_elasticsearch.meta import ElasticsearchMetadata, Table
from jx_python import jx
from mo_dots import Data, Null, unwrap, coalesce, split_field, literal_field, unwraplist, join_field, wrap, listwrap, FlatList
from mo_json import scrub, value2json
from mo_json.typed_encoder import TYPE_PREFIX, EXISTS_TYPE
from mo_dots import Data, unwrap, coalesce, split_field, join_field, wrap, listwrap
from mo_json import value2json
from mo_json.typed_encoder import EXISTS_TYPE
from mo_kwargs import override
from mo_logs import Log, Except
from pyLibrary.env import elasticsearch, http
@ -52,7 +49,6 @@ class ES52(Container):
host,
index,
type=None,
alias=None,
name=None,
port=9200,
read_only=True,
@ -68,9 +64,9 @@ class ES52(Container):
"settings": unwrap(kwargs)
}
self.settings = kwargs
self.name = name = coalesce(name, alias, index)
self.name = name = coalesce(name, index)
if read_only:
self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
self.es = elasticsearch.Alias(alias=index, kwargs=kwargs)
else:
self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)
@ -79,7 +75,7 @@ class ES52(Container):
self.edges = Data()
self.worker = None
columns = self._namespace.get_snowflake(self.es.settings.alias).columns # ABSOLUTE COLUMNS
columns = self.snowflake.columns # ABSOLUTE COLUMNS
is_typed = any(c.es_column == EXISTS_TYPE for c in columns)
if typed == None:
@ -98,7 +94,6 @@ class ES52(Container):
def namespace(self):
return self._namespace
def get_table(self, full_name):
return Table(full_name, self)
@ -161,7 +156,7 @@ class ES52(Container):
except Exception as e:
e = Except.wrap(e)
if "Data too large, data for" in e:
http.post(self.es.cluster.path+"/_cache/clear")
http.post(self.es.cluster.url / "_cache/clear")
Log.error("Problem (Tried to clear Elasticsearch cache)", e)
Log.error("problem", e)
@ -198,37 +193,38 @@ class ES52(Container):
THE where CLAUSE IS AN ES FILTER
"""
command = wrap(command)
schema = self.es.get_properties()
table = self.get_table(command['update'])
es_index = self.es.cluster.get_index(read_only=False, alias=None, kwargs=self.es.settings)
schema = table.schema
es_filter = jx_expression(command.where).to_esfilter(schema)
# GET IDS OF DOCUMENTS
results = self.es.search({
"stored_fields": listwrap(schema._routing.path),
"query": {"bool": {
"filter": jx_expression(command.where).to_esfilter(Null)
}},
"size": 10000
})
query = {
"from": command['update'],
"select": ["_id"] + [
{"name": k, "value": v}
for k, v in command.set.items()
],
"where": command.where,
"format": "list",
"limit": 10000
}
# SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
scripts = FlatList()
for k, v in command.set.items():
if not is_variable_name(k):
Log.error("Only support simple paths for now")
if isinstance(v, Mapping) and v.doc:
scripts.append({"doc": v.doc})
else:
v = scrub(v)
scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})
results = self.query(query)
if results.hits.hits:
updates = []
for h in results.hits.hits:
for s in scripts:
updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
updates.append(s)
content = ("\n".join(value2json(c) for c in updates) + "\n")
if results.data:
content = "".join(
t
for r in results.data
for _id, row in [(r._id, r)]
for _ in [row.__setitem__('_id', None)] # WARNING! DESTRUCTIVE TO row
for update in map(value2json, ({"update": {"_id": _id}}, {"doc": row}))
for t in (update, "\n")
)
response = self.es.cluster.post(
self.es.path + "/_bulk",
es_index.path + "/" + "_bulk",
data=content,
headers={"Content-Type": "application/json"},
timeout=self.settings.timeout,
@ -237,3 +233,11 @@ class ES52(Container):
if response.errors:
Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])
# DELETE BY QUERY, IF NEEDED
if '.' in listwrap(command.clear):
self.es.delete_record(es_filter)
return
es_index.flush()
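For reference, a hedged illustration of the newline-delimited bulk body the generator above assembles; the ids and fields below are invented. Each returned row yields one "update" action line and one partial "doc" line, and the row's _id is nulled out before serialization.

bulk_body = (
    '{"update": {"_id": "doc-1"}}\n'
    '{"doc": {"status": "done", "_id": null}}\n'
    '{"update": {"_id": "doc-2"}}\n'
    '{"doc": {"status": "done", "_id": null}}\n'
)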

8
vendor/jx_elasticsearch/es52/aggs.py vendored

@ -11,7 +11,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base import EXISTS
from jx_base.domains import SetDomain
from jx_base.expressions import TupleOp, NULL
from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
@ -24,7 +23,7 @@ from jx_python import jx
from jx_python.expressions import jx_expression_to_function
from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import encode_property
from mo_json.typed_encoder import encode_property, EXISTS
from mo_logs import Log
from mo_logs.strings import quote, expand_template
from mo_math import Math, MAX, UNION
@ -222,6 +221,7 @@ def es_aggsop(es, frum, query):
es_query.aggs[key].percentiles.field = columns[0].es_column
es_query.aggs[key].percentiles.percents += [percent]
es_query.aggs[key].percentiles.tdigest.compression = 2
s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
elif s.aggregate == "cardinality":
canonical_names = []
@ -251,7 +251,7 @@ def es_aggsop(es, frum, query):
for column in columns:
script = {"scripted_metric": {
'init_script': 'params._agg.terms = new HashSet()',
'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v)',
'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v);',
'combine_script': 'return params._agg.terms.toArray()',
'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
}}
@ -366,7 +366,7 @@ def es_aggsop(es, frum, query):
decoders = get_decoders_by_depth(query)
start = 0
#<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
# <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
split_where = split_expression_by_depth(query.where, schema=frum.schema)
if len(split_field(frum.name)) > 1:

17
vendor/jx_elasticsearch/es52/decoders.py vendored

@ -13,7 +13,6 @@ from __future__ import unicode_literals
from collections import Mapping
from jx_base import STRING, NUMBER, BOOLEAN
from jx_base.dimensions import Dimension
from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION
from jx_base.expressions import TupleOp, TRUE
@ -22,8 +21,8 @@ from jx_elasticsearch.es52.expressions import Variable, NotOp, InOp, Literal, An
from jx_elasticsearch.es52.util import es_missing
from jx_python import jx
from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist
from mo_future import text_type
from mo_json.typed_encoder import untype_path
from mo_future import text_type, transpose
from mo_json.typed_encoder import untype_path, STRING, NUMBER, BOOLEAN
from mo_logs import Log
from mo_logs.strings import quote, expand_template
from mo_math import MAX, MIN, Math
@ -125,13 +124,13 @@ class AggsDecoder(object):
pass
def get_value_from_row(self, row):
Log.error("Not implemented")
raise NotImplementedError()
def get_value(self, index):
Log.error("Not implemented")
raise NotImplementedError()
def get_index(self, row):
Log.error("Not implemented")
raise NotImplementedError()
@property
def num_columns(self):
@ -161,7 +160,7 @@ class SetDecoder(AggsDecoder):
domain = self.domain
domain_key = domain.key
include, text_include = zip(*(
include, text_include = transpose(*(
(
float(v) if isinstance(v, (int, float)) else v,
text_type(float(v)) if isinstance(v, (int, float)) else v
@ -502,7 +501,7 @@ class ObjectDecoder(AggsDecoder):
prefix = edge.value.var
flatter = lambda k: relative_field(k, prefix)
self.put, self.fields = zip(*[
self.put, self.fields = transpose(*[
(flatter(untype_path(c.names["."])), c.es_column)
for c in query.frum.schema.leaves(prefix)
])
@ -562,7 +561,7 @@ class ObjectDecoder(AggsDecoder):
return None
output = Data()
for k, v in zip(self.put, part):
for k, v in transpose(self.put, part):
output[k] = v.get('key')
return output

2
vendor/jx_elasticsearch/es52/deep.py vendored

@ -11,7 +11,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from jx_base import NESTED
from jx_base.expressions import NULL
from jx_base.query import DEFAULT_LIMIT
from jx_elasticsearch import post as es_post
@ -20,6 +19,7 @@ from jx_elasticsearch.es52.setop import format_dispatch, get_pull_function, get_
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template
from jx_python.expressions import compile_expression, jx_expression_to_function
from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_threads import Thread

37
vendor/jx_elasticsearch/es52/expressions.py vendored

@ -13,7 +13,6 @@ from __future__ import unicode_literals
import itertools
from jx_base import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, ScriptOp, \
WhenOp, InequalityOp, extend, Literal, NullOp, TrueOp, FalseOp, DivOp, FloorOp, \
EqOp, NeOp, NotOp, LengthOp, NumberOp, StringOp, CountOp, MultiOp, RegExpOp, CoalesceOp, MissingOp, ExistsOp, \
@ -22,9 +21,11 @@ from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, Scr
from jx_elasticsearch.es52.util import es_not, es_script, es_or, es_and, es_missing
from mo_dots import coalesce, wrap, Null, set_default, literal_field
from mo_future import text_type
from mo_json.typed_encoder import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER
from mo_logs import Log, suppress_exception
from mo_logs.strings import expand_template, quote
from mo_math import MAX, OR
from mo_times import Date
from pyLibrary.convert import string2regexp
NUMBER_TO_STRING = """
@ -257,6 +258,12 @@ def to_es_script(self, schema):
expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]",
frum=self
)
if isinstance(v, Date):
return EsScript(
type=NUMBER,
expr=text_type(v.unix),
frum=self
)
return _convert(self.term)
@ -705,18 +712,22 @@ def to_es_script(self, schema):
@extend(OrOp)
def to_esfilter(self, schema):
return es_or([t.partial_eval().to_esfilter(schema) for t in self.terms])
# TODO: REPLICATE THIS WHOLE expression.py SO IT IS CLEAR ES5 QUERIES ARE A BIT DIFFERENT
if schema.snowflake.namespace.es_cluster.version.startswith("5."):
# VERSION 5.2.x
# WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION
# {"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL
# {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL
# OR(x) == NOT(AND(NOT(xi) for xi in x))
# output = es_not(es_and([
# NotOp("not", t).partial_eval().to_esfilter(schema)
# for t in self.terms
# ]))
# return output
# WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION
# {"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL
# {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL
# OR(x) == NOT(AND(NOT(xi) for xi in x))
output = es_not(es_and([
NotOp("not", t).partial_eval().to_esfilter(schema)
for t in self.terms
]))
return output
else:
# VERSION 6.2
return es_or([t.partial_eval().to_esfilter(schema) for t in self.terms])
@extend(LengthOp)
@ -1170,7 +1181,7 @@ def to_es_script(self, schema):
frum=self
)
else:
Log.error("do not know how to handle")
Log.error("do not know how to handle: {{self}}", self=self.__data__())
else:
return self.partial_eval().to_es_script(schema)
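The OrOp filter above relies on the identity OR(x) == NOT(AND(NOT(xi) for xi in x)) so that ES 5.x evaluates terms with exit-early semantics. A rough sketch of the resulting filter shape, assuming es_and and es_not build ordinary bool filter / must_not clauses (an assumption; the real helpers live in jx_elasticsearch.es52.util):

def es_and(filters):
    return {"bool": {"filter": filters}}      # assumed shape

def es_not(filter_):
    return {"bool": {"must_not": filter_}}    # assumed shape

def or_with_early_exit(es_filters):
    # OR(a, b, ...) == NOT(AND(NOT(a), NOT(b), ...))
    return es_not(es_and([es_not(f) for f in es_filters]))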

33
vendor/jx_elasticsearch/es52/setop.py vendored

@ -13,22 +13,22 @@ from __future__ import unicode_literals
from collections import Mapping
from jx_base import NESTED
from jx_base.domains import ALGEBRAIC
from jx_base.expressions import IDENTITY
from jx_base.query import DEFAULT_LIMIT
from jx_elasticsearch import post as es_post
from jx_elasticsearch.es52.expressions import Variable, LeavesOp
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_not, es_script
from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
from jx_python.containers.cube import Cube
from jx_python.expressions import jx_expression_to_function
from mo_collections.matrix import Matrix
from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
from mo_dots.lists import FlatList
from mo_future import transpose
from mo_json.typed_encoder import NESTED
from mo_json.typed_encoder import untype_path, unnest_path, untyped
from mo_logs import Log
from mo_math import AND
from mo_math import MAX
from mo_math import AND, MAX
from mo_times.timer import Timer
format_dispatch = {}
@ -102,11 +102,20 @@ def es_setop(es, query):
leaves = schema.leaves(s_column)
nested_selects = {}
if leaves:
if s_column == '.' or any(c.jx_type == NESTED for c in leaves):
if s_column == '.':
# PULL ALL SOURCE
es_query.stored_fields = ["_source"]
new_select.append({
"name": select.name,
"value": select.value,
"put": {"name": select.name, "index": put_index, "child": "."},
"pull": get_pull_source(".")
})
elif any(c.jx_type == NESTED for c in leaves):
# PULL WHOLE NESTED ARRAYS
es_query.stored_fields = ["_source"]
for c in leaves:
if len(c.nested_path) == 1:
if len(c.nested_path) == 1: # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
jx_name = untype_path(c.names["."])
new_select.append({
"name": select.name,
@ -193,12 +202,14 @@ def es_setop(es, query):
if es_query.stored_fields[0] == "_source":
es_query.stored_fields = ["_source"]
n.pull = get_pull_source(n.value.var)
elif n.value == "_id":
n.pull = jx_expression_to_function("_id")
else:
n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
else:
Log.error("Do not know what to do")
with Timer("call to ES", silent=True) as call_timer:
with Timer("call to ES") as call_timer:
data = es_post(es, es_query, query.limit)
T = data.hits.hits
@ -206,7 +217,8 @@ def es_setop(es, query):
try:
formatter, groupby_formatter, mime_type = format_dispatch[query.format]
output = formatter(T, new_select, query)
with Timer("formatter"):
output = formatter(T, new_select, query)
output.meta.timing.es = call_timer.duration
output.meta.content_type = mime_type
output.meta.es_query = es_query
@ -318,7 +330,8 @@ def format_table(T, select, query=None):
def format_cube(T, select, query=None):
table = format_table(T, select, query)
with Timer("format table"):
table = format_table(T, select, query)
if len(table.data) == 0:
return Cube(
@ -327,7 +340,7 @@ def format_cube(T, select, query=None):
data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
)
cols = zip(*unwrap(table.data))
cols = transpose(*unwrap(table.data))
return Cube(
select,
edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}],

8
vendor/jx_elasticsearch/es52/util.py vendored

@ -11,13 +11,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_future import text_type
from mo_logs import Log
from jx_base import STRING, BOOLEAN, NUMBER, OBJECT
from jx_elasticsearch.es52.expressions import Variable
from mo_dots import wrap
from mo_future import text_type
from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
from mo_logs import Log
def es_query_template(path):

165
vendor/jx_elasticsearch/meta.py vendored

@ -15,18 +15,18 @@ import itertools
from itertools import product
import jx_base
from jx_base import STRUCT, TableDesc, BOOLEAN
from jx_base import TableDesc
from jx_base.namespace import Namespace
from jx_base.query import QueryOp
from jx_python import jx, meta as jx_base_meta
from jx_python import jx
from jx_python.containers.list_usingPythonList import ListContainer
from jx_python.meta import ColumnList, Column
from mo_collections.relation import Relation_usingList
from mo_dots import Data, relative_field, SELF_PATH, ROOT_PATH, coalesce, set_default, Null, split_field, join_field, wrap, concat_field, startswith_field, literal_field
from mo_json.typed_encoder import EXISTS_TYPE, untype_path, unnest_path
from mo_json.typed_encoder import EXISTS_TYPE, untype_path, unnest_path, OBJECT, EXISTS, STRUCT, BOOLEAN
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import extract_stack
from mo_logs.exceptions import Except
from mo_logs.strings import quote
from mo_math import MAX
from mo_threads import Queue, THREAD_STOP, Thread, Till
@ -50,8 +50,9 @@ class ElasticsearchMetadata(Namespace):
MANAGE SNOWFLAKE SCHEMAS FOR EACH OF THE ALIASES FOUND IN THE CLUSTER
"""
def __new__(cls, *args, **kwargs):
es_cluster = elasticsearch.Cluster(kwargs['kwargs'])
@override
def __new__(cls, kwargs, *args, **_kwargs):
es_cluster = elasticsearch.Cluster(kwargs)
output = known_clusters.get(id(es_cluster))
if output is None:
output = object.__new__(cls)
@ -88,7 +89,14 @@ class ElasticsearchMetadata(Namespace):
"meta.tables": Date.now()
}
table_columns = metadata_tables()
self.meta.tables = ListContainer("meta.tables", [], jx_base.Schema(".", table_columns))
self.meta.tables = ListContainer(
"meta.tables",
[
# TableDesc("meta.columns", None, ".", Date.now()),
# TableDesc("meta.tables", None, ".", Date.now())
],
jx_base.Schema(".", table_columns)
)
self.meta.columns.extend(table_columns)
# TODO: fix monitor so it does not bring down ES
if ENABLE_META_SCAN:
@ -97,9 +105,13 @@ class ElasticsearchMetadata(Namespace):
self.worker = Thread.run("refresh metadata", self.not_monitor)
return
@property
def namespace(self):
return self.meta.columns.namespace
@property
def url(self):
return self.es_cluster.path + "/" + self.default_name.replace(".", "/")
return self.es_cluster.url / self.default_name.replace(".", "/")
def _reload_columns(self, table_desc):
"""
@ -141,7 +153,17 @@ class ElasticsearchMetadata(Namespace):
def _parse_properties(self, alias, mapping, meta):
abs_columns = elasticsearch.parse_properties(alias, None, mapping.properties)
with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
if any(c.cardinality == 0 and c.names['.'] != '_id' for c in abs_columns):
Log.warning(
"Some columns are not stored {{names}}",
names=[
".".join((c.es_index, c.names['.']))
for c in abs_columns
if c.cardinality == 0
]
)
with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, silent=not DEBUG):
# LIST OF EVERY NESTED PATH
query_paths = [[c.es_column] for c in abs_columns if c.es_type == "nested"]
for a, b in itertools.product(query_paths, query_paths):
@ -159,11 +181,13 @@ class ElasticsearchMetadata(Namespace):
q.append(SELF_PATH)
query_paths.append(ROOT_PATH)
self.alias_to_query_paths[alias] = query_paths
for i in self.index_to_alias.get_domain(alias):
self.alias_to_query_paths[i] = query_paths
# ADD RELATIVE NAMES
for abs_column in abs_columns:
abs_column.last_updated = None
abs_column.jx_type = es_type_to_json_type[abs_column.es_type]
abs_column.jx_type = jx_type(abs_column)
for query_path in query_paths:
abs_column.names[query_path[0]] = relative_field(abs_column.names["."], query_path[0])
self.todo.add(self.meta.columns.add(abs_column))
@ -203,7 +227,7 @@ class ElasticsearchMetadata(Namespace):
Log.error("{{table|quote}} does not exist", table=table_name)
try:
last_update = MAX([
last_update = MAX([
self.es_cluster.index_last_updated[i]
for i in self.index_to_alias.get_domain(alias)
])
@ -288,7 +312,7 @@ class ElasticsearchMetadata(Namespace):
"size": 0
})
count = result.hits.total
cardinality = 1001
cardinality = max(1001, count)
multi = 1001
elif column.es_column == "_id":
result = self.es_cluster.post("/" + es_index + "/_search", data={
@ -350,7 +374,7 @@ class ElasticsearchMetadata(Namespace):
})
return
elif column.es_type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
DEBUG and Log.note("{{field}} has {{num}} parts", field=column.es_index, num=cardinality)
DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
self.meta.columns.update({
"set": {
"count": count,
@ -393,9 +417,10 @@ class ElasticsearchMetadata(Namespace):
except Exception as e:
# CAN NOT IMPORT: THE TEST MODULES SETS UP LOGGING
# from tests.test_jx import TEST_TABLE
e = Except.wrap(e)
TEST_TABLE = "testdata"
is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
is_test_table = any(column.es_index.startswith(t) for t in [TEST_TABLE_PREFIX, TEST_TABLE])
is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
if is_missing_index and is_test_table:
# WE EXPECT TEST TABLES TO DISAPPEAR
self.meta.columns.update({
@ -414,7 +439,7 @@ class ElasticsearchMetadata(Namespace):
"multi",
"partitions",
],
"where": {"eq": {"names.\\.": ".", "es_index": column.es_index, "es_column": column.es_column}}
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
Log.warning("Could not get {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)
@ -430,10 +455,10 @@ class ElasticsearchMetadata(Namespace):
]
if old_columns:
DEBUG and Log.note(
"Old columns {{names|json}} last updated {{dates|json}}",
names=wrap(old_columns).es_column,
dates=[Date(t).format() for t in wrap(old_columns).last_updated]
)
"Old columns {{names|json}} last updated {{dates|json}}",
names=wrap(old_columns).es_column,
dates=[Date(t).format() for t in wrap(old_columns).last_updated]
)
self.todo.extend(old_columns)
# TEST CONSISTENCY
for c, d in product(list(self.todo.queue), list(self.todo.queue)):
@ -447,23 +472,29 @@ class ElasticsearchMetadata(Namespace):
if column is THREAD_STOP:
continue
DEBUG and Log.note("update {{table}}.{{column}}", table=column.es_index, column=column.es_column)
if column.es_index in self.index_does_not_exist:
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index}}
})
continue
if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
column.last_updated = Date.now()
continue
elif column.last_updated >= Date.now()-TOO_OLD:
continue
try:
self._update_cardinality(column)
(DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX)) and Log.note("updated {{column.name}}", column=column)
except Exception as e:
Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
with Timer("update {{table}}.{{column}}", param={"table": column.es_index, "column": column.es_column}, silent=not DEBUG):
if column.es_index in self.index_does_not_exist:
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index}}
})
continue
if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
column.last_updated = Date.now()
continue
elif column.last_updated >= Date.now()-TOO_OLD:
continue
try:
self._update_cardinality(column)
(DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX)) and Log.note("updated {{column.name}}", column=column)
except Exception as e:
if '"status":404' in e:
self.meta.columns.update({
"clear": ".",
"where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
})
else:
Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
except Exception as e:
Log.warning("problem in cardinality monitor", cause=e)
@ -478,23 +509,27 @@ class ElasticsearchMetadata(Namespace):
if c.last_updated >= Date.now()-TOO_OLD:
continue
self.meta.columns.update({
"set": {
"last_updated": Date.now()
},
"clear":[
"count",
"cardinality",
"multi",
"partitions",
],
"where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
})
DEBUG and Log.note("Did not get {{col.es_index}}.{{col.es_column}} info", col=c)
with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": c}, silent=not DEBUG, too_long=0.05):
self.meta.columns.update({
"set": {
"last_updated": Date.now()
},
"clear": [
"count",
"cardinality",
"multi",
"partitions",
],
"where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
})
def get_table(self, alias_name):
def get_table(self, name):
if name == "meta.columns":
return self.meta.columns
# return self.meta.columns
with self.meta.tables.locker:
return wrap([t for t in self.meta.tables.data if t.name == alias_name])
return wrap([t for t in self.meta.tables.data if t.name == name])
def get_snowflake(self, fact_table_name):
return Snowflake(fact_table_name, self)
@ -512,8 +547,8 @@ class Snowflake(object):
REPRESENT ONE ALIAS, AND ITS NESTED ARRAYS
"""
def __init__(self, alias, namespace):
self.alias = alias
def __init__(self, name, namespace):
self.name = name
self.namespace = namespace
def get_schema(self, query_path):
@ -524,20 +559,17 @@ class Snowflake(object):
"""
RETURN A LIST OF ALL NESTED COLUMNS
"""
output = self.namespace.alias_to_query_paths.get(self.alias)
output = self.namespace.alias_to_query_paths.get(self.name)
if output:
return output
Log.error("Can not find index {{index|quote}}", index=self.alias)
Log.error("Can not find index {{index|quote}}", index=self.name)
@property
def columns(self):
"""
RETURN ALL COLUMNS FROM ORIGIN OF FACT TABLE
"""
if any("verify_no_private_attachments" in t['method'] for t in extract_stack()):
pass
return self.namespace.get_columns(literal_field(self.alias))
return self.namespace.get_columns(literal_field(self.name))
class Schema(jx_base.Schema):
@ -605,11 +637,11 @@ class Schema(jx_base.Schema):
@property
def name(self):
return concat_field(self.snowflake.alias, self.query_path[0])
return concat_field(self.snowflake.name, self.query_path[0])
@property
def columns(self):
return self.snowflake.namespace.get_columns(literal_field(self.snowflake.alias))
return self.snowflake.namespace.get_columns(literal_field(self.snowflake.name))
def map_to_es(self):
"""
@ -689,4 +721,13 @@ def metadata_tables():
)
OBJECTS = (jx_base.OBJECT, jx_base.EXISTS)
def jx_type(column):
"""
return the jx_type for given column
"""
if column.es_column.endswith(EXISTS_TYPE):
return EXISTS
return es_type_to_json_type[column.es_type]
OBJECTS = (OBJECT, EXISTS)

103
vendor/jx_python/__init__.py vendored

@ -1,103 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from collections import Mapping
from jx_base import container
from mo_dots import Data
from mo_dots import wrap, set_default, split_field
from mo_future import text_type
from mo_logs import Log
config = Data() # config.default IS EXPECTED TO BE SET BEFORE CALLS ARE MADE
_ListContainer = None
_meta = None
def _delayed_imports():
global _ListContainer
global _meta
from jx_python import meta as _meta
from jx_python.containers.list_usingPythonList import ListContainer as _ListContainer
_ = _ListContainer
_ = _meta
try:
from pyLibrary.queries.jx_usingMySQL import MySQL
except Exception:
MySQL = None
try:
from jx_elasticsearch.meta import ElasticsearchMetadata
except Exception:
ElasticsearchSnowflake = None
set_default(container.type2container, {
"mysql": MySQL,
"memory": None,
"meta": ElasticsearchMetadata
})
def find_container(frum, schema=None):
"""
:param frum:
:param schema:
:return:
"""
if not _meta:
_delayed_imports()
frum = wrap(frum)
if isinstance(frum, text_type):
if not container.config.default.settings:
Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")
type_ = None
if frum.startswith("meta."):
if frum == "meta.columns":
return _meta.singlton.meta.columns.denormalized()
elif frum == "meta.tables":
return _meta.singlton.meta.tables
else:
Log.error("{{name}} not a recognized table", name=frum)
type_ = container.config.default.type
fact_table_name = split_field(frum)[0]
settings = set_default(
{
"index": fact_table_name,
"name": frum,
"exists": True,
},
container.config.default.settings
)
settings.type = None
return container.type2container[type_](settings)
elif isinstance(frum, Mapping) and frum.type and container.type2container[frum.type]:
# TODO: Ensure the frum.name is set, so we capture the deep queries
if not frum.type:
Log.error("Expecting from clause to have a 'type' property")
return container.type2container[frum.type](frum.settings)
elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
from jx_base.query import QueryOp
return QueryOp.wrap(frum, namespace=schema)
elif isinstance(frum, (list, set)):
return _ListContainer("test_list", frum)
else:
return frum

12
vendor/jx_python/containers/cube.py vendored

@ -322,16 +322,16 @@ class Cube(Container):
if isinstance(self.select, list):
selects = listwrap(self.select)
index, v = zip(*self.data[selects[0].name].groupby(selector))
index, v = transpose(*self.data[selects[0].name].groupby(selector))
coord = wrap([coord2term(c) for c in index])
values = [v]
for s in selects[1::]:
i, v = zip(*self.data[s.name].group_by(selector))
i, v = transpose(*self.data[s.name].group_by(selector))
values.append(v)
output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
output = transpose(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
elif not remainder:
# v IS A VALUE, NO NEED TO WRAP IT IN A Cube
output = (
@ -377,7 +377,7 @@ class Cube(Container):
if isinstance(self.select, list):
selects = listwrap(self.select)
index, v = zip(*self.data[selects[0].name].groupby(selector))
index, v = transpose(*self.data[selects[0].name].groupby(selector))
coord = wrap([coord2term(c) for c in index])
@ -386,7 +386,7 @@ class Cube(Container):
i, v = zip(*self.data[s.name].group_by(selector))
values.append(v)
output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
output = transpose(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
elif not remainder:
# v IS A VALUE, NO NEED TO WRAP IT IN A Cube
output = (
@ -409,7 +409,7 @@ class Cube(Container):
def window(self, window):
if window.edges or window.sort:
Log.error("not implemented")
raise NotImplementedError()
from jx_python import jx

2
vendor/jx_python/containers/doc_store.py vendored

@ -192,7 +192,7 @@ class DocStore(Container):
def having(self, having):
_ = having
Log.error("not implemented")
raise NotImplementedError()
def format(self, format):
if format == "list":

vendor/jx_python/containers/list_usingPythonList.py vendored

@ -14,8 +14,6 @@ from __future__ import unicode_literals
import itertools
from collections import Mapping
from mo_math import UNION
import jx_base
from jx_base import Container
from jx_base.expressions import jx_expression, Expression, Variable, TRUE
@ -207,7 +205,7 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table):
def having(self, having):
_ = having
Log.error("not implemented")
raise NotImplementedError()
def format(self, format):
if format == "table":

3
vendor/jx_python/jx.py vendored

@ -12,12 +12,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_times import Date
_range = range
from mo_times import Date
from collections import Mapping
from jx_base import query
from jx_python import expressions as _expressions
from jx_python import flat_list, group_by

126
vendor/jx_python/meta.py vendored

@ -14,16 +14,16 @@ from __future__ import unicode_literals
from collections import Mapping
from datetime import date
from datetime import datetime
from decimal import Decimal
import jx_base
from jx_base import python_type_to_json_type
from jx_base import STRUCT, Column, Table
from jx_base import Column, Table
from jx_base.schema import Schema
from jx_python import jx
from mo_collections import UniqueIndex
from mo_dots import Data, concat_field, get_attr, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap, coalesce
from mo_dots import Data, concat_field, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap, coalesce
from mo_future import none_type, text_type, long, PY2
from mo_json.typed_encoder import untype_path, unnest_path
from mo_json.typed_encoder import untype_path, unnest_path, python_type_to_json_type, STRUCT
from mo_logs import Log
from mo_threads import Lock
from mo_times.dates import Date
@ -31,7 +31,7 @@ from mo_times.dates import Date
singlton = None
class ColumnList(Table):
class ColumnList(Table, jx_base.Container):
"""
OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED
"""
@ -89,24 +89,22 @@ class ColumnList(Table):
values = set()
objects = 0
multi = 1
for t, cs in self.data.items():
for c, css in cs.items():
for column in css:
value = column[mc.names["."]]
if value == None:
pass
else:
count += 1
if isinstance(value, list):
multi = max(multi, len(value))
try:
values |= set(value)
except Exception:
objects += len(value)
elif isinstance(value, Mapping):
objects += 1
else:
values.add(value)
for column in self._all_columns():
value = column[mc.names["."]]
if value == None:
pass
else:
count += 1
if isinstance(value, list):
multi = max(multi, len(value))
try:
values |= set(value)
except Exception:
objects += len(value)
elif isinstance(value, Mapping):
objects += 1
else:
values.add(value)
mc.count = count
mc.cardinality = len(values) + objects
mc.partitions = jx.sort(values)
@ -114,12 +112,18 @@ class ColumnList(Table):
mc.last_updated = Date.now()
self.dirty = False
def _all_columns(self):
return [
column
for t, cs in self.data.items()
for _, css in cs.items()
for column in css
]
def __iter__(self):
self._update_meta()
for t, cs in self.data.items():
for c, css in cs.items():
for column in css:
yield column
with self.locker:
self._update_meta()
return iter(self._all_columns())
def __len__(self):
return self.data['meta.columns']['es_index'].count
@ -130,22 +134,49 @@ class ColumnList(Table):
command = wrap(command)
eq = command.where.eq
if eq.es_index:
columns = self.find(eq.es_index, eq.name)
columns = [
c
for c in columns
if all(get_attr(c, k) == v for k, v in eq.items())
]
all_columns = self.data.get(eq.es_index, {}).values()
if len(eq) == 1:
# FASTEST
with self.locker:
columns = [
c
for cs in all_columns
for c in cs
]
elif eq.es_column and len(eq) == 2:
# FASTER
with self.locker:
columns = [
c
for cs in all_columns
for c in cs
if c.es_column == eq.es_column
]
else:
# SLOWER
with self.locker:
columns = [
c
for cs in all_columns
for c in cs
if all(c[k] == v for k, v in eq.items()) # THIS LINE IS VERY SLOW
]
else:
with self.locker:
columns = list(self)
columns = jx.filter(columns, command.where)
columns = list(self)
columns = jx.filter(columns, command.where)
with self.locker:
for col in list(columns):
for col in columns:
for k in command["clear"]:
if k == ".":
columns.remove(col)
lst = self.data[col.es_index]
cols = lst[col.names['.']]
cols.remove(col)
if len(cols) == 0:
del lst[col.names['.']]
if len(lst) == 0:
del self.data[col.es_index]
else:
col[k] = None
@ -155,12 +186,17 @@ class ColumnList(Table):
Log.error("should not happen", cause=e)
def query(self, query):
# NOT EXPECTED TO BE RUN
Log.error("not")
with self.locker:
self._update_meta()
query.frum = self.__iter__()
output = jx.run(query)
if not self._schema:
self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs])
snapshot = self._all_columns()
return output
from jx_python.containers.list_usingPythonList import ListContainer
query.frum = ListContainer("meta.columns", snapshot, self._schema)
return jx.run(query)
def groupby(self, keys):
with self.locker:
@ -179,6 +215,11 @@ class ColumnList(Table):
def namespace(self):
return self
def get_table(self, table_name):
if table_name != "meta.columns":
Log.error("this container has only the meta.columns")
return self
def denormalized(self):
"""
THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
@ -374,6 +415,7 @@ _type_to_name = {
list: "nested",
FlatList: "nested",
Date: "double",
Decimal: "double",
datetime: "double",
date: "double"
}

2
vendor/jx_python/windows.py vendored

@ -232,7 +232,7 @@ class Max(WindowFunction):
self.max = mo_math.MAX([self.max, value])
def sub(self, value):
Log.error("Not implemented")
raise NotImplementedError()
def end(self):
return self.max

2
vendor/mo_collections/index.py vendored

@ -38,7 +38,7 @@ class Index(object):
try:
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
# RETURN ANOTHER Index
Log.error("not implemented")
raise NotImplementedError()
key = value2key(self._keys, key)
return wrap(copy(self._data.get(key, [])))

8
vendor/mo_collections/matrix.py vendored

@ -11,7 +11,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_future import text_type
from mo_future import text_type, xrange
from mo_dots import Null, Data, coalesce, get_module
from mo_kwargs import override
from mo_logs import Log
@ -335,18 +335,18 @@ def _getitem(c, i):
return (len(c), ), c
elif isinstance(select, slice):
sub = c[select]
dims, cube = zip(*[_getitem(cc, i[1::]) for cc in sub])
dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
return (len(cube),) + dims[0], cube
else:
return (), c[select]
else:
select = i[0]
if select == None:
dims, cube = zip(*[_getitem(cc, i[1::]) for cc in c])
dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in c])
return (len(cube),)+dims[0], cube
elif isinstance(select, slice):
sub = c[select]
dims, cube = zip(*[_getitem(cc, i[1::]) for cc in sub])
dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
return (len(cube),)+dims[0], cube
else:
with suppress_exception:

1
vendor/mo_collections/persistent_queue.py vendored

@ -93,7 +93,6 @@ class PersistentQueue(object):
yield value
except Exception as e:
Log.warning("Tell me about what happened here", cause=e)
DEBUG and Log.note("queue iterator is done")
def add(self, value):
with self.lock:

28
vendor/mo_collections/queue.py vendored

@ -13,6 +13,8 @@ from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from collections import deque
class Queue(object):
"""
@ -29,14 +31,28 @@ class Queue(object):
"""
def __init__(self):
self.set = set()
self.list = []
self.list = deque()
def __nonzero__(self):
return len(self.list) > 0
def __contains__(self, value):
return value in self.set
def __len__(self):
return self.list.__len__()
def __iter__(self):
return iter(self.list)
def __rsub__(self, other):
if isinstance(other, set):
return other - self.set
return set(o for o in other if o not in self.set)
def __data__(self):
return list(self.list)
def add(self, value):
if value in self.set:
return self
@ -44,7 +60,12 @@ class Queue(object):
self.list.append(value)
def push(self, value):
self.add(value)
if value in self.set:
self.list.remove(value)
else:
self.set.add(value)
self.list.appendleft(value)
def extend(self, values):
for v in values:
@ -54,7 +75,6 @@ class Queue(object):
if len(self.list) == 0:
return None
output = self.list.pop(0)
output = self.list.popleft()
self.set.remove(output)
return output
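The change above swaps the backing list for collections.deque, so popleft() is O(1) and push() can move an already-queued value to the front. A minimal, self-contained sketch of the intended behaviour (standard library only; the class below is a hypothetical stand-in, not the vendored Queue):

from collections import deque

class UniqueQueue(object):           # hypothetical stand-in, not the vendored class
    def __init__(self):
        self.set = set()             # fast membership test
        self.list = deque()          # O(1) append and popleft

    def add(self, value):
        if value not in self.set:    # duplicates are ignored
            self.set.add(value)
            self.list.append(value)
        return self

    def push(self, value):
        if value in self.set:        # already queued: move it to the front
            self.list.remove(value)
        else:
            self.set.add(value)
        self.list.appendleft(value)

    def pop(self):
        if not self.list:
            return None
        output = self.list.popleft()
        self.set.remove(output)
        return output

q = UniqueQueue()
q.add("a"); q.add("b"); q.add("a")   # the second "a" is ignored
q.push("c")                          # "c" jumps to the front
assert q.pop() == "c" and q.pop() == "a" and q.pop() == "b"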

10
vendor/mo_dots/__init__.py

@ -208,7 +208,7 @@ def _all_default(d, default, seen=None):
if default is None:
return
if isinstance(default, Data):
default = object.__getattribute__(default, b"_dict") # REACH IN AND GET THE dict
default = object.__getattribute__(default, SLOT) # REACH IN AND GET THE dict
# Log = _late_import()
# Log.error("strictly dict (or object) allowed: got {{type}}", type=default.__class__.__name__)
@ -417,11 +417,11 @@ def wrap(v):
:return: Data INSTANCE
"""
type_ = _get(v, "__class__")
type_ = v.__class__
if type_ is dict:
m = object.__new__(Data)
_set(m, "_dict", v)
_set(m, SLOT, v)
return m
elif type_ is none_type:
return Null
@ -489,7 +489,7 @@ def _wrap_leaves(value):
def unwrap(v):
_type = _get(v, "__class__")
if _type is Data:
d = _get(v, "_dict")
d = _get(v, SLOT)
return d
elif _type is FlatList:
return v.list
@ -569,6 +569,6 @@ def tuplewrap(value):
from mo_dots.nones import Null, NullType
from mo_dots.datas import Data
from mo_dots.datas import Data, SLOT
from mo_dots.lists import FlatList
from mo_dots.objects import DataObject
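The wrap()/unwrap() changes replace the hard-coded "_dict" attribute name with the shared SLOT constant, so the caller's dict is injected into a fresh Data without copying and can be handed back unchanged. A rough sketch of the pattern (the slot name and class here are illustrative stand-ins):

_get = object.__getattribute__
_set = object.__setattr__
SLOT = "_internal_dict"          # illustrative name; wrap() and unwrap() must agree on it

class Box(object):               # hypothetical stand-in for mo_dots.Data
    __slots__ = [SLOT]

def wrap_dict(d):
    m = object.__new__(Box)      # skip __init__ so the caller's dict is not copied
    _set(m, SLOT, d)
    return m

def unwrap_box(b):
    return _get(b, SLOT)         # hand the very same dict back

raw = {"a": 1}
boxed = wrap_dict(raw)
assert unwrap_box(boxed) is raw  # zero-copy round trip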

94
vendor/mo_dots/datas.py

@ -14,13 +14,15 @@ from __future__ import unicode_literals
from collections import MutableMapping, Mapping
from copy import deepcopy
from mo_dots.lists import FlatList
from mo_dots import _getdefault, hash_value, literal_field, coalesce, listwrap, get_logger
from mo_future import text_type, PY2
from mo_future import text_type, PY2, iteritems, none_type, generator_types
_get = object.__getattribute__
_set = object.__setattr__
SLOT = str("_internal_dict")
DEBUG = False
@ -29,7 +31,7 @@ class Data(MutableMapping):
Please see README.md
"""
__slots__ = ["_dict"]
__slots__ = [SLOT]
def __init__(self, *args, **kwargs):
"""
@ -37,59 +39,59 @@ class Data(MutableMapping):
IS UNLIKELY TO BE USEFUL. USE wrap() INSTEAD
"""
if DEBUG:
d = _get(self, "_dict")
d = self._internal_dict
for k, v in kwargs.items():
d[literal_field(k)] = unwrap(v)
else:
if args:
args0 = args[0]
if isinstance(args0, Data):
_set(self, "_dict", _get(args0, "_dict"))
_set(self, SLOT, _get(args0, SLOT))
elif isinstance(args0, dict):
_set(self, "_dict", args0)
_set(self, SLOT, args0)
else:
_set(self, "_dict", dict(args0))
_set(self, SLOT, dict(args0))
elif kwargs:
_set(self, "_dict", unwrap(kwargs))
_set(self, SLOT, unwrap(kwargs))
else:
_set(self, "_dict", {})
_set(self, SLOT, {})
def __bool__(self):
d = _get(self, "_dict")
d = self._internal_dict
if isinstance(d, dict):
return bool(d)
else:
return d != None
def __nonzero__(self):
d = _get(self, "_dict")
d = self._internal_dict
if isinstance(d, dict):
return True if d else False
else:
return d != None
def __contains__(self, item):
if Data.__getitem__(self, item):
value = Data.__getitem__(self, item)
if isinstance(value, Mapping) or value:
return True
return False
def __iter__(self):
d = _get(self, "_dict")
d = self._internal_dict
return d.__iter__()
def __getitem__(self, key):
if key == None:
return Null
if key == ".":
output = _get(self, "_dict")
output = self._internal_dict
if isinstance(output, Mapping):
return self
else:
return output
key = text_type(key)
d = _get(self, "_dict")
d = self._internal_dict
if key.find(".") >= 0:
seq = _split_field(key)
@ -118,11 +120,11 @@ class Data(MutableMapping):
# SOMETHING TERRIBLE HAPPENS WHEN value IS NOT A Mapping;
# HOPEFULLY THE ONLY OTHER METHOD RUN ON self IS unwrap()
v = unwrap(value)
_set(self, "_dict", v)
_set(self, SLOT, v)
return v
try:
d = _get(self, "_dict")
d = self._internal_dict
value = unwrap(value)
if key.find(".") == -1:
if value is None:
@ -148,31 +150,43 @@ class Data(MutableMapping):
raise e
def __getattr__(self, key):
d = _get(self, "_dict")
o = d.get(key)
if o == None:
d = self._internal_dict
v = d.get(key)
t = v.__class__
# OPTIMIZED wrap()
if t is dict:
m = object.__new__(Data)
_set(m, SLOT, v)
return m
elif t in (none_type, NullType):
return NullType(d, key)
return wrap(o)
elif t is list:
return FlatList(v)
elif t in generator_types:
return FlatList(list(unwrap(vv) for vv in v))
else:
return v
def __setattr__(self, key, value):
d = _get(self, "_dict")
d = self._internal_dict
value = unwrap(value)
if value is None:
d = _get(self, "_dict")
d = self._internal_dict
d.pop(key, None)
else:
d[key] = value
return self
def __hash__(self):
d = _get(self, "_dict")
d = self._internal_dict
return hash_value(d)
def __eq__(self, other):
if self is other:
return True
d = _get(self, "_dict")
d = self._internal_dict
if not isinstance(d, dict):
return d == other
@ -194,11 +208,11 @@ class Data(MutableMapping):
return not self.__eq__(other)
def get(self, key, default=None):
d = _get(self, "_dict")
d = self._internal_dict
return d.get(key, default)
def items(self):
d = _get(self, "_dict")
d = self._internal_dict
return [(k, wrap(v)) for k, v in d.items() if v != None or isinstance(v, Mapping)]
def leaves(self, prefix=None):
@ -209,42 +223,42 @@ class Data(MutableMapping):
def iteritems(self):
# LOW LEVEL ITERATION, NO WRAPPING
d = _get(self, "_dict")
d = self._internal_dict
return ((k, wrap(v)) for k, v in iteritems(d))
def keys(self):
d = _get(self, "_dict")
d = self._internal_dict
return set(d.keys())
def values(self):
d = _get(self, "_dict")
d = self._internal_dict
return listwrap(list(d.values()))
def clear(self):
get_logger().error("clear() not supported")
def __len__(self):
d = _get(self, "_dict")
d = self._internal_dict
return dict.__len__(d)
def copy(self):
return Data(**self)
def __copy__(self):
d = _get(self, "_dict")
d = self._internal_dict
return Data(**d)
def __deepcopy__(self, memo):
d = _get(self, "_dict")
d = self._internal_dict
return wrap(deepcopy(d, memo))
def __delitem__(self, key):
if key.find(".") == -1:
d = _get(self, "_dict")
d = self._internal_dict
d.pop(key, None)
return
d = _get(self, "_dict")
d = self._internal_dict
seq = _split_field(key)
for k in seq[:-1]:
d = d[k]
@ -252,7 +266,7 @@ class Data(MutableMapping):
def __delattr__(self, key):
key = text_type(key)
d = _get(self, "_dict")
d = self._internal_dict
d.pop(key, None)
def setdefault(self, k, d=None):
@ -262,13 +276,13 @@ class Data(MutableMapping):
def __str__(self):
try:
return dict.__str__(_get(self, "_dict"))
return dict.__str__(self._internal_dict)
except Exception:
return "{}"
def __repr__(self):
try:
return "Data("+dict.__repr__(_get(self, "_dict"))+")"
return "Data("+dict.__repr__(self._internal_dict)+")"
except Exception as e:
return "Data()"
@ -449,7 +463,7 @@ class _DictUsingSelf(dict):
get_logger().error("clear() not supported")
def __len__(self):
d = _get(self, "_dict")
d = self._internal_dict
return d.__len__()
def copy(self):
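One behavioural fix in this file worth calling out: __contains__ used to rely on truthiness alone, so a key holding an empty mapping looked absent; it now treats any Mapping value as present. A plain-dict illustration of the difference (helper names are mine):

from collections import Mapping   # Mapping still lives here in the Python versions targeted

def old_contains(d, key):
    return bool(d.get(key))                    # former behaviour: truthiness only

def new_contains(d, key):
    value = d.get(key)
    return isinstance(value, Mapping) or bool(value)

doc = {"settings": {}}                         # key is present but holds an empty mapping
assert old_contains(doc, "settings") is False  # looked missing before the change
assert new_contains(doc, "settings") is True   # any Mapping now counts as present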

82
vendor/mo_dots/lists.py

@ -17,6 +17,7 @@ from mo_dots import wrap, unwrap, coalesce
from mo_dots.nones import Null
_get = object.__getattribute__
_get_list = lambda self: _get(self, "list")
_set = object.__setattr__
_emit_slice_warning = True
@ -62,7 +63,7 @@ class FlatList(list):
if not Log:
_late_import()
Log.error("slice step must be None, do not know how to deal with values")
length = len(_get(self, "list"))
length = len(_get_list(self))
i = index.start
if i is None:
@ -74,15 +75,15 @@ class FlatList(list):
j = length
else:
j = max(min(j, length), 0)
return FlatList(_get(self, "list")[i:j])
return FlatList(_get_list(self)[i:j])
if index < 0 or len(_get(self, "list")) <= index:
if index < 0 or len(_get_list(self)) <= index:
return Null
return wrap(_get(self, "list")[index])
return wrap(_get_list(self)[index])
def __setitem__(self, i, y):
try:
_list = _get(self, "list")
_list = _get_list(self)
if i <= len(_list):
for i in range(len(_list), i):
_list.append(None)
@ -109,7 +110,7 @@ class FlatList(list):
if not Log:
_late_import()
return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get(self, "list")])
return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get_list(self)])
def select(self, key):
if not Log:
@ -117,7 +118,7 @@ class FlatList(list):
Log.error("Not supported. Use `get()`")
def filter(self, _filter):
return FlatList(vals=[unwrap(u) for u in (wrap(v) for v in _get(self, "list")) if _filter(u)])
return FlatList(vals=[unwrap(u) for u in (wrap(v) for v in _get_list(self)) if _filter(u)])
def __delslice__(self, i, j):
if not Log:
@ -128,20 +129,21 @@ class FlatList(list):
self.list = []
def __iter__(self):
return (wrap(v) for v in _get(self, "list"))
temp = [wrap(v) for v in _get_list(self)]
return iter(temp)
def __contains__(self, item):
return list.__contains__(_get(self, "list"), item)
return list.__contains__(_get_list(self), item)
def append(self, val):
_get(self, "list").append(unwrap(val))
_get_list(self).append(unwrap(val))
return self
def __str__(self):
return _get(self, "list").__str__()
return _get_list(self).__str__()
def __len__(self):
return _get(self, "list").__len__()
return _get_list(self).__len__()
def __getslice__(self, i, j):
global _emit_slice_warning
@ -157,45 +159,59 @@ class FlatList(list):
return self.list
def copy(self):
return FlatList(list(_get(self, "list")))
return FlatList(list(_get_list(self)))
def __copy__(self):
return FlatList(list(_get(self, "list")))
return FlatList(list(_get_list(self)))
def __deepcopy__(self, memo):
d = _get(self, "list")
d = _get_list(self)
return wrap(deepcopy(d, memo))
def remove(self, x):
_get(self, "list").remove(x)
_get_list(self).remove(x)
return self
def extend(self, values):
lst = _get_list(self)
for v in values:
_get(self, "list").append(unwrap(v))
lst.append(unwrap(v))
return self
def pop(self, index=None):
if index is None:
return wrap(_get(self, "list").pop())
return wrap(_get_list(self).pop())
else:
return wrap(_get(self, "list").pop(index))
return wrap(_get_list(self).pop(index))
def __eq__(self, other):
if isinstance(other, FlatList):
other = _get_list(other)
lst = _get_list(self)
if other == None and len(lst) == 0:
return True
if not isinstance(other, list):
return False
if len(lst) != len(other):
return False
return all([s == o for s, o in zip(lst, other)])
def __add__(self, value):
if value == None:
return self
output = list(_get(self, "list"))
output = list(_get_list(self))
output.extend(value)
return FlatList(vals=output)
def __or__(self, value):
output = list(_get(self, "list"))
output = list(_get_list(self))
output.append(value)
return FlatList(vals=output)
def __radd__(self, other):
output = list(other)
output.extend(_get(self, "list"))
output.extend(_get_list(self))
return FlatList(vals=output)
def __iadd__(self, other):
@ -210,59 +226,59 @@ class FlatList(list):
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:]
"""
if num == None:
return FlatList([_get(self, "list")[-1]])
return FlatList([_get_list(self)[-1]])
if num <= 0:
return Null
return FlatList(_get(self, "list")[-num:])
return FlatList(_get_list(self)[-num:])
def left(self, num=None):
"""
NOT REQUIRED, BUT EXISTS AS OPPOSITE OF right()
"""
if num == None:
return FlatList([_get(self, "list")[0]])
return FlatList([_get_list(self)[0]])
if num <= 0:
return Null
return FlatList(_get(self, "list")[:num])
return FlatList(_get_list(self)[:num])
def not_right(self, num):
"""
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
"""
if num == None:
return FlatList([_get(self, "list")[:-1:]])
return FlatList([_get_list(self)[:-1:]])
if num <= 0:
return FlatList.EMPTY
return FlatList(_get(self, "list")[:-num:])
return FlatList(_get_list(self)[:-num:])
def not_left(self, num):
"""
NOT REQUIRED, EXISTS AS OPPOSITE OF not_right()
"""
if num == None:
return FlatList([_get(self, "list")[-1]])
return FlatList([_get_list(self)[-1]])
if num <= 0:
return self
return FlatList(_get(self, "list")[num::])
return FlatList(_get_list(self)[num::])
def last(self):
"""
RETURN LAST ELEMENT IN FlatList [-1]
"""
lst = _get(self, "list")
lst = _get_list(self)
if lst:
return wrap(lst[-1])
return Null
def map(self, oper, includeNone=True):
if includeNone:
return FlatList([oper(v) for v in _get(self, "list")])
return FlatList([oper(v) for v in _get_list(self)])
else:
return FlatList([oper(v) for v in _get(self, "list") if v != None])
return FlatList([oper(v) for v in _get_list(self) if v != None])
FlatList.EMPTY = Null
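The newly added FlatList.__eq__ compares element-wise against ordinary lists and treats an empty FlatList as equal to None. The same rule written as a standalone function, for illustration:

def flatlist_eq(lst, other):
    # lst is the underlying plain list; other is whatever it is compared against
    if other is None and len(lst) == 0:
        return True                    # an empty FlatList compares equal to a missing value
    if not isinstance(other, list):
        return False
    if len(lst) != len(other):
        return False
    return all(s == o for s, o in zip(lst, other))

assert flatlist_eq([], None) is True
assert flatlist_eq([1, 2], [1, 2]) is True
assert flatlist_eq([1, 2], (1, 2)) is False    # tuples are not considered equal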

7
vendor/mo_dots/nones.py

@ -177,7 +177,11 @@ class NullType(object):
v = o.get(k)
if v == None:
return NullType(self, key)
return wrap(v.get(key))
try:
return wrap(v.get(key))
except Exception as e:
from mo_logs import Log
Log.error("not expected", cause=e)
def __setattr__(self, key, value):
key = text_type(key)
@ -223,6 +227,7 @@ class NullType(object):
def __hash__(self):
return hash(None)
Null = NullType() # INSTEAD OF None!!!

6
vendor/mo_dots/objects.py

@ -15,7 +15,7 @@ from collections import Mapping
from datetime import date, datetime
from decimal import Decimal
from mo_dots import wrap, unwrap, Data, FlatList, NullType, get_attr, set_attr
from mo_dots import wrap, unwrap, Data, FlatList, NullType, get_attr, set_attr, SLOT
from mo_future import text_type, binary_type, get_function_defaults, get_function_arguments, none_type, generator_types
_get = object.__getattribute__
@ -103,7 +103,7 @@ def datawrap(v):
if type_ is dict:
m = Data()
_set(m, "_dict", v) # INJECT m.__dict__=v SO THERE IS NO COPY
_set(m, SLOT, v) # INJECT m.__dict__=v SO THERE IS NO COPY
return m
elif type_ is Data:
return v
@ -127,7 +127,7 @@ def datawrap(v):
class DictClass(object):
"""
ALLOW INSTANCES OF class_ TO ACK LIKE dicts
ALLOW INSTANCES OF class_ TO ACT LIKE dicts
ALLOW CONSTRUCTOR TO ACCEPT @override
"""

28
vendor/mo_dots/utils.py

@ -14,9 +14,17 @@ from __future__ import unicode_literals
import importlib
import sys
from mo_future import PY2
_Log = None
if PY2:
STDOUT = sys.stdout
STDERR = sys.stderr
else:
STDOUT = sys.stdout.buffer
STDERR = sys.stderr.buffer
def get_logger():
global _Log
@ -31,6 +39,7 @@ def get_logger():
return _Log
def get_module(name):
try:
return importlib.import_module(name)
@ -39,16 +48,19 @@ def get_module(name):
class PoorLogger(object):
def note(self, note, **kwargs):
sys.stdout.write(note+"\n")
@classmethod
def note(cls, note, **kwargs):
STDOUT.write(note.encode('utf8')+b"\n")
def warning(self, note, **kwargs):
sys.stdout.write("WARNING: "+note+"\n")
@classmethod
def warning(cls, note, **kwargs):
STDOUT.write(b"WARNING: " + note.encode('utf8') + b"\n")
def error(self, note, **kwargs):
sys.stderr.write(note)
if "cause" in kwargs:
raise kwargs["cause"]
@classmethod
def error(cls, note, **kwargs):
STDERR.write(note.encode('utf8'))
if str("cause") in kwargs:
raise kwargs[str("cause")]
else:
raise Exception(note)
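The reworked PoorLogger writes UTF-8 bytes to sys.stdout.buffer on Python 3 (and to sys.stdout directly on Python 2), so messages are emitted without depending on the console's text encoding. The same idea in a few lines, as an illustrative sketch only:

import sys

# On Python 3 sys.stdout expects text and re-encodes with the console's encoding;
# writing UTF-8 bytes to the underlying buffer avoids that.
OUT = getattr(sys.stdout, "buffer", sys.stdout)   # bytes stream on Py3, stdout itself on Py2

def note(message):
    OUT.write(message.encode("utf8") + b"\n")

note(u"r\u00e9sum\u00e9 uploaded")   # emitted as UTF-8 bytes regardless of console encoding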

62
vendor/mo_files/__init__.py

@ -9,22 +9,22 @@
#
import base64
import io
import os
import re
import shutil
from datetime import datetime
from mimetypes import MimeTypes
from tempfile import mkdtemp, NamedTemporaryFile
import os
from mo_future import text_type, binary_type
from mo_dots import get_module, coalesce, Null
from mo_future import text_type, binary_type, PY3
from mo_logs import Log, Except
from mo_logs.exceptions import extract_stack
from mo_threads import Thread, Till
mime = MimeTypes()
class File(object):
"""
ASSUMES ALL FILE CONTENT IS UTF8 ENCODED STRINGS
@ -48,27 +48,32 @@ class File(object):
elif isinstance(filename, File):
return
elif isinstance(filename, (binary_type, text_type)):
self.key = None
if filename==".":
self._filename = ""
elif filename.startswith("~"):
home_path = os.path.expanduser("~")
if os.sep == "\\":
home_path = home_path.replace(os.sep, "/")
if home_path.endswith("/"):
home_path = home_path[:-1]
filename = home_path + filename[1::]
self._filename = filename.replace(os.sep, "/") # USE UNIX STANDARD
try:
self.key = None
if filename==".":
self._filename = ""
elif filename.startswith("~"):
home_path = os.path.expanduser("~")
if os.sep == "\\":
home_path = home_path.replace(os.sep, "/")
if home_path.endswith("/"):
home_path = home_path[:-1]
filename = home_path + filename[1::]
self._filename = filename.replace(os.sep, "/") # USE UNIX STANDARD
except Exception as e:
Log.error(u"can not load {{file}}", file=filename, cause=e)
else:
self.key = base642bytearray(filename.key)
self._filename = "/".join(filename.path.split(os.sep)) # USE UNIX STANDARD
try:
self.key = base642bytearray(filename.key)
self._filename = "/".join(filename.path.split(os.sep)) # USE UNIX STANDARD
except Exception as e:
Log.error(u"can not load {{file}}", file=filename.path, cause=e)
while self._filename.find(".../") >= 0:
# LET ... REFER TO GRANDPARENT, .... REFER TO GREAT-GRAND-PARENT, etc...
self._filename = self._filename.replace(".../", "../../")
self.buffering = buffering
if suffix:
self._filename = File.add_suffix(self._filename, suffix)
@ -419,9 +424,15 @@ class File(object):
def copy(cls, from_, to_):
_copy(File(from_), File(to_))
def __data__(self):
return self._filename
def __unicode__(self):
return self.abspath
def __str__(self):
return self.abspath
class TempDirectory(File):
"""
@ -469,11 +480,18 @@ def _copy(from_, to_):
File.new_instance(to_).write_bytes(File.new_instance(from_).read_bytes())
def base642bytearray(value):
if value == None:
return bytearray("")
else:
return bytearray(base64.b64decode(value))
if PY3:
def base642bytearray(value):
if value == None:
return bytearray(b"")
else:
return bytearray(base64.b64decode(value))
else:
def base642bytearray(value):
if value == None:
return bytearray(b"")
else:
return bytearray(base64.b64decode(value))
def datetime2string(value, format="%Y-%m-%d %H:%M:%S"):


@ -1,45 +1,18 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
# REMOVED SO LOGIC SWITCHES FROM BYTES TO STRING BETWEEN PY2 AND PY3 RESPECTIVELY
# from __future__ import unicode_literals
from collections import Mapping
from mo_dots import wrap, Data
from mo_future import text_type, PY3
from mo_future import urlparse
_value2json = None
_json2value = None
_Log = None
def _late_import():
global _value2json
global _json2value
global _Log
from mo_json import value2json as value2json_
from mo_json import json2value as json2value_
from mo_logs import Log as _Log
if PY3:
_value2json = value2json_
_json2value = json2value_
else:
_value2json = lambda v: value2json_(v).encode('latin1')
_json2value = lambda v: json2value_(v.decode('latin1'))
_ = _Log
from mo_dots import wrap, Data, coalesce, Null
from mo_future import urlparse, text_type, PY2, unichr
from mo_json import value2json, json2value
from mo_logs import Log
class URL(object):
@ -49,37 +22,34 @@ class URL(object):
[1] https://docs.python.org/3/library/urllib.parse.html
"""
def __init__(self, value):
def __init__(self, value, port=None, path=None, query=None, fragment=None):
try:
self.scheme = None
self.host = None
self.port = None
self.path = ""
self.query = ""
self.fragment = ""
self.port = port
self.path = path
self.query = query
self.fragment = fragment
if value == None:
return
if value.startswith("file://") or value.startswith("//"):
# urlparse DOES NOT WORK IN THESE CASES
scheme, suffix = value.split("//", 1)
scheme, suffix = value.split("//", 2)
self.scheme = scheme.rstrip(":")
parse(self, suffix, 0, 1)
self.query = wrap(url_param2value(self.query))
else:
output = urlparse(value)
self.scheme = output.scheme
self.port = output.port
self.port = coalesce(port, output.port)
self.host = output.netloc.split(":")[0]
self.path = output.path
self.query = wrap(url_param2value(output.query))
self.fragment = output.fragment
self.path = coalesce(path, output.path)
self.query = coalesce(query, wrap(url_param2value(output.query)))
self.fragment = coalesce(fragment, output.fragment)
except Exception as e:
if not _Log:
_late_import()
_Log.error(u"problem parsing {{value}} to URL", value=value, cause=e)
Log.error(u"problem parsing {{value}} to URL", value=value, cause=e)
def __nonzero__(self):
if self.scheme or self.host or self.port or self.path or self.query or self.fragment:
@ -91,19 +61,39 @@ class URL(object):
return True
return False
def __truediv__(self, other):
if not isinstance(other, text_type):
Log.error(u"Expecting text path")
output = self.__copy__()
output.path = output.path.rstrip('/') + "/" + other.lstrip('/')
return output
def __unicode__(self):
return self.__str__().decode('utf8') # ASSUME chr<128 ARE VALID UNICODE
def __copy__(self):
output = URL(None)
output.scheme = self.scheme
output.host = self.host
output.port = self.port
output.path = self.path
output.query = self.query
output.fragment = self.fragment
return output
def __data__(self):
return str(self)
def __str__(self):
url = ""
if self.host:
url = self.host
if self.scheme:
url = self.scheme + "://" + url
url = self.scheme + "://"+url
if self.port:
url = url + ":" + str(self.port)
if self.path:
if self.path[0] == "/":
if self.path[0] == text_type("/"):
url += str(self.path)
else:
url += "/" + str(self.path)
@ -114,16 +104,27 @@ class URL(object):
return url
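With the new constructor keywords and the added __truediv__, a URL can be parsed once and extended with path segments, while the query string is decoded into a Data-like object. A usage sketch based on the code above (the import location is an assumption taken from the new mo_json_config import; the printed results are what the code above should produce):

from mo_files.url import URL   # assumed location, matching the new import in mo_json_config

base = URL(u"https://hg.mozilla.org")
full = base / u"mozilla-central" / u"json-info"   # __truediv__ joins path segments with "/"
print(str(full))               # expected: https://hg.mozilla.org/mozilla-central/json-info

q = URL(u"http://example.com/search?limit=10&debug")
print(q.query.limit, q.query.debug)   # query string decoded to a Data-like object: 10 True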
def int_to_hex(value, size):
def int2hex(value, size):
return (("0" * size) + hex(value)[2:])[-size:]
_str_to_url = {chr(i): chr(i) for i in range(32, 128)}
for c in " {}<>;/?:@&=+$,":
_str_to_url[c] = "%" + int_to_hex(ord(c), 2)
for i in range(128, 256):
_str_to_url[chr(i)] = "%" + int_to_hex(i, 2)
_url_to_str = {v: k for k, v in _str_to_url.items()}
def hex2chr(hex):
return unichr(int(hex, 16))
if PY2:
_map2url = {chr(i): chr(i) for i in range(32, 128)}
for c in " {}<>;/?:@&=+$,":
_map2url[c] = "%" + str(int2hex(ord(c), 2))
for i in range(128, 256):
_map2url[chr(i)] = "%" + str(int2hex(i, 2))
else:
_map2url = {i: unichr(i) for i in range(32, 128)}
for c in b" {}<>;/?:@&=+$,":
_map2url[c] = "%" + int2hex(c, 2)
for i in range(128, 256):
_map2url[i] = "%" + str(int2hex(i, 2))
names = ["path", "query", "fragment"]
indicator = ["/", "?", "#"]
@ -146,30 +147,33 @@ def url_param2value(param):
"""
CONVERT URL QUERY PARAMETERS INTO DICT
"""
if param == None:
return Null
if param == None:
return Null
def _decode(v):
output = []
i = 0
while i < len(v):
c = v[i]
if c == "%":
d = _url_to_str[v[i:i + 3]]
d = hex2chr(v[i + 1:i + 3])
output.append(d)
i += 3
else:
output.append(c)
i += 1
output = ("".join(output))
output = text_type("".join(output))
try:
if not _Log:
_late_import()
return _json2value(output)
return json2value(output)
except Exception:
pass
return output
query = Data()
for p in param.split("&"):
for p in param.split('&'):
if not p:
continue
if p.find("=") == -1:
@ -190,27 +194,24 @@ def url_param2value(param):
return query
def value2url_param(value):
"""
:param value:
:return: ascii URL
"""
if not _Log:
_late_import()
if value == None:
_Log.error(u"Can not encode None into a URL")
Log.error("Can not encode None into a URL")
if isinstance(value, Mapping):
value_ = wrap(value)
output = "&".join([
value2url_param(k) + "=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(_value2json(v)))
value2url_param(k) + "=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(value2json(v)))
for k, v in value_.leaves()
])
elif isinstance(value, text_type):
output = "".join(_str_to_url[c] for c in value)
output = "".join(_map2url[c] for c in value.encode('utf8'))
elif isinstance(value, str):
output = "".join(_map2url[c] for c in value)
elif hasattr(value, "__iter__"):
output = ",".join(value2url_param(v) for v in value)
else:

84
vendor/mo_future/__init__.py

@ -14,19 +14,30 @@ from __future__ import unicode_literals
import json
import sys
PY3 = sys.version_info[0] == 3
PY2 = sys.version_info[0] == 2
PYPY = False
try:
import __pypy__ as _
PYPY=True
except Exception:
PYPY=False
none_type = type(None)
boolean_type = type(True)
if PY3:
import itertools
import collections
from functools import cmp_to_key
from configparser import ConfigParser
from itertools import zip_longest
izip = zip
zip_longest = itertools.zip_longest
text_type = str
string_types = str
@ -55,6 +66,9 @@ if PY3:
from io import BytesIO
from _thread import allocate_lock, get_ident, start_new_thread, interrupt_main
def items(d):
return list(d.items())
def iteritems(d):
return d.items()
@ -90,7 +104,10 @@ if PY3:
sort_keys=True # <-- IMPORTANT! sort_keys==True
).encode
UserDict = collections.UserDict
else:
import collections
import __builtin__
from types import GeneratorType
from ConfigParser import ConfigParser
@ -117,6 +134,9 @@ else:
from io import BytesIO
from thread import allocate_lock, get_ident, start_new_thread, interrupt_main
def items(d):
return d.items()
def iteritems(d):
return d.iteritems()
@ -154,3 +174,67 @@ else:
sort_keys=True # <-- IMPORTANT! sort_keys==True
).encode
# COPIED FROM Python's collections.UserDict (copied July 2018)
class UserDict(collections.MutableMapping):
# Start by filling-out the abstract methods
def __init__(*args, **kwargs):
if not args:
raise TypeError("descriptor '__init__' of 'UserDict' object "
"needs an argument")
self, args = args[0], args[1:]
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
if args:
dict = args[0]
elif 'dict' in kwargs:
dict = kwargs.pop('dict')
import warnings
warnings.warn("Passing 'dict' as keyword argument is deprecated",
DeprecationWarning, stacklevel=2)
else:
dict = None
self.data = {}
if dict is not None:
self.update(dict)
if len(kwargs):
self.update(kwargs)
def __len__(self): return len(self.data)
def __getitem__(self, key):
if key in self.data:
return self.data[key]
if hasattr(self.__class__, "__missing__"):
return self.__class__.__missing__(self, key)
raise KeyError(key)
def __setitem__(self, key, item): self.data[key] = item
def __delitem__(self, key): del self.data[key]
def __iter__(self):
return iter(self.data)
# Modify __contains__ to work correctly when __missing__ is present
def __contains__(self, key):
return key in self.data
# Now, add the methods in dicts but not in MutableMapping
def __repr__(self): return repr(self.data)
def copy(self):
if self.__class__ is UserDict:
return UserDict(self.data.copy())
import copy
data = self.data
try:
self.data = {}
c = copy.copy(self)
finally:
self.data = data
c.update(self)
return c
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
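The added items()/iteritems() helpers give calling code one spelling for dict iteration on both interpreters: items() always returns a concrete list of pairs, iteritems() is always lazy. A short usage sketch (assuming mo_future is importable, with the names defined in this diff):

from mo_future import text_type, items, iteritems   # names as defined in this diff

d = {"a": 1, "b": 2}
pairs = items(d)                   # always a concrete list of (key, value) pairs
print(pairs)
for k, v in iteritems(d):          # always lazy, on Python 2 and Python 3 alike
    assert isinstance(k, str)
assert isinstance(text_type("x"), text_type)   # unicode on Py2, str on Py3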

208
vendor/mo_hg/cache.py Normal file

@ -0,0 +1,208 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import division
from __future__ import unicode_literals
import json
from flask import Response
from mo_dots import coalesce
from mo_files.url import URL
from mo_future import text_type, xrange
from mo_json import value2json
from mo_kwargs import override
from mo_logs import Log
from mo_threads import Lock, Signal, Queue, Thread, Till
from mo_times import Date, SECOND, MINUTE
from pyLibrary.env import http
from pyLibrary.sql.sqlite import Sqlite, quote_value, quote_list
from mo_hg.rate_logger import RateLogger
APP_NAME = "HG Cache"
CONCURRENCY = 5
AMORTIZATION_PERIOD = SECOND
HG_REQUEST_PER_SECOND = 10
CACHE_RETENTION = 10 * MINUTE
class Cache(object):
"""
For Caching hg.mo requests
"""
@override
def __init__(self, rate=None, amortization_period=None, source=None, database=None, kwargs=None):
self.amortization_period = coalesce(amortization_period, AMORTIZATION_PERIOD)
self.rate = coalesce(rate, HG_REQUEST_PER_SECOND)
self.cache_locker = Lock()
self.cache = {} # MAP FROM url TO (ready, headers, response, timestamp) PAIR
self.no_cache = {} # VERY SHORT TERM CACHE
self.workers = []
self.todo = Queue(APP_NAME+" todo")
self.requests = Queue(APP_NAME + " requests", max=int(self.rate * self.amortization_period.seconds))
self.url = URL(source.url)
self.db = Sqlite(database)
self.inbound_rate = RateLogger("Inbound")
self.outbound_rate = RateLogger("hg.mo")
if not self.db.query("SELECT name FROM sqlite_master WHERE type='table'").data:
with self.db.transaction() as t:
t.execute(
"CREATE TABLE cache ("
" path TEXT PRIMARY KEY, "
" headers TEXT, "
" response TEXT, "
" timestamp REAL "
")"
)
self.threads = [
Thread.run(APP_NAME+" worker" + text_type(i), self._worker)
for i in range(CONCURRENCY)
]
self.limiter = Thread.run(APP_NAME+" limiter", self._rate_limiter)
self.cleaner = Thread.run(APP_NAME+" cleaner", self._cache_cleaner)
def _rate_limiter(self, please_stop):
try:
max_requests = self.requests.max
recent_requests = []
while not please_stop:
now = Date.now()
too_old = now - self.amortization_period
recent_requests = [t for t in recent_requests if t > too_old]
num_recent = len(recent_requests)
if num_recent >= max_requests:
space_free_at = recent_requests[0] + self.amortization_period
(please_stop | Till(till=space_free_at.unix)).wait()
continue
for _ in xrange(num_recent, max_requests):
request = self.todo.pop()
now = Date.now()
recent_requests.append(now)
self.requests.add(request)
except Exception as e:
Log.warning("failure", cause=e)
def _cache_cleaner(self, please_stop):
while not please_stop:
now = Date.now()
too_old = now-CACHE_RETENTION
remove = set()
with self.cache_locker:
for path, (ready, headers, response, timestamp) in self.cache:
if timestamp < too_old:
remove.add(path)
for r in remove:
del self.cache[r]
(please_stop | Till(seconds=CACHE_RETENTION.seconds / 2)).wait()
def please_cache(self, path):
"""
:return: False if `path` is not to be cached
"""
if path.endswith("/tip"):
return False
if any(k in path for k in ["/json-annotate/", "/json-info/", "/json-log/", "/json-rev/", "/rev/", "/raw-rev/", "/raw-file/", "/json-pushes", "/pushloghtml", "/file/"]):
return True
return False
def request(self, method, path, headers):
now = Date.now()
self.inbound_rate.add(now)
ready = Signal(path)
# TEST CACHE
with self.cache_locker:
pair = self.cache.get(path)
if pair is None:
self.cache[path] = (ready, None, None, now)
if pair is not None:
# REQUEST IS IN THE QUEUE ALREADY, WAIT
ready, headers, response, then = pair
if response is None:
ready.wait()
with self.cache_locker:
ready, headers, response, timestamp = self.cache.get(path)
with self.db.transaction() as t:
t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
return Response(
response,
status=200,
headers=json.loads(headers)
)
# TEST DB
db_response = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data
if db_response:
headers, response = db_response[0]
with self.db.transaction() as t:
t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
with self.cache_locker:
self.cache[path] = (ready, headers, response.encode('latin1'), now)
ready.go()
return Response(
response,
status=200,
headers=json.loads(headers)
)
# MAKE A NETWORK REQUEST
self.todo.add((ready, method, path, headers, now))
ready.wait()
with self.cache_locker:
ready, headers, response, timestamp = self.cache[path]
return Response(
response,
status=200,
headers=json.loads(headers)
)
def _worker(self, please_stop):
while not please_stop:
pair = self.requests.pop(till=please_stop)
if please_stop:
break
ready, method, path, req_headers, timestamp = pair
try:
url = self.url / path
self.outbound_rate.add(Date.now())
response = http.request(method, url, req_headers)
del response.headers['transfer-encoding']
resp_headers = value2json(response.headers)
resp_content = response.raw.read()
please_cache = self.please_cache(path)
if please_cache:
with self.db.transaction() as t:
t.execute("INSERT INTO cache (path, headers, response, timestamp) VALUES" + quote_list((path, resp_headers, resp_content.decode('latin1'), timestamp)))
with self.cache_locker:
self.cache[path] = (ready, resp_headers, resp_content, timestamp)
except Exception as e:
Log.warning("problem with request to {{path}}", path=path, cause=e)
with self.cache_locker:
ready, headers, response = self.cache[path]
del self.cache[path]
finally:
ready.go()
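The _rate_limiter thread admits work from todo into requests only while fewer than rate x amortization_period requests fall inside the sliding window. A stripped-down, thread-free sketch of that admission rule (constants and names below are illustrative, not the vendored code):

RATE = 10            # requests per second (HG_REQUEST_PER_SECOND above)
PERIOD = 1.0         # amortization period, seconds
MAX_IN_WINDOW = int(RATE * PERIOD)

recent = []          # timestamps of requests already released in the current window

def try_release(now):
    """Return True if one more upstream request may be issued at time `now`."""
    global recent
    recent = [t for t in recent if t > now - PERIOD]   # forget entries older than the window
    if len(recent) >= MAX_IN_WINDOW:
        return False       # window is full; caller waits until recent[0] + PERIOD
    recent.append(now)
    return True

# a burst of 15 requests at the same instant: only the first 10 are admitted
admitted = sum(try_release(100.0) for _ in range(15))
assert admitted == MAX_IN_WINDOW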

10
vendor/mo_hg/hg_branches.py

@ -24,6 +24,7 @@ from pyLibrary.env import elasticsearch, http
EXTRA_WAIT_TIME = 20 * SECOND # WAIT TIME TO SEND TO AWS, IF WE wait_forever
OLD_BRANCH = DAY
BRANCH_WHITELIST = None
@override
@ -175,6 +176,15 @@ def _get_single_branch_from_hg(settings, description, dir):
detail.locale = _path[-1]
detail.name = "weave"
if BRANCH_WHITELIST is not None:
found = False
for br in BRANCH_WHITELIST:
if br in str(detail.name):
found = True
break
if not found:
continue
Log.note("Branch {{name}} {{locale}}", name=detail.name, locale=detail.locale)
output.append(detail)
except Exception as e:

7
vendor/mo_hg/hg_mozilla_org.py

@ -7,6 +7,7 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
@ -575,10 +576,10 @@ class HgMozillaOrg(object):
json_diff = diff_to_json(diff)
num_changes = _count(c for f in json_diff for c in f.changes)
if json_diff:
if num_changes < MAX_DIFF_SIZE:
return json_diff
elif revision.changeset.description.startswith("merge "):
if revision.changeset.description.startswith("merge "):
return None # IGNORE THE MERGE CHANGESETS
elif num_changes < MAX_DIFF_SIZE:
return json_diff
else:
Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes)
for file in json_diff:

1
vendor/mo_hg/parse.py

@ -6,6 +6,7 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

45
vendor/mo_hg/rate_logger.py Normal file

@ -0,0 +1,45 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_logs import Log
from mo_threads import Till, Thread, Lock
from mo_times import Date, SECOND
METRIC_DECAY_RATE = 0.9 # PER-SECOND DECAY RATE FOR REPORTING REQUEST RATE
METRIC_REPORT_PERIOD = 10 * SECOND
class RateLogger(object):
def __init__(self, name):
self.name = name
self.lock = Lock("rate locker")
self.request_rate = 0.0
self.last_request = Date.now()
Thread.run("rate logger", self._daemon)
def add(self, timestamp):
with self.lock:
decay = METRIC_DECAY_RATE ** (timestamp - self.last_request).seconds
self.request_rate = decay*self.request_rate + 1
self.last_request = timestamp
def _daemon(self, please_stop):
while not please_stop:
timestamp = Date.now()
with self.lock:
decay = METRIC_DECAY_RATE ** (timestamp - self.last_request).seconds
request_rate = self.request_rate = decay * self.request_rate
self.last_request = timestamp
Log.note("{{name}} request rate: {{rate|round(places=2)}} requests per second", name=self.name, rate=request_rate)
(please_stop | Till(seconds=METRIC_REPORT_PERIOD.seconds)).wait()
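The reported rate is an exponentially decayed counter: each event multiplies the previous value by METRIC_DECAY_RATE raised to the elapsed seconds and then adds one, so a steady stream of one request per second converges to 1/(1-0.9) = 10. A small worked check of that arithmetic:

DECAY = 0.9   # METRIC_DECAY_RATE, per-second decay factor

def update(rate, seconds_since_last):
    # same arithmetic as RateLogger.add(): decay the old estimate, then count this event
    return (DECAY ** seconds_since_last) * rate + 1

rate = 0.0
for _ in range(100):           # one request arriving every second
    rate = update(rate, 1.0)
print(round(rate, 2))          # approaches the steady state 1 / (1 - 0.9) = 10.0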

107
vendor/mo_hg/relay_app.py Normal file

@ -0,0 +1,107 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import os
import flask
from flask import Flask, Response
from mo_hg.cache import Cache
from mo_json import value2json
from mo_logs import Log, constants, startup, Except
from mo_logs.strings import unicode2utf8
from pyLibrary.env.flask_wrappers import cors_wrapper
APP_NAME = "HG Relay"
class RelayApp(Flask):
def run(self, *args, **kwargs):
# ENSURE THE LOGGING IS CLEANED UP
try:
Flask.run(self, *args, **kwargs)
except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
Log.warning(APP_NAME + " service shutdown!", cause=e)
finally:
Log.stop()
flask_app = None
config = None
cache = None
@cors_wrapper
def relay_get(path):
try:
return cache.request("get", path, flask.request.headers)
except Exception as e:
e = Except.wrap(e)
Log.warning("could not handle request", cause=e)
return Response(
unicode2utf8(value2json(e, pretty=True)),
status=400,
headers={
"Content-Type": "text/html"
}
)
@cors_wrapper
def relay_post(path):
try:
return cache.request("post", path, flask.request.headers)
except Exception as e:
e = Except.wrap(e)
Log.warning("could not handle request", cause=e)
return Response(
unicode2utf8(value2json(e, pretty=True)),
status=400,
headers={
"Content-Type": "text/html"
}
)
def add(any_flask_app):
global cache
cache = Cache(config.cache)
any_flask_app.add_url_rule(str('/<path:path>'), None, relay_get, methods=[str('GET')])
any_flask_app.add_url_rule(str('/<path:path>'), None, relay_post, methods=[str('POST')])
any_flask_app.add_url_rule(str('/'), None, relay_get, methods=[str('GET')])
any_flask_app.add_url_rule(str('/'), None, relay_post, methods=[str('POST')])
if __name__ in ("__main__",):
Log.note("Starting " + APP_NAME + " Service App...")
flask_app = RelayApp(__name__)
try:
config = startup.read_settings(
filename=os.environ.get('HG_RELAY_CONFIG')
)
constants.set(config.constants)
Log.start(config.debug)
add(flask_app)
Log.note("Started " + APP_NAME + " Service")
except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
try:
Log.error("Serious problem with " + APP_NAME + " service construction! Shutdown!", cause=e)
finally:
Log.stop()
if config.flask:
if config.flask.port and config.args.process_num:
config.flask.port += config.args.process_num
Log.note("Running Flask...")
flask_app.run(**config.flask)

1
vendor/mo_hg/repos/changesets.py

@ -7,6 +7,7 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

1
vendor/mo_hg/repos/pushs.py

@ -7,6 +7,7 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

1
vendor/mo_hg/repos/revisions.py

@ -7,6 +7,7 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

4
vendor/mo_json/__init__.py

@ -17,7 +17,7 @@ from collections import Mapping
from datetime import date, timedelta, datetime
from decimal import Decimal
from mo_dots import FlatList, NullType, Data, wrap_leaves, wrap, Null
from mo_dots import FlatList, NullType, Data, wrap_leaves, wrap, Null, SLOT
from mo_dots.objects import DataObject
from mo_future import text_type, none_type, long, binary_type, PY2
from mo_logs import Except, strings, Log
@ -158,7 +158,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number):
elif type_ is Decimal:
return scrub_number(value)
elif type_ is Data:
return _scrub(_get(value, '_dict'), is_done, stack, scrub_text, scrub_number)
return _scrub(_get(value, SLOT), is_done, stack, scrub_text, scrub_number)
elif isinstance(value, Mapping):
_id = id(value)
if _id in is_done:

34
vendor/mo_json/encoder.py

@ -13,7 +13,6 @@ from __future__ import unicode_literals
import json
import math
import sys
import time
from collections import Mapping
from datetime import datetime, date, timedelta
@ -21,11 +20,12 @@ from decimal import Decimal
from json.encoder import encode_basestring
from math import floor
from mo_dots import Data, FlatList, NullType, Null
from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange
from mo_dots import Data, FlatList, NullType, Null, SLOT
from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange, PYPY
from mo_json import ESCAPE_DCT, scrub, float2json
from mo_logs import Except
from mo_logs.strings import utf82unicode, quote
from mo_times import Timer
from mo_times.dates import Date
from mo_times.durations import Duration
@ -43,8 +43,6 @@ _ = Except
# 2) WHEN USING PYPY, WE USE CLEAR-AND-SIMPLE PROGRAMMING SO THE OPTIMIZER CAN DO
# ITS JOB. ALONG WITH THE UnicodeBuilder WE GET NEAR C SPEEDS
use_pypy = False
COMMA = u","
QUOTE = u'"'
COLON = u":"
@ -54,20 +52,10 @@ COMMA_QUOTE = COMMA + QUOTE
PRETTY_COMMA = u", "
PRETTY_COLON = u": "
try:
if PYPY:
# UnicodeBuilder IS ABOUT 2x FASTER THAN list()
from __pypy__.builders import UnicodeBuilder
use_pypy = True
except Exception as e:
if use_pypy:
sys.stdout.write(
b"*********************************************************\n"
b"** The PyLibrary JSON serializer for PyPy is in use!\n"
b"** Currently running CPython: This will run sloooow!\n"
b"*********************************************************\n"
)
else:
class UnicodeBuilder(list):
def __init__(self, length=None):
list.__init__(self)
@ -121,8 +109,10 @@ class cPythonJSONEncoder(object):
return pretty_json(value)
try:
scrubbed = scrub(value)
return text_type(self.encoder(scrubbed))
with Timer("scrub", too_long=0.1):
scrubbed = scrub(value)
with Timer("encode", too_long=0.1):
return text_type(self.encoder(scrubbed))
except Exception as e:
from mo_logs.exceptions import Except
from mo_logs import Log
@ -184,7 +174,7 @@ def _value2json(value, _buffer):
_dict2json(value, _buffer)
return
elif type is Data:
d = _get(value, "_dict") # MIGHT BE A VALUE NOT A DICT
d = _get(value, SLOT) # MIGHT BE A VALUE NOT A DICT
_value2json(d, _buffer)
return
elif type in (int, long, Decimal):
@ -287,7 +277,7 @@ def pretty_json(value):
return "true"
elif isinstance(value, Mapping):
try:
items = sort_using_key(list(value.items()), lambda r: r[0])
items = sort_using_key(value.items(), lambda r: r[0])
values = [encode_basestring(k) + PRETTY_COLON + indent(pretty_json(v)).strip() for k, v in items if v != None]
if not values:
return "{}"
@ -509,7 +499,7 @@ def unicode_key(key):
# OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON?
# http://liangnuren.wordpress.com/2012/08/13/python-json-performance/
# http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html
if use_pypy:
if PYPY:
json_encoder = pypy_json_encode
else:
# from ujson import dumps as ujson_dumps

125
vendor/mo_json/typed_encoder.py

@ -11,22 +11,20 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import time
from collections import Mapping
from datetime import date, datetime, timedelta
from decimal import Decimal
from json.encoder import encode_basestring
from datetime import date, datetime, timedelta
import time
from jx_base import Column, python_type_to_json_type, NESTED, EXISTS, STRING, NUMBER, INTEGER, BOOLEAN
from mo_dots import Data, FlatList, NullType, join_field, split_field
from mo_future import text_type, binary_type, sort_using_key
from mo_dots import Data, FlatList, NullType, join_field, split_field, _get, SLOT, DataObject
from mo_future import text_type, binary_type, sort_using_key, long, PY2, none_type, generator_types
from mo_json import ESCAPE_DCT, float2json
from mo_json.encoder import UnicodeBuilder, COLON, COMMA, problem_serializing, json_encoder
from mo_logs import Log
from mo_logs.strings import quote, utf82unicode
from mo_times import Date, Duration
from mo_json import ESCAPE_DCT, float2json
from mo_json.encoder import UnicodeBuilder, COLON, COMMA, problem_serializing, json_encoder
def encode_property(name):
return name.replace(",", "\\,").replace(".", ",")
@ -55,23 +53,59 @@ def unnest_path(encoded):
def untyped(value):
return _untype(value)
return _untype_value(value)
def _untype(value):
if isinstance(value, Mapping):
output = {}
def _untype_list(value):
if any(isinstance(v, Mapping) for v in value):
# MAY BE MORE TYPED OBJECTS IN THIS LIST
output = [_untype_value(v) for v in value]
else:
# LIST OF PRIMITIVE VALUES
output = value
for k, v in value.items():
if len(output) == 0:
return None
elif len(output) == 1:
return output[0]
else:
return output
def _untype_dict(value):
output = {}
for k, v in value.items():
if k.startswith(TYPE_PREFIX):
if k == EXISTS_TYPE:
continue
elif k.startswith(TYPE_PREFIX):
return v
elif k == NESTED_TYPE:
return _untype_list(v)
else:
output[decode_property(k)] = _untype(v)
return output
elif isinstance(value, list):
return [_untype(v) for v in value]
return v
else:
new_v = _untype_value(v)
if new_v is not None:
output[decode_property(k)] = new_v
return output
def _untype_value(value):
_type = _get(value, "__class__")
if _type is Data:
return _untype_dict(_get(value, SLOT))
elif _type is dict:
return _untype_dict(value)
elif _type is FlatList:
return _untype_list(value.list)
elif _type is list:
return _untype_list(value)
elif _type is NullType:
return None
elif _type is DataObject:
return _untype_value(_get(value, "_obj"))
elif _type in generator_types:
return _untype_list(value)
else:
return value
@ -90,7 +124,7 @@ def encode(value):
def typed_encode(value, sub_schema, path, net_new_properties, buffer):
"""
:param value: THE DATASCRUTURE TO ENCODE
:param value: THE DATA STRUCTURE TO ENCODE
:param sub_schema: dict FROM PATH TO Column DESCRIBING THE TYPE
:param path: list OF CURRENT PATH
:param net_new_properties: list FOR ADDING NEW PROPERTIES NOT FOUND IN sub_schema
@ -98,7 +132,8 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
:return:
"""
try:
if isinstance(sub_schema, Column):
# from jx_base import Column
if sub_schema.__class__.__name__=='Column':
value_json_type = python_type_to_json_type[value.__class__]
column_json_type = es_type_to_json_type[sub_schema.es_type]
@ -135,7 +170,7 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
_type = value.__class__
if _type in (dict, Data):
if isinstance(sub_schema, Column):
if sub_schema.__class__.__name__ == 'Column':
from mo_logs import Log
Log.error("Can not handle {{column|json}}", column=sub_schema)
@ -191,16 +226,16 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer):
for c in value:
append(buffer, ESCAPE_DCT.get(c, c))
append(buffer, '"}')
elif _type in (int, long, Decimal):
elif _type in (int, long):
if NUMBER_TYPE not in sub_schema:
sub_schema[NUMBER_TYPE] = True
net_new_properties.append(path + [NUMBER_TYPE])
append(buffer, '{')
append(buffer, QUOTED_NUMBER_TYPE)
append(buffer, float2json(value))
append(buffer, text_type(value))
append(buffer, '}')
elif _type is float:
elif _type in (float, Decimal):
if NUMBER_TYPE not in sub_schema:
sub_schema[NUMBER_TYPE] = True
net_new_properties.append(path + [NUMBER_TYPE])
@ -362,7 +397,7 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
if k not in sub_schema:
sub_schema[k] = {}
net_new_properties.append(path + [k])
append(buffer, encode_basestring(k))
append(buffer, encode_basestring(encode_property(k)))
append(buffer, COLON)
typed_encode(v, sub_schema[k], path + [k], net_new_properties, buffer)
if prefix is COMMA:
@ -372,7 +407,43 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer):
else:
append(buffer, '{')
append(buffer, QUOTED_EXISTS_TYPE)
append(buffer, '0}')
append(buffer, '1}')
IS_NULL = '0'
BOOLEAN = 'boolean'
INTEGER = 'integer'
NUMBER = 'number'
STRING = 'string'
OBJECT = 'object'
NESTED = "nested"
EXISTS = "exists"
JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT]
PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING]
STRUCT = [EXISTS, OBJECT, NESTED]
python_type_to_json_type = {
int: NUMBER,
text_type: STRING,
float: NUMBER,
None: OBJECT,
bool: BOOLEAN,
NullType: OBJECT,
none_type: OBJECT,
Data: OBJECT,
dict: OBJECT,
object: OBJECT,
Mapping: OBJECT,
list: NESTED,
FlatList: NESTED,
Date: NUMBER
}
if PY2:
python_type_to_json_type[str] = STRING
python_type_to_json_type[long] = NUMBER
TYPE_PREFIX = "~" # u'\u0442\u0443\u0440\u0435-' # "туре"
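untyped() strips the "~"-prefixed type markers that typed_encode() inserts: the existence marker is dropped, a nested marker unwraps to a list, and any other typed key collapses to its value. A simplified re-implementation over plain dicts and lists (the exact marker spellings below are assumptions; only the "~" prefix appears in this diff):

TYPE_PREFIX = "~"
EXISTS_TYPE = "~e~"      # assumed spelling of the existence marker
NESTED_TYPE = "~N~"      # assumed spelling of the nested-array marker

def untype(value):
    # simplified re-implementation of _untype_value/_untype_dict over plain dicts and lists
    if isinstance(value, dict):
        output = {}
        for k, v in value.items():
            if k == EXISTS_TYPE:
                continue                               # drop the existence counter
            elif k == NESTED_TYPE:
                return [untype(item) for item in v]    # unwrap nested arrays
            elif k.startswith(TYPE_PREFIX):
                return v                               # {"~n~": 3} collapses to 3
            else:
                output[k] = untype(v)                  # the real code also decodes "," back to "."
        return output
    elif isinstance(value, list):
        return [untype(item) for item in value]
    return value

typed = {"~e~": 1, "name": {"~s~": "x"}, "size": {"~n~": 3}}
assert untype(typed) == {"name": "x", "size": 3}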

29
vendor/mo_json_config/__init__.py

@ -18,19 +18,28 @@ from collections import Mapping
import mo_dots
from mo_dots import set_default, wrap, unwrap
from mo_files import File
from mo_files.url import URL
from mo_future import text_type
from mo_json import json2value
from mo_json_config.convert import ini2value
from mo_logs import Log, Except
from mo_logs.url import URL
DEBUG = False
def get_file(file):
file = File(file)
if os.sep=="\\":
return get("file:///" + file.abspath)
else:
return get("file://" + file.abspath)
def get(url):
"""
USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON
"""
url = str(url)
url = text_type(url)
if url.find("://") == -1:
Log.error("{{url}} must have a prototcol (eg http://) declared", url=url)
@ -114,6 +123,8 @@ def _replace_ref(node, url):
if not output:
output = new_value
elif isinstance(output, text_type):
Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
else:
output = unwrap(set_default(output, new_value))
@ -181,7 +192,7 @@ def _replace_locals(node, doc_path):
## SCHEME LOADERS ARE BELOW THIS LINE
###############################################################################
def get_file(ref, url):
def _get_file(ref, url):
if ref.path.startswith("~"):
home_path = os.path.expanduser("~")
@ -233,17 +244,17 @@ def get_http(ref, url):
return new_value
def get_env(ref, url):
def _get_env(ref, url):
# GET ENVIRONMENT VARIABLES
ref = ref.host
try:
new_value = json2value(os.environ[ref])
except Exception as e:
new_value = os.environ[ref]
new_value = os.environ.get(ref)
return new_value
def get_param(ref, url):
def _get_param(ref, url):
# GET PARAMETERS FROM url
param = url.query
new_value = param[ref.host]
@ -252,8 +263,8 @@ def get_param(ref, url):
scheme_loaders = {
"http": get_http,
"file": get_file,
"env": get_env,
"param": get_param
"file": _get_file,
"env": _get_env,
"param": _get_param
}
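Reference expansion is a dispatch on the URL scheme: the resolver looks the scheme up in scheme_loaders and calls the matching loader with the reference. A toy dispatcher with fake loaders, to show the shape of the mechanism (the loader bodies here are hypothetical; only the (ref, url) signature follows the diff):

import os

# hypothetical stand-ins for the real loaders; same (ref, url) signature as _get_env/_get_file
def load_env(ref, url):
    return os.environ.get(ref.host)            # env://HOME -> value of $HOME

def load_file(ref, url):
    with open(ref.path) as f:                  # file:///etc/hostname -> file contents
        return f.read()

toy_loaders = {"env": load_env, "file": load_file}

class Ref(object):
    def __init__(self, scheme, host="", path=""):
        self.scheme, self.host, self.path = scheme, host, path

ref = Ref("env", host="HOME")
print(toy_loaders[ref.scheme](ref, None))      # dispatch on scheme, as the real resolver does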

180
vendor/mo_json_config/url.py

@ -1,180 +0,0 @@
from collections import Mapping
from urlparse import urlparse
from mo_dots import wrap, Data
from mo_json import value2json, json2value
from mo_logs import Log
class URL(object):
"""
JUST LIKE urllib.parse() [1], BUT CAN HANDLE JSON query PARAMETERS
[1] https://docs.python.org/3/library/urllib.parse.html
"""
def __init__(self, value):
try:
self.scheme = None
self.host = None
self.port = None
self.path = ""
self.query = ""
self.fragment = ""
if value == None:
return
if value.startswith("file://") or value.startswith("//"):
# urlparse DOES NOT WORK IN THESE CASES
scheme, suffix = value.split("//", 2)
self.scheme = scheme.rstrip(":")
parse(self, suffix, 0, 1)
self.query = wrap(url_param2value(self.query))
else:
output = urlparse(value)
self.scheme = output.scheme
self.port = output.port
self.host = output.netloc.split(":")[0]
self.path = output.path
self.query = wrap(url_param2value(output.query))
self.fragment = output.fragment
except Exception as e:
Log.error("problem parsing {{value}} to URL", value=value, cause=e)
def __nonzero__(self):
if self.scheme or self.host or self.port or self.path or self.query or self.fragment:
return True
return False
def __bool__(self):
if self.scheme or self.host or self.port or self.path or self.query or self.fragment:
return True
return False
def __unicode__(self):
return self.__str__().decode('utf8') # ASSUME chr<128 ARE VALID UNICODE
def __str__(self):
url = b""
if self.host:
url = self.host
if self.scheme:
url = self.scheme + b"://"+url
if self.port:
url = url + b":" + str(self.port)
if self.path:
if self.path[0]=="/":
url += str(self.path)
else:
url += b"/"+str(self.path)
if self.query:
url = url + b'?' + value2url_param(self.query)
if self.fragment:
url = url + b'#' + value2url_param(self.fragment)
return url
def int2hex(value, size):
return (("0" * size) + hex(value)[2:])[-size:]
_map2url = {chr(i): chr(i) for i in range(32, 128)}
for c in b" {}<>;/?:@&=+$,":
_map2url[c] = b"%" + str(int2hex(ord(c), 2))
for i in range(128, 256):
_map2url[chr(i)] = b"%" + str(int2hex(i, 2))
names = ["path", "query", "fragment"]
indicator = ["/", "?", "#"]
def parse(output, suffix, curr, next):
if next == len(indicator):
output.__setattr__(names[curr], suffix)
return
e = suffix.find(indicator[next])
if e == -1:
parse(output, suffix, curr, next + 1)
else:
output.__setattr__(names[curr], suffix[:e:])
parse(output, suffix[e + 1::], next, next + 1)
def url_param2value(param):
"""
CONVERT URL QUERY PARAMETERS INTO DICT
"""
if isinstance(param, text_type):
param = param.encode("ascii")
def _decode(v):
output = []
i = 0
while i < len(v):
c = v[i]
if c == "%":
d = (v[i + 1:i + 3]).decode("hex")
output.append(d)
i += 3
else:
output.append(c)
i += 1
output = (b"".join(output)).decode("latin1")
try:
return json2value(output)
except Exception:
pass
return output
query = Data()
for p in param.split(b'&'):
if not p:
continue
if p.find(b"=") == -1:
k = p
v = True
else:
k, v = p.split(b"=")
v = _decode(v)
u = query.get(k)
if u is None:
query[k] = v
elif isinstance(u, list):
u += [v]
else:
query[k] = [u, v]
return query
def value2url_param(value):
"""
:param value:
:return: ascii URL
"""
if value == None:
Log.error("Can not encode None into a URL")
if isinstance(value, Mapping):
value_ = wrap(value)
output = b"&".join([
value2url_param(k) + b"=" + (value2url_param(v) if isinstance(v, text_type) else value2url_param(value2json(v)))
for k, v in value_.leaves()
])
elif isinstance(value, text_type):
output = b"".join(_map2url[c] for c in value.encode('utf8'))
elif isinstance(value, str):
output = b"".join(_map2url[c] for c in value)
elif hasattr(value, "__iter__"):
output = b",".join(value2url_param(v) for v in value)
else:
output = str(value)
return output

98
vendor/mo_kwargs/__init__.py

@ -13,9 +13,8 @@ from __future__ import unicode_literals
from collections import Mapping
from mo_future import text_type, get_function_arguments, get_function_defaults, get_function_name
from mo_dots import zip as dict_zip, get_logger, wrap
from mo_future import text_type, get_function_arguments, get_function_defaults, get_function_name
from mo_logs import Except
@ -34,77 +33,88 @@ def override(func):
3) DEFAULT VALUES ASSIGNED IN FUNCTION DEFINITION
"""
func_name = get_function_name(func)
params = get_function_arguments(func)
if not get_function_defaults(func):
defaults = {}
else:
defaults = {k: v for k, v in zip(reversed(params), reversed(get_function_defaults(func)))}
def raise_error(e, packed):
err = text_type(e)
e = Except.wrap(e)
if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err):
missing = [p for p in params if str(p) not in packed]
given = [p for p in params if str(p) in packed]
get_logger().error(
"Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
func_name=func_name,
missing=missing,
given=given,
stack_depth=2
)
get_logger().error("Error dispatching call", e)
if "kwargs" not in params:
# WE ASSUME WE ARE ONLY ADDING A kwargs PARAMETER TO SOME REGULAR METHOD
def w_settings(*args, **kwargs):
def wo_kwargs(*args, **kwargs):
settings = kwargs.get("kwargs")
params = get_function_arguments(func)
if not get_function_defaults(func):
defaults = {}
else:
defaults = {k: v for k, v in zip(reversed(params), reversed(get_function_defaults(func)))}
ordered_params = dict(zip(params, args))
packed = params_pack(params, ordered_params, kwargs, settings, defaults)
try:
return func(**packed)
except TypeError as e:
raise_error(e, packed)
return wo_kwargs
return func(**params_pack(params, ordered_params, kwargs, settings, defaults))
return w_settings
def wrapper(*args, **kwargs):
try:
func_name = get_function_name(func)
if func_name in ("__init__", "__new__") and "kwargs" in kwargs:
elif func_name in ("__init__", "__new__"):
def w_constructor(*args, **kwargs):
if "kwargs" in kwargs:
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
return func(args[0], **packed)
elif func_name in ("__init__", "__new__") and len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
elif len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
# ASSUME SECOND UNNAMED PARAM IS kwargs
packed = params_pack(params, args[1], defaults)
return func(args[0], **packed)
elif func_name in ("__init__", "__new__"):
else:
# DO NOT INCLUDE self IN kwargs
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults)
try:
return func(args[0], **packed)
elif params[0] == "self" and "kwargs" in kwargs:
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
return func(args[0], **packed)
elif params[0] == "self" and len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
except TypeError as e:
raise_error(e, packed)
return w_constructor
elif params[0] == "self":
def w_bound_method(*args, **kwargs):
if len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping):
# ASSUME SECOND UNNAMED PARAM IS kwargs
packed = params_pack(params, args[1], defaults)
return func(args[0], **packed)
elif params[0] == "self":
elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
# PUT args INTO kwargs
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults)
else:
packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults)
try:
return func(args[0], **packed)
elif len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
# ASSUME SINGLE PARAMETER IS A SETTING
except TypeError as e:
raise_error(e, packed)
return w_bound_method
else:
def w_kwargs(*args, **kwargs):
if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
# ASSUME SINGLE PARAMETER IS kwargs
packed = params_pack(params, args[0], defaults)
return func(**packed)
elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping):
# PUT args INTO kwargs
packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults)
return func(**packed)
else:
# PULL kwargs OUT INTO PARAMS
packed = params_pack(params, kwargs, dict_zip(params, args), defaults)
try:
return func(**packed)
except TypeError as e:
e = Except.wrap(e)
if e.message.startswith(func_name) and "takes at least" in e:
missing = [p for p in params if str(p) not in packed]
get_logger().error(
"Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
func_name=func_name,
missing=missing,
given=packed.keys(),
stack_depth=1
)
get_logger().error("Error dispatching call", e)
return wrapper
except TypeError as e:
raise_error(e, packed)
return w_kwargs
def params_pack(params, *args):
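For orientation, a hedged sketch of what the rewritten decorator allows; the connect() function and its values are hypothetical, and defaults are taken to be the lowest-priority source per the docstring above:
from mo_kwargs import override

@override
def connect(host, port=9200, kwargs=None):
    return host, port

connect("localhost")                           # port filled from the definition default
connect(kwargs={"host": "es1", "port": 9300})  # parameters supplied through a kwargs dict
connect({"host": "es1", "port": 9300})         # a single Mapping argument is treated as kwargs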

vendor/mo_logs/__init__.py (vendored, 102 changed lines)

@ -13,18 +13,23 @@ from __future__ import unicode_literals
import os
import platform
import sys
from collections import Mapping
from datetime import datetime
import sys
from mo_dots import coalesce, listwrap, wrap, unwrap, unwraplist, set_default, FlatList
from mo_future import text_type, PY3, iteritems
from mo_future import text_type, PY3
from mo_logs import constants
from mo_logs.exceptions import Except, suppress_exception
from mo_logs.strings import indent
_Thread = None
if PY3:
STDOUT = sys.stdout.buffer
else:
STDOUT = sys.stdout
class Log(object):
"""
@ -34,8 +39,6 @@ class Log(object):
main_log = None
logging_multi = None
profiler = None # simple pypy-friendly profiler
cprofiler = None # screws up with pypy, but better than nothing
cprofiler_stats = None
error_mode = False # prevent error loops
@classmethod
@ -53,7 +56,6 @@ class Log(object):
constants - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)
"""
global _Thread
if not settings:
return
settings = wrap(settings)
@ -66,40 +68,37 @@ class Log(object):
from mo_threads import Thread as _Thread
_ = _Thread
# ENABLE CPROFILE
if settings.cprofile is False:
settings.cprofile = {"enabled": False}
elif settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
elif settings.cprofile is True:
if isinstance(settings.cprofile, bool):
settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}
import cProfile
cls.cprofiler = cProfile.Profile()
cls.cprofiler.enable()
if settings.cprofile.enabled:
from mo_threads import profiles
profiles.enable_profilers(settings.cprofile.filename)
if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
from mo_logs import profiles
if isinstance(settings.profile, bool):
profiles.ON = True
settings.profile = {"enabled": True, "filename": "profile.tab"}
if settings.profile.enabled:
profiles.ON = True
Log.error("REMOVED 2018-09-02, Activedata revision 3f30ff46f5971776f8ba18")
# from mo_logs import profiles
#
# if isinstance(settings.profile, bool):
# profiles.ON = True
# settings.profile = {"enabled": True, "filename": "profile.tab"}
#
# if settings.profile.enabled:
# profiles.ON = True
if settings.constants:
constants.set(settings.constants)
if settings.log:
cls.logging_multi = StructuredLogger_usingMulti()
from mo_logs.log_usingThread import StructuredLogger_usingThread
cls.main_log = StructuredLogger_usingThread(cls.logging_multi)
for log in listwrap(settings.log):
Log.add_log(Log.new_instance(log))
if settings.cprofile.enabled == True:
Log.alert("cprofiling is enabled, writing to {{filename}}", filename=os.path.abspath(settings.cprofile.filename))
from mo_logs.log_usingThread import StructuredLogger_usingThread
cls.main_log = StructuredLogger_usingThread(cls.logging_multi)
@classmethod
def stop(cls):
@ -108,23 +107,8 @@ class Log(object):
EXECUTING MULTIPLE TIMES IN A ROW IS SAFE, IT HAS NO NET EFFECT, IT STILL LOGS TO stdout
:return: NOTHING
"""
from mo_threads import profiles
if cls.cprofiler and hasattr(cls, "settings"):
if cls.cprofiler == None:
from mo_threads import Queue
cls.cprofiler_stats = Queue("cprofiler stats") # ACCUMULATION OF STATS FROM ALL THREADS
import pstats
cls.cprofiler_stats.add(pstats.Stats(cls.cprofiler))
write_profile(cls.settings.cprofile, cls.cprofiler_stats.pop_all())
if profiles.ON and hasattr(cls, "settings"):
profiles.write(cls.settings.profile)
cls.main_log.stop()
cls.main_log = StructuredLogger_usingStream(sys.stdout)
main_log, cls.main_log = cls.main_log, StructuredLogger_usingStream(STDOUT)
main_log.stop()
@classmethod
def new_instance(cls, settings):
@ -148,7 +132,10 @@ class Log(object):
return StructuredLogger_usingFile(settings.filename)
if settings.log_type == "console":
from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream
return StructuredLogger_usingThreadedStream(sys.stdout)
return StructuredLogger_usingThreadedStream(STDOUT)
if settings.log_type == "mozlog":
from mo_logs.log_usingMozLog import StructuredLogger_usingMozLog
return StructuredLogger_usingMozLog(STDOUT, coalesce(settings.app_name, settings.appname))
if settings.log_type == "stream" or settings.stream:
from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream
return StructuredLogger_usingThreadedStream(settings.stream)
@ -401,7 +388,7 @@ class Log(object):
if add_to_trace:
cause[0].trace.extend(trace[1:])
e = Except(type=exceptions.ERROR, template=template, params=params, cause=cause, trace=trace)
e = Except(type=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
raise_from_none(e)
@classmethod
@ -455,31 +442,6 @@ class Log(object):
raise NotImplementedError
def write_profile(profile_settings, stats):
from pyLibrary import convert
from mo_files import File
Log.note("aggregating {{num}} profile stats", num=len(stats))
acc = stats[0]
for s in stats[1:]:
acc.add(s)
stats = [{
"num_calls": d[1],
"self_time": d[2],
"total_time": d[3],
"self_time_per_call": d[2] / d[1],
"total_time_per_call": d[3] / d[1],
"file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
"line": f[1],
"method": f[2].lstrip("<").rstrip(">")
}
for f, d, in iteritems(acc.stats)
]
stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
stats_file.write(convert.list2tab(stats))
def _same_frame(frameA, frameB):
return (frameA.line, frameA.file) == (frameB.line, frameB.file)
@ -506,5 +468,5 @@ from mo_logs.log_usingStream import StructuredLogger_usingStream
if not Log.main_log:
Log.main_log = StructuredLogger_usingStream(sys.stdout)
Log.main_log = StructuredLogger_usingStream(STDOUT)

vendor/mo_logs/exceptions.py (vendored, 2 changed lines)

@ -211,7 +211,7 @@ class Suppress(object):
self.type = exception_type
def __enter__(self):
pass
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if not exc_val or isinstance(exc_val, self.type):

vendor/mo_logs/log_usingLogger.py (vendored, 2 changed lines)

@ -94,7 +94,7 @@ def make_log_from_settings(settings):
Log.error("Can not find class {{class}}", {"class": path}, cause=e)
# IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
if settings.filename:
if settings.filename != None:
from mo_files import File
f = File(settings.filename)

vendor/mo_logs/log_usingMozLog.py (vendored, new file, 70 changed lines)

@ -0,0 +1,70 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from decimal import Decimal
from mo_dots import wrap
from mo_json import value2json, datetime2unix
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import ERROR, NOTE, WARNING, ALARM
from mo_logs.log_usingElasticSearch import _deep_json_to_string
from mo_logs.log_usingNothing import StructuredLogger
class StructuredLogger_usingMozLog(StructuredLogger):
"""
WRITE TO MozLog STANDARD FORMAT
https://wiki.mozilla.org/Firefox/Services/Logging
"""
@override
def __init__(self, stream, app_name):
"""
:param stream: STREAM TO RECEIVE MozLog RECORDS (JSON, AS BYTES)
:param app_name: MozLog WOULD LIKE TO KNOW WHAT APP IS MAKING THESE LOGS
"""
self.stream = stream
self.app_name = app_name
if not app_name:
Log.error("mozlog expects an `app_name` in the config")
if not Log.trace:
Log.error("mozlog expects trace=True so it get s the information it requires")
def write(self, template, params):
output = {
"Timestamp": (Decimal(datetime2unix(params.timestamp)) * Decimal(1e9)).to_integral_exact(), # NANOSECONDS
"Type": params.template,
"Logger": params.machine.name,
"Hostname": self.app_name,
"EnvVersion": "2.0",
"Severity": severity_map.get(params.context, 3), # https://en.wikipedia.org/wiki/Syslog#Severity_levels
"Pid": params.machine.pid,
"Fields": {
k: _deep_json_to_string(v, 0)
for k, v in wrap(params).leaves()
}
}
self.stream.write(value2json(output).encode('utf8'))
self.stream.write(b'\n')
severity_map = {
ERROR: 3,
WARNING: 4,
ALARM: 5,
NOTE: 6
}
def datatime2decimal(value):
return
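A hedged sketch of switching the new logger on through the settings handled in mo_logs/__init__.py above; the appname value is illustrative, and it is assumed that Log.start() honours a top-level "trace" setting:
from mo_logs import Log

Log.start({
    "trace": True,  # StructuredLogger_usingMozLog refuses to start without trace information
    "log": {"log_type": "mozlog", "appname": "my-service"}
})
Log.note("processed {{num}} records", num=42)  # should appear as one MozLog JSON line on stdout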

vendor/mo_logs/log_usingMulti.py (vendored, 2 changed lines)

@ -29,7 +29,7 @@ class StructuredLogger_usingMulti(StructuredLogger):
m.write(template, params)
except Exception as e:
bad.append(m)
Log.warning("Logger failed! It will be removed: {{type}}", type=m.__class__.__name__, cause=e)
Log.warning("Logger {{type|quote}} failed! It will be removed.", type=m.__class__.__name__, cause=e)
with suppress_exception:
for b in bad:
self.many.remove(b)

vendor/mo_logs/log_usingStream.py (vendored, 11 changed lines)

@ -27,13 +27,8 @@ class StructuredLogger_usingStream(StructuredLogger):
self.flush = stream.flush
if stream in (sys.stdout, sys.stderr):
if PY3:
self.writer = stream.write
else:
self.writer = _UTF8Encoder(stream).write
elif hasattr(stream, 'encoding') and stream.encoding:
self.writer = _UTF8Encoder(stream).write
else:
self.writer = stream.write
stream = stream.buffer
self.writer = _UTF8Encoder(stream).write
except Exception as _:
sys.stderr.write("can not handle")
@ -57,5 +52,5 @@ class _UTF8Encoder(object):
def write(self, v):
try:
self.stream.write(v.encode('utf8'))
except Exception as _:
except Exception:
sys.stderr.write("can not handle")

vendor/mo_logs/log_usingThread.py (vendored, 14 changed lines)

@ -17,6 +17,8 @@ from mo_logs import Log, Except, suppress_exception
from mo_logs.log_usingNothing import StructuredLogger
from mo_threads import Thread, Queue, Till, THREAD_STOP
DEBUG = False
class StructuredLogger_usingThread(StructuredLogger):
@ -30,14 +32,17 @@ class StructuredLogger_usingThread(StructuredLogger):
def worker(logger, please_stop):
try:
while not please_stop:
Till(seconds=1).wait()
(Till(seconds=1) | please_stop).wait()
logs = self.queue.pop_all()
for log in logs:
if log is THREAD_STOP:
please_stop.go()
else:
logger.write(**log)
except Exception as e:
print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
finally:
Log.note("stop the child")
logger.stop()
self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
@ -53,10 +58,13 @@ class StructuredLogger_usingThread(StructuredLogger):
raise e # OH NO!
def stop(self):
with suppress_exception:
Log.warning("Stopping threaded logger")
try:
self.queue.add(THREAD_STOP) # BE PATIENT, LET REST OF MESSAGE BE SENT
self.thread.join()
self.logger.stop()
Log.note("joined on thread")
except Exception as e:
Log.note("problem in threaded logger" + str(e))
with suppress_exception:
self.queue.close()

vendor/mo_logs/log_usingThreadedStream.py (vendored, 32 changed lines)

@ -16,7 +16,8 @@ from __future__ import unicode_literals
import sys
from time import time
from mo_future import text_type, PY2
from mo_dots import Data
from mo_future import text_type, PY3
from mo_logs import Log
from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.strings import expand_template
@ -31,29 +32,24 @@ class StructuredLogger_usingThreadedStream(StructuredLogger):
def __init__(self, stream):
assert stream
use_UTF8 = False
if isinstance(stream, text_type):
if stream.startswith("sys."):
use_UTF8 = True # sys.* ARE OLD AND CAN NOT HANDLE unicode
self.stream = eval(stream)
name = stream
stream = self.stream = eval(stream)
if name.startswith("sys.") and PY3:
self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
else:
self.stream = stream
name = "stream"
self.stream = stream
# WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
from mo_threads import Queue
if use_UTF8 and PY2:
def utf8_appender(value):
if isinstance(value, text_type):
value = value.encode('utf8')
self.stream.write(value)
def utf8_appender(value):
if isinstance(value, text_type):
value = value.encode('utf8')
self.stream.write(value)
appender = utf8_appender
else:
appender = self.stream.write
appender = utf8_appender
self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
@ -93,9 +89,11 @@ def time_delta_pusher(please_stop, appender, queue, interval):
next_run = time() + interval
while not please_stop:
Thread.current().cprofiler.disable()
profiler = Thread.current().cprofiler
profiler.disable()
(Till(till=next_run) | please_stop).wait()
Thread.current().cprofiler.enable()
profiler.enable()
next_run = time() + interval
logs = queue.pop_all()
if not logs:

vendor/mo_logs/startup.py (vendored, 2 changed lines)

@ -83,7 +83,7 @@ def read_settings(filename=None, defs=None):
Log.error("Can not read configuration file {{filename}}", {
"filename": settings_file.abspath
})
settings = mo_json_config.get("file:///" + settings_file.abspath)
settings = mo_json_config.get_file(settings_file)
settings.args = args
return settings

vendor/mo_logs/strings.py (vendored, 171 changed lines)

@ -22,12 +22,11 @@ from datetime import datetime as builtin_datetime
from datetime import timedelta, date
from json.encoder import encode_basestring
import sys
from mo_dots import coalesce, wrap, get_module, Data
from mo_future import text_type, xrange, binary_type, round as _round, PY3, get_function_name, zip_longest
from mo_future import text_type, xrange, binary_type, round as _round, get_function_name, zip_longest, transpose, PY3
from mo_logs.convert import datetime2unix, datetime2string, value2json, milli2datetime, unix2datetime
from mo_logs.url import value2url_param
# from mo_files.url import value2url_param
FORMATTERS = {}
@ -46,7 +45,7 @@ def _late_import():
try:
_json_encoder = get_module("mo_json.encoder").json_encoder
except Exception:
_json_encoder = _json.dumps
_json_encoder = lambda value, pretty: _json.dumps(value)
from mo_logs import Log as _Log
from mo_logs.exceptions import Except as _Except
from mo_times.durations import Duration as _Duration
@ -111,11 +110,17 @@ def unix(value):
return str(datetime2unix(value))
value2url_param = None
@formatter
def url(value):
"""
convert FROM dict OR string TO URL PARAMETERS
"""
global value2url_param
if not value2url_param:
from mo_files.url import value2url_param
return value2url_param(value)
@ -187,7 +192,7 @@ def tab(value):
:return:
"""
if isinstance(value, Mapping):
h, d = zip(*wrap(value).leaves())
h, d = transpose(*wrap(value).leaves())
return (
"\t".join(map(value2json, h)) +
"\n" +
@ -484,16 +489,20 @@ _SNIP = "...<snip>..."
@formatter
def limit(value, length):
# LIMIT THE STRING value TO GIVEN LENGTH, CHOPPING OUT THE MIDDLE IF REQUIRED
if len(value) <= length:
return value
elif length < len(_SNIP) * 2:
return value[0:length]
else:
lhs = int(round((length - len(_SNIP)) / 2, 0))
rhs = length - len(_SNIP) - lhs
return value[:lhs] + _SNIP + value[-rhs:]
try:
# LIMIT THE STRING value TO GIVEN LENGTH, CHOPPING OUT THE MIDDLE IF REQUIRED
if len(value) <= length:
return value
elif length < len(_SNIP) * 2:
return value[0:length]
else:
lhs = int(round((length - len(_SNIP)) / 2, 0))
rhs = length - len(_SNIP) - lhs
return value[:lhs] + _SNIP + value[-rhs:]
except Exception as e:
if not _Duration:
_late_import()
_Log.error("Not expected", cause=e)
@formatter
def split(value, sep="\n"):
@ -742,19 +751,15 @@ def apply_diff(text, diff, reverse=False, verify=True):
+Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
"""
output = text
if not diff:
return output
start_of_hunk = 0
while True:
if start_of_hunk>=len(diff):
break
header = diff[start_of_hunk]
start_of_hunk += 1
if not header.strip():
continue
return text
output = text
hunks = [
(new_diff[start_hunk], new_diff[start_hunk+1:end_hunk])
for new_diff in [[d.lstrip() for d in diff if d.lstrip() and d != "\\ No newline at end of file"] + ["@@"]] # ANOTHER REPAIR
for start_hunk, end_hunk in pairwise(i for i, l in enumerate(new_diff) if l.startswith('@@'))
]
for header, hunk_body in (reversed(hunks) if reverse else hunks):
matches = DIFF_PREFIX.match(header.strip())
if not matches:
if not _Log:
@ -762,76 +767,86 @@ def apply_diff(text, diff, reverse=False, verify=True):
_Log.error("Can not handle \n---\n{{diff}}\n---\n", diff=diff)
remove = tuple(int(i.strip()) for i in matches.group(1).split(",")) # EXPECTING start_line, length TO REMOVE
remove = Data(start=remove[0], length=1 if len(remove) == 1 else remove[1]) # ASSUME FIRST LINE
add = tuple(int(i.strip()) for i in matches.group(2).split(",")) # EXPECTING start_line, length TO ADD
add = Data(start=add[0], length=1 if len(add) == 1 else add[1])
removes = tuple(int(i.strip()) for i in matches.group(1).split(",")) # EXPECTING start_line, length TO REMOVE
remove = Data(start=removes[0], length=1 if len(removes) == 1 else removes[1]) # ASSUME FIRST LINE
adds = tuple(int(i.strip()) for i in matches.group(2).split(",")) # EXPECTING start_line, length TO ADD
add = Data(start=adds[0], length=1 if len(adds) == 1 else adds[1])
if remove.start == 0 and remove.length == 0:
remove.start = add.start
if add.start == 0 and add.length == 0:
if add.length == 0 and add.start == 0:
add.start = remove.start
if remove.start != add.start:
if not _Log:
_late_import()
_Log.warning("Do not know how to handle")
def repair_hunk(diff):
def repair_hunk(hunk_body):
# THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
# ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
# EXAMPLE: -kward has the details.+kward has the details.
# DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
problem_line = diff[start_of_hunk + remove.length - 1]
if reverse:
if add.length == 0:
return diff
first_added_line = output[add.start - 1]
if problem_line.endswith('+' + first_added_line):
split_point = len(problem_line) - len(first_added_line) - 1
last_lines = [
o
for b, o in zip(reversed(hunk_body), reversed(output))
if b != "+" + o
]
if not last_lines:
return hunk_body
last_line = last_lines[0]
for problem_index, problem_line in enumerate(hunk_body):
if problem_line.startswith('-') and problem_line.endswith('+' + last_line):
split_point = len(problem_line) - (len(last_line) + 1)
break
elif problem_line.startswith('+' + last_line + "-"):
split_point = len(last_line) + 1
break
else:
return diff
return hunk_body
else:
if remove.length == 0:
return diff
last_removed_line = output[remove.start - 1]
if problem_line.startswith('-' + last_removed_line + "+"):
split_point = len(last_removed_line) + 1
if not output:
return hunk_body
last_line = output[-1]
for problem_index, problem_line in enumerate(hunk_body):
if problem_line.startswith('+') and problem_line.endswith('-' + last_line):
split_point = len(problem_line) - (len(last_line) + 1)
break
elif problem_line.startswith('-' + last_line + "+"):
split_point = len(last_line) + 1
break
else:
return diff
return hunk_body
new_diff = (
diff[:start_of_hunk + remove.length - 1] +
new_hunk_body = (
hunk_body[:problem_index] +
[problem_line[:split_point], problem_line[split_point:]] +
diff[start_of_hunk + remove.length:]
hunk_body[problem_index + 1:]
)
return new_diff
diff = repair_hunk(diff)
diff = [d for d in diff if d != "\\ no newline at end of file"] # ANOTHER REPAIR
return new_hunk_body
hunk_body = repair_hunk(hunk_body)
if reverse:
new_output = (
output[:add.start - 1] +
[d[1:] for d in diff[start_of_hunk:start_of_hunk + remove.length]] +
[d[1:] for d in hunk_body if d and d[0] == '-'] +
output[add.start + add.length - 1:]
)
else:
# APPLYING DIFF FORWARD REQUIRES WE APPLY THE HUNKS IN REVERSE TO GET THE LINE NUMBERS RIGHT?
new_output = (
output[:remove.start-1] +
[d[1:] for d in diff[start_of_hunk + remove.length :start_of_hunk + remove.length + add.length ]] +
output[remove.start + remove.length - 1:]
output[:add.start - 1] +
[d[1:] for d in hunk_body if d and d[0] == '+'] +
output[add.start + remove.length - 1:]
)
start_of_hunk += remove.length + add.length
output = new_output
if verify:
original = apply_diff(output, diff, not reverse, False)
if any(t!=o for t, o in zip_longest(text, original)):
if not _Log:
_late_import()
_Log.error("logical verification check failed")
if set(text) != set(original): # bugzilla-etl diffs are a jumble
for t, o in zip_longest(text, original):
if t in ['reports: https://goo.gl/70o6w6\r']:
break # KNOWN INCONSISTENCIES
if t != o:
if not _Log:
_late_import()
_Log.error("logical verification check failed")
break
return output
@ -858,7 +873,7 @@ def utf82unicode(value):
try:
c.decode("utf8")
except Exception as f:
_Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)])
_Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)])
try:
latin1 = text_type(value.decode("latin1"))
@ -880,3 +895,15 @@ def wordify(value):
def pairwise(values):
"""
WITH values = [a, b, c, d, ...]
RETURN [(a, b), (b, c), (c, d), ...]
"""
i = iter(values)
a = next(i)
for b in i:
yield (a, b)
a = b
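For example, a trivial check of the generator above:
list(pairwise(["a", "b", "c", "d"]))
# -> [("a", "b"), ("b", "c"), ("c", "d")]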

vendor/mo_math/randoms.py (vendored, 2 changed lines)

@ -30,7 +30,7 @@ class Random(object):
@staticmethod
def base64(length):
return Random.string(length, string.digits + string.letters + '+/')
return Random.string(length, SIMPLE_ALPHABET + '+/')
@staticmethod
def int(*args):

vendor/mo_math/vendor/strangman/stats.py (vendored, 94 changed lines)

@ -228,7 +228,7 @@ import math
import copy
# from types import *
import pstat
from mo_math.vendor.strangman import pstat
__version__ = 0.6
@ -447,7 +447,7 @@ given by inlist.
Usage: lscoreatpercentile(inlist,percent)
"""
if percent > 1:
print "\nDividing percent>1 by 100 in lscoreatpercentile().\n"
print("\nDividing percent>1 by 100 in lscoreatpercentile().\n")
percent = percent / 100.0
targetcf = percent * len(inlist)
h, lrl, binsize, extras = histogram(inlist)
@ -485,8 +485,8 @@ spanning all the numbers in the inlist.
Usage: lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0)
Returns: list of bin values, lowerreallimit, binsize, extrapoints
"""
if (defaultreallimits <> None):
if type(defaultreallimits) not in [ListType, TupleType] or len(defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd
if (defaultreallimits != None):
if type(defaultreallimits) not in [list, tuple] or len(defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd
lowerreallimit = defaultreallimits
upperreallimit = 1.000001 * max(inlist)
else: # assume both limits given
@ -509,7 +509,7 @@ Returns: list of bin values, lowerreallimit, binsize, extrapoints
except:
extrapoints = extrapoints + 1
if (extrapoints > 0 and printextras == 1):
print '\nPoints outside given histogram range =', extrapoints
print('\nPoints outside given histogram range =', extrapoints)
return (bins, lowerreallimit, binsize, extrapoints)
@ -572,8 +572,8 @@ Returns: transformed data for use in an ANOVA
for j in range(k):
if v[j] - mean(nargs[j]) > TINY:
check = 0
if check <> 1:
raise ValueError, 'Problem in obrientransform.'
if check != 1:
raise ValueError('Problem in obrientransform.')
else:
return nargs
@ -751,11 +751,11 @@ Returns: appropriate statistic name, value, and probability
"""
samples = ''
while samples not in ['i', 'r', 'I', 'R', 'c', 'C']:
print '\nIndependent or related samples, or correlation (i,r,c): ',
print('\nIndependent or related samples, or correlation (i,r,c): ',)
samples = raw_input()
if samples in ['i', 'I', 'r', 'R']:
print '\nComparing variances ...',
print('\nComparing variances ...',)
# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
r = obrientransform(x, y)
f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1))
@ -763,45 +763,44 @@ Returns: appropriate statistic name, value, and probability
vartype = 'unequal, p=' + str(round(p, 4))
else:
vartype = 'equal'
print vartype
print(vartype)
if samples in ['i', 'I']:
if vartype[0] == 'e':
t, p = ttest_ind(x, y, 0)
print '\nIndependent samples t-test: ', round(t, 4), round(p, 4)
print('\nIndependent samples t-test: ', round(t, 4), round(p, 4))
else:
if len(x) > 20 or len(y) > 20:
z, p = ranksums(x, y)
print '\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(p, 4)
print('\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(p, 4))
else:
u, p = mannwhitneyu(x, y)
print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u, 4), round(p, 4)
print('\nMann-Whitney U-test (NONparametric, ns<20): ', round(u, 4), round(p, 4))
else: # RELATED SAMPLES
if vartype[0] == 'e':
t, p = ttest_rel(x, y, 0)
print '\nRelated samples t-test: ', round(t, 4), round(p, 4)
print('\nRelated samples t-test: ', round(t, 4), round(p, 4))
else:
t, p = ranksums(x, y)
print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)
print('\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4))
else: # CORRELATION ANALYSIS
corrtype = ''
while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']:
print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
print('\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',)
corrtype = raw_input()
if corrtype in ['c', 'C']:
m, b, r, p, see = linregress(x, y)
print '\nLinear regression for continuous variables ...'
print('\nLinear regression for continuous variables ...')
lol = [['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'], [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]]
pstat.printcc(lol)
elif corrtype in ['r', 'R']:
r, p = spearmanr(x, y)
print '\nCorrelation for ranked variables ...'
print "Spearman's r: ", round(r, 4), round(p, 4)
print('\nCorrelation for ranked variables ...')
print("Spearman's r: ", round(r, 4), round(p, 4))
else: # DICHOTOMOUS
r, p = pointbiserialr(x, y)
print '\nAssuming x contains a dichotomous variable ...'
print 'Point Biserial r: ', round(r, 4), round(p, 4)
print '\n\n'
print('\nAssuming x contains a dichotomous variable ...')
print('Point Biserial r: ', round(r, 4), round(p, 4))
print('\n\n')
return None
@ -815,8 +814,8 @@ Usage: lpearsonr(x,y) where x and y are equal-length lists
Returns: Pearson's r value, two-tailed p-value
"""
TINY = 1.0e-30
if len(x) <> len(y):
raise ValueError, 'Input values not paired in pearsonr. Aborting.'
if len(x) != len(y):
raise ValueError('Input values not paired in pearsonr. Aborting.')
n = len(x)
x = map(float, x)
y = map(float, y)
@ -854,8 +853,8 @@ Usage: lspearmanr(x,y) where x and y are equal-length lists
Returns: Spearman's r, two-tailed p-value
"""
TINY = 1e-30
if len(x) <> len(y):
raise ValueError, 'Input values not paired in spearmanr. Aborting.'
if len(x) != len(y):
raise ValueError('Input values not paired in spearmanr. Aborting.')
n = len(x)
rankx = rankdata(x)
ranky = rankdata(y)
@ -879,12 +878,12 @@ Usage: pointbiserialr(x,y) where x,y are equal-length lists
Returns: Point-biserial r, two-tailed p-value
"""
TINY = 1e-30
if len(cats) <> len(vals):
raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.'
if len(cats) != len(vals):
raise ValueError('INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.')
data = zip(cats, vals)
categories = pstat.unique(cats)
if len(categories) <> 2:
raise ValueError, "Exactly 2 categories required for pointbiserialr()."
if len(categories) != 2:
raise ValueError("Exactly 2 categories required for pointbiserialr().")
else: # there are 2 categories, continue
c1 = [v for i, v in enumerate(vals) if cats[i] == categories[0]]
c2 = [v for i, v in enumerate(vals) if cats[i] == categories[1]]
@ -942,8 +941,8 @@ Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates
Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
"""
TINY = 1.0e-20
if len(x) <> len(y):
raise ValueError, 'Input values not paired in linregress. Aborting.'
if len(x) != len(y):
raise ValueError('Input values not paired in linregress. Aborting.')
n = len(x)
x = map(float, x)
y = map(float, y)
@ -1017,8 +1016,8 @@ and prob.
Usage: lttest_rel(a,b)
Returns: t-value, two-tailed prob
"""
if len(a) <> len(b):
raise ValueError, 'Unequal length lists in ttest_rel.'
if len(a) != len(b):
raise ValueError('Unequal length lists in ttest_rel.')
x1 = mean(a)
x2 = mean(b)
v1 = var(a)
@ -1119,7 +1118,7 @@ Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))
proportion = bigu / float(n1 * n2)
T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores
if T == 0:
raise ValueError, 'All numbers are identical in lmannwhitneyu'
raise ValueError('All numbers are identical in lmannwhitneyu')
sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)
z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc
return smallu, 1.0 - zprob(z) #, proportion
@ -1180,12 +1179,12 @@ result. A non-parametric T-test.
Usage: lwilcoxont(x,y)
Returns: a t-statistic, two-tail probability estimate
"""
if len(x) <> len(y):
raise ValueError, 'Unequal N in wilcoxont. Aborting.'
if len(x) != len(y):
raise ValueError('Unequal N in wilcoxont. Aborting.')
d = []
for i in range(len(x)):
diff = x[i] - y[i]
if diff <> 0:
if diff != 0:
d.append(diff)
count = len(d)
absd = map(abs, d)
@ -1235,7 +1234,7 @@ Returns: H-statistic (corrected for ties), associated p-value
h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)
df = len(args) - 1
if T == 0:
raise ValueError, 'All numbers are identical in lkruskalwallish'
raise ValueError('All numbers are identical in lkruskalwallish')
h = h / float(T)
return h, chisqprob(h, df)
@ -1254,9 +1253,9 @@ Returns: chi-square statistic, associated p-value
"""
k = len(args)
if k < 3:
raise ValueError, 'Less than 3 levels. Friedman test not appropriate.'
raise ValueError('Less than 3 levels. Friedman test not appropriate.')
n = len(args[0])
data = apply(zip, tuple(args))
data = map(zip, tuple(args))
for i in range(len(data)):
data[i] = rankdata(data[i])
ssbn = 0
@ -1454,8 +1453,7 @@ def betacf(a, b, x):
bz = 1.0
if (abs(az - aold) < (EPS * abs(az))):
return az
print 'a or b too big, or ITMAX too small in Betacf.'
print('a or b too big, or ITMAX too small in Betacf.')
def gammln(xx):
"""
@ -1490,7 +1488,7 @@ using the betacf function. (Adapted from: Numerical Recipies in C.)
Usage: lbetai(a,b,x)
"""
if (x < 0.0 or x > 1.0):
raise ValueError, 'Bad x in lbetai'
raise ValueError('Bad x in lbetai')
if (x == 0.0 or x == 1.0):
bt = 0.0
@ -1608,8 +1606,8 @@ length lists.
Usage: lsummult(list1,list2)
"""
if len(list1) <> len(list2):
raise ValueError, "Lists not equal length in summult."
if len(list1) != len(list2):
raise ValueError("Lists not equal length in summult.")
s = 0
for item1, item2 in zip(list1, list2):
s = s + item1 * item2
@ -1684,7 +1682,7 @@ Returns: a list of length equal to inlist, containing rank scores
for i in range(n):
sumranks = sumranks + i
dupcount = dupcount + 1
if i == n - 1 or svec[i] <> svec[i + 1]:
if i == n - 1 or svec[i] != svec[i + 1]:
averank = sumranks / float(dupcount) + 1
for j in range(i - dupcount + 1, i + 1):
newlist[ivec[j]] = averank

vendor/mo_math/vendor/strangman/statstest.py (vendored, 281 changed lines)

@ -15,83 +15,76 @@ af = N.array(lf)
ll = [l]*5
aa = N.array(ll)
print '\nCENTRAL TENDENCY'
print 'geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af)
print 'harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af)
print 'mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af)
print 'median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af)
print 'medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af)
print 'mode:',stats.mode(l),stats.mode(a)
print '\nMOMENTS'
print 'moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af)
print 'variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af)
print 'skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af)
print 'kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af)
print 'mean:',stats.mean(a),stats.mean(af)
print 'var:',stats.var(a),stats.var(af)
print 'stdev:',stats.stdev(a),stats.stdev(af)
print 'sem:',stats.sem(a),stats.sem(af)
print 'describe:'
print stats.describe(l)
print stats.describe(lf)
print stats.describe(a)
print stats.describe(af)
print '\nFREQUENCY'
print 'freqtable:'
print 'itemfreq:'
print stats.itemfreq(l)
print stats.itemfreq(a)
print 'scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40)
print 'percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12)
print 'histogram:',stats.histogram(l),stats.histogram(a)
print 'cumfreq:'
print stats.cumfreq(l)
print stats.cumfreq(lf)
print stats.cumfreq(a)
print stats.cumfreq(af)
print 'relfreq:'
print stats.relfreq(l)
print stats.relfreq(lf)
print stats.relfreq(a)
print stats.relfreq(af)
print '\nVARIATION'
print 'obrientransform:'
print('\nCENTRAL TENDENCY')
print('geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af))
print('harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af))
print('mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af))
print('median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af))
print('medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af))
print('mode:',stats.mode(l),stats.mode(a))
print('\nMOMENTS')
print('moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af))
print('variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af))
print('skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af))
print('kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af))
print('mean:',stats.mean(a),stats.mean(af))
print('var:',stats.var(a),stats.var(af))
print('stdev:',stats.stdev(a),stats.stdev(af))
print('sem:',stats.sem(a),stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
print(stats.describe(af))
print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(stats.itemfreq(l))
print(stats.itemfreq(a))
print('scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40))
print('percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12))
print('histogram:',stats.histogram(l),stats.histogram(a))
print('cumfreq:')
print(stats.cumfreq(l))
print(stats.cumfreq(lf))
print(stats.cumfreq(a))
print(stats.cumfreq(af))
print('relfreq:')
print(stats.relfreq(l))
print(stats.relfreq(lf))
print(stats.relfreq(a))
print(stats.relfreq(af))
print('\nVARIATION')
print('obrientransform:')
l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)
print stats.obrientransform(l,l,l,l,l)
print stats.obrientransform(a,a,a,a,a)
print 'samplevar:',stats.samplevar(l),stats.samplevar(a)
print 'samplestdev:',stats.samplestdev(l),stats.samplestdev(a)
print 'var:',stats.var(l),stats.var(a)
print 'stdev:',stats.stdev(l),stats.stdev(a)
print 'sterr:',stats.sterr(l),stats.sterr(a)
print 'sem:',stats.sem(l),stats.sem(a)
print 'z:',stats.z(l,4),stats.z(a,4)
print 'zs:'
print stats.zs(l)
print stats.zs(a)
print '\nTRIMMING'
print 'trimboth:'
print stats.trimboth(l,.2)
print stats.trimboth(lf,.2)
print stats.trimboth(a,.2)
print stats.trimboth(af,.2)
print 'trim1:'
print stats.trim1(l,.2)
print stats.trim1(lf,.2)
print stats.trim1(a,.2)
print stats.trim1(af,.2)
print '\nCORRELATION'
print(stats.obrientransform(l,l,l,l,l))
print(stats.obrientransform(a,a,a,a,a))
print('samplevar:',stats.samplevar(l),stats.samplevar(a))
print('samplestdev:',stats.samplestdev(l),stats.samplestdev(a))
print('var:',stats.var(l),stats.var(a))
print('stdev:',stats.stdev(l),stats.stdev(a))
print('sterr:',stats.sterr(l),stats.sterr(a))
print('sem:',stats.sem(l),stats.sem(a))
print('z:',stats.z(l,4),stats.z(a,4))
print('zs:')
print(stats.zs(l))
print(stats.zs(a))
print('\nTRIMMING')
print('trimboth:')
print(stats.trimboth(l,.2))
print(stats.trimboth(lf,.2))
print(stats.trimboth(a,.2))
print(stats.trimboth(af,.2))
print('trim1:')
print(stats.trim1(l,.2))
print(stats.trim1(lf,.2))
print(stats.trim1(a,.2))
print(stats.trim1(af,.2))
print('\nCORRELATION')
# execfile('testpairedstats.py')
l = range(1,21)
@ -106,62 +99,58 @@ b = N.array(m)
pb = [0]*9 + [1]*11
apb = N.array(pb)
print 'paired:'
print('paired:')
# stats.paired(l,m)
# stats.paired(a,b)
print
print
print 'pearsonr:'
print stats.pearsonr(l,m)
print stats.pearsonr(a,b)
print 'spearmanr:'
print stats.spearmanr(l,m)
print stats.spearmanr(a,b)
print 'pointbiserialr:'
print stats.pointbiserialr(pb,l)
print stats.pointbiserialr(apb,a)
print 'kendalltau:'
print stats.kendalltau(l,m)
print stats.kendalltau(a,b)
print 'linregress:'
print stats.linregress(l,m)
print stats.linregress(a,b)
print '\nINFERENTIAL'
print 'ttest_1samp:'
print stats.ttest_1samp(l,12)
print stats.ttest_1samp(a,12)
print 'ttest_ind:'
print stats.ttest_ind(l,m)
print stats.ttest_ind(a,b)
print 'ttest_rel:'
print stats.ttest_rel(l,m)
print stats.ttest_rel(a,b)
print 'chisquare:'
print stats.chisquare(l)
print stats.chisquare(a)
print 'ks_2samp:'
print stats.ks_2samp(l,m)
print stats.ks_2samp(a,b)
print 'mannwhitneyu:'
print stats.mannwhitneyu(l,m)
print stats.mannwhitneyu(a,b)
print 'ranksums:'
print stats.ranksums(l,m)
print stats.ranksums(a,b)
print 'wilcoxont:'
print stats.wilcoxont(l,m)
print stats.wilcoxont(a,b)
print 'kruskalwallish:'
print stats.kruskalwallish(l,m,l)
print len(l), len(m)
print stats.kruskalwallish(a,b,a)
print 'friedmanchisquare:'
print stats.friedmanchisquare(l,m,l)
print stats.friedmanchisquare(a,b,a)
print()
print('pearsonr:')
print(stats.pearsonr(l,m))
print(stats.pearsonr(a,b))
print('spearmanr:')
print(stats.spearmanr(l,m))
print(stats.spearmanr(a,b))
print('pointbiserialr:')
print(stats.pointbiserialr(pb,l))
print(stats.pointbiserialr(apb,a))
print('kendalltau:')
print(stats.kendalltau(l,m))
print(stats.kendalltau(a,b))
print('linregress:')
print(stats.linregress(l,m))
print(stats.linregress(a,b))
print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l,12))
print(stats.ttest_1samp(a,12))
print('ttest_ind:')
print(stats.ttest_ind(l,m))
print(stats.ttest_ind(a,b))
print('ttest_rel:')
print(stats.ttest_rel(l,m))
print(stats.ttest_rel(a,b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
print('ks_2samp:')
print(stats.ks_2samp(l,m))
print(stats.ks_2samp(a,b))
print('mannwhitneyu:')
print(stats.mannwhitneyu(l,m))
print(stats.mannwhitneyu(a,b))
print('ranksums:')
print(stats.ranksums(l,m))
print(stats.ranksums(a,b))
print('wilcoxont:')
print(stats.wilcoxont(l,m))
print(stats.wilcoxont(a,b))
print('kruskalwallish:')
print(stats.kruskalwallish(l,m,l))
print(len(l), len(m))
print(stats.kruskalwallish(a,b,a))
print('friedmanchisquare:')
print(stats.friedmanchisquare(l,m,l))
print(stats.friedmanchisquare(a,b,a))
l = range(1,21)
a = N.array(l)
ll = [l]*5
@ -171,29 +160,29 @@ m = range(4,24)
m[10] = 34
b = N.array(m)
print '\n\nF_oneway:'
print stats.F_oneway(l,m)
print stats.F_oneway(a,b)
print('\n\nF_oneway:')
print(stats.F_oneway(l,m))
print(stats.F_oneway(a,b))
# print 'F_value:',stats.F_value(l),stats.F_value(a)
print '\nSUPPORT'
print 'sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af)
print 'cumsum:'
print stats.cumsum(l)
print stats.cumsum(lf)
print stats.cumsum(a)
print stats.cumsum(af)
print 'ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af)
print 'summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b)
print 'sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af)
print 'sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b)
print 'shellsort:'
print stats.shellsort(m)
print stats.shellsort(b)
print 'rankdata:'
print stats.rankdata(m)
print stats.rankdata(b)
print('\nSUPPORT')
print('sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af))
print('cumsum:')
print(stats.cumsum(l))
print(stats.cumsum(lf))
print(stats.cumsum(a))
print(stats.cumsum(af))
print('ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af))
print('summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b))
print('sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af))
print('sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b))
print('shellsort:')
print(stats.shellsort(m))
print(stats.shellsort(b))
print('rankdata:')
print(stats.rankdata(m))
print(stats.rankdata(b))
print('\nANOVAs')
print '\nANOVAs'
execfile('testanova.py')

vendor/mo_testing/fuzzytestcase.py (vendored, 2 changed lines)

@ -159,7 +159,7 @@ def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, d
if not Math.is_number(expected):
# SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
if isinstance(expected, list) and len(expected)==0 and test == None:
if isinstance(expected, list) and len(expected) == 0 and test == None:
return
if isinstance(expected, Mapping) and not expected.keys() and test == None:
return

vendor/mo_times/dates.py (vendored, 65 changed lines)

@ -171,37 +171,68 @@ class Date(object):
def __lt__(self, other):
try:
if other == None:
return False
elif isinstance(other, Date):
return self.unix < other.unix
elif isinstance(other, (float, int)):
return self.unix < other
other = Date(other)
return self.unix < other.unix
except Exception:
return False
return self.unix < other.unix
def __eq__(self, other):
if other == None or other == '':
return Null
try:
return other.unix == self.unix
except Exception:
pass
try:
return Date(other).unix == self.unix
if other == None:
return False
elif isinstance(other, Date):
return self.unix == other.unix
elif isinstance(other, (float, int)):
return self.unix == other
other = Date(other)
return self.unix == other.unix
except Exception:
return False
def __le__(self, other):
other = Date(other)
return self.unix <= other.unix
try:
if other == None:
return False
elif isinstance(other, Date):
return self.unix <= other.unix
elif isinstance(other, (float, int)):
return self.unix <= other
other = Date(other)
return self.unix <= other.unix
except Exception:
return False
def __gt__(self, other):
other = Date(other)
return self.unix > other.unix
try:
if other == None:
return False
elif isinstance(other, Date):
return self.unix > other.unix
elif isinstance(other, (float, int)):
return self.unix > other
other = Date(other)
return self.unix > other.unix
except Exception:
return False
def __ge__(self, other):
other = Date(other)
return self.unix >= other.unix
try:
if other == None:
return False
elif isinstance(other, Date):
return self.unix >= other.unix
elif isinstance(other, (float, int)):
return self.unix >= other
other = Date(other)
return self.unix >= other.unix
except Exception:
return False
def __add__(self, other):
return self.add(other)
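Illustrative of the hardened comparisons; the date literals are hypothetical:
Date("2018-09-20") < None            # now False instead of raising
Date("2018-09-20") <= "2018-09-21"   # other operand types are coerced through Date()
Date("2018-09-20") > 0               # plain numbers are compared against .unix directly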

vendor/mo_times/timer.py (vendored, 21 changed lines)

@ -32,20 +32,19 @@ class Timer(object):
debug - SET TO False TO DISABLE THIS TIMER
"""
def __init__(self, description, param=None, debug=True, silent=False):
def __init__(self, description, param=None, silent=False, too_long=0):
self.template = description
self.param = wrap(coalesce(param, {}))
self.debug = debug
self.silent = silent
self.agg = 0
self.too_long = too_long # ONLY SHOW TIMING FOR DURATIONS THAT ARE too_long
self.start = 0
self.end = 0
self.interval = None
def __enter__(self):
if self.debug:
if not self.silent:
Log.note("Timer start: " + self.template, stack_depth=1, **self.param)
if not self.silent and self.too_long == 0:
Log.note("Timer start: " + self.template, stack_depth=1, **self.param)
self.start = time()
return self
@ -53,12 +52,12 @@ class Timer(object):
self.end = time()
self.interval = self.end - self.start
self.agg += self.interval
if self.debug:
param = wrap(self.param)
param.duration = timedelta(seconds=self.interval)
if not self.silent:
Log.note("Timer end : " + self.template + " (took {{duration}})", self.param, stack_depth=1)
self.param.duration = timedelta(seconds=self.interval)
if not self.silent:
if self.too_long == 0:
Log.note("Timer end : " + self.template + " (took {{duration}})", default_params=self.param, stack_depth=1)
elif self.interval >= self.too_long:
Log.note("Time too long: " + self.template + " ({{duration}})", default_params=self.param, stack_depth=1)
@property
def duration(self):
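A hedged sketch of the new too_long behaviour; load() and the file name are placeholders:
with Timer("read {{file}}", {"file": "big.json"}):
    load()  # start/end notes logged as before

with Timer("read {{file}}", {"file": "big.json"}, too_long=5):
    load()  # logged only as "Time too long: ..." when the block takes 5 seconds or more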

vendor/pyLibrary/aws/s3.py (vendored, 4 changed lines)

@ -20,11 +20,11 @@ from boto.s3.connection import Location
from bs4 import BeautifulSoup
from mo_dots import wrap, Null, coalesce, unwrap, Data
from mo_files.url import value2url_param
from mo_future import text_type, StringIO
from mo_kwargs import override
from mo_logs import Log, Except
from mo_logs.strings import utf82unicode, unicode2utf8
from mo_logs.url import value2url_param
from mo_times.dates import Date
from mo_times.timer import Timer
from pyLibrary import convert
@ -362,7 +362,7 @@ class Bucket(object):
retry = 3
while retry:
try:
with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, debug=self.settings.debug):
with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, silent=not self.settings.debug):
buff.seek(0)
storage.set_contents_from_file(buff)
break

vendor/pyLibrary/convert.py (vendored, 2 changed lines)

@ -390,7 +390,7 @@ def value2intlist(value):
elif isinstance(value, int):
return [value]
elif value.strip() == "":
return None
return []
else:
return [int(value)]
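Illustrative of the visible branches after this change:
value2intlist("")    # now [] (was None)
value2intlist("12")  # [12]
value2intlist(7)     # [7]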

vendor/pyLibrary/env/big_data.py (vendored, 1 changed line)

@ -32,6 +32,7 @@ DEBUG = False
MIN_READ_SIZE = 8 * 1024
MAX_STRING_SIZE = 1 * 1024 * 1024
class FileString(text_type):
"""
ACTS LIKE A STRING, BUT IS A FILE

vendor/pyLibrary/env/elasticsearch.py (vendored, 127 changed lines)

@ -18,8 +18,9 @@ from copy import deepcopy
from jx_python import jx
from jx_python.expressions import jx_expression_to_function
from jx_python.meta import Column
from mo_dots import wrap, FlatList, coalesce, Null, Data, set_default, listwrap, literal_field, ROOT_PATH, concat_field, split_field
from mo_future import text_type, binary_type
from mo_dots import wrap, FlatList, coalesce, Null, Data, set_default, listwrap, literal_field, ROOT_PATH, concat_field, split_field, SLOT
from mo_files.url import URL
from mo_future import text_type, binary_type, items
from mo_json import value2json, json2value
from mo_json.typed_encoder import EXISTS_TYPE, BOOLEAN_TYPE, STRING_TYPE, NUMBER_TYPE, NESTED_TYPE, TYPE_PREFIX, json_type_to_inserter_type
from mo_kwargs import override
@ -111,8 +112,7 @@ class Index(Features):
# EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
Log.error("not expected", cause=e)
if self.debug:
Log.alert("elasticsearch debugging for {{url}} is on", url=self.url)
self.debug and Log.alert("elasticsearch debugging for {{url}} is on", url=self.url)
props = self.get_properties()
if not props:
@ -138,7 +138,7 @@ class Index(Features):
@property
def url(self):
return self.cluster.path.rstrip("/") + "/" + self.path.lstrip("/")
return self.cluster.url / self.path
def get_properties(self, retry=True):
if self.settings.explore_metadata:
@ -228,12 +228,13 @@ class Index(Features):
self.cluster.post("/" + self.settings.index + "/_refresh")
def delete_record(self, filter):
filter = wrap(filter)
if self.settings.read_only:
Log.error("Index opened in read only mode, no changes allowed")
self.cluster.get_metadata()
if self.debug:
Log.note("Delete bugs:\n{{query}}", query=filter)
self.debug and Log.note("Delete bugs:\n{{query}}", query=filter)
if self.cluster.info.version.number.startswith("0.90"):
query = {"filtered": {
@ -269,6 +270,8 @@ class Index(Features):
elif self.cluster.info.version.number.startswith(("5.", "6.")):
query = {"query": filter}
if filter.terms.bug_id['~n~'] != None:
Log.warning("filter is not typed")
wait_for_active_shards = coalesce( # EARLIER VERSIONS USED "consistency" AS A PARAMETER
self.settings.wait_for_active_shards,
@ -310,7 +313,7 @@ class Index(Features):
if not lines:
return
with Timer("Add {{num}} documents to {{index}}", {"num": len(lines) / 2, "index":self.settings.index}, debug=self.debug):
with Timer("Add {{num}} documents to {{index}}", {"num": int(len(lines) / 2), "index": self.settings.index}, silent=not self.debug):
try:
data_string = "\n".join(l for l in lines) + "\n"
except Exception as e:
@ -388,8 +391,7 @@ class Index(Features):
self.extend([record])
def add_property(self, name, details):
if self.debug:
Log.note("Adding property {{prop}} to {{index}}", prop=name, index=self.settings.index)
self.debug and Log.note("Adding property {{prop}} to {{index}}", prop=name, index=self.settings.index)
for n in jx.reverse(split_field(name)):
if n == NESTED_TYPE:
details = {"properties": {n: set_default(details, {"type": "nested", "dynamic": True})}}
@ -500,7 +502,6 @@ class Index(Features):
)
HOPELESS = [
"Document contains at least one immense term",
"400 MapperParsingException",
@ -509,17 +510,16 @@ HOPELESS = [
"JsonParseException"
]
known_clusters = {} # MAP FROM (host, port) PAIR TO CLUSTER INSTANCE
known_clusters = {}
class Cluster(object):
@override
def __new__(cls, host, port=9200, kwargs=None):
if not isinstance(port, int):
if not Math.is_integer(port):
Log.error("port must be integer")
cluster = known_clusters.get((host, port))
cluster = known_clusters.get((host, int(port)))
if cluster:
return cluster
@ -544,7 +544,7 @@ class Cluster(object):
self.metatdata_last_updated = Date.now()
self.debug = debug
self._version = None
self.path = kwargs.host + ":" + text_type(kwargs.port)
self.url = URL(host, port=port)
@override
def get_or_create_index(
@ -726,7 +726,7 @@ class Cluster(object):
elif isinstance(schema, text_type):
Log.error("Expecting a JSON schema")
for k, m in list(schema.mappings.items()):
for k, m in items(schema.mappings):
m.date_detection = False # DISABLE DATE DETECTION
if typed:
@ -737,7 +737,8 @@ class Cluster(object):
DEFAULT_DYNAMIC_TEMPLATES +
m.dynamic_templates
)
if self.version.startswith("6."):
m.dynamic_templates = [t for t in m.dynamic_templates if "default_integer" not in t]
if self.version.startswith("5."):
schema.settings.index.max_inner_result_window = None # NOT ACCEPTED BY ES5
schema = json2value(value2json(schema), leaves=True)
@ -785,8 +786,7 @@ class Cluster(object):
if not isinstance(index_name, text_type):
Log.error("expecting an index name")
if self.debug:
Log.note("Deleting index {{index}}", index=index_name)
self.debug and Log.note("Deleting index {{index}}", index=index_name)
# REMOVE ALL ALIASES TOO
aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
@ -802,8 +802,7 @@ class Cluster(object):
if response.status_code != 200:
Log.error("Expecting a 200, got {{code}}", code=response.status_code)
details = json2value(utf82unicode(response.content))
if self.debug:
Log.note("delete response {{response}}", response=details)
self.debug and Log.note("delete response {{response}}", response=details)
return response
except Exception as e:
Log.error("Problem with call to {{url}}", url=url, cause=e)
@ -861,7 +860,7 @@ class Cluster(object):
return self._version
def post(self, path, **kwargs):
url = self.settings.host + ":" + text_type(self.settings.port) + path
url = self.url / path # self.settings.host + ":" + text_type(self.settings.port) + path
try:
heads = wrap(kwargs).headers
@ -872,23 +871,26 @@ class Cluster(object):
if data == None:
pass
elif isinstance(data, Mapping):
kwargs[DATA_KEY] = unicode2utf8(value2json(data))
data = kwargs[DATA_KEY] = unicode2utf8(value2json(data))
elif isinstance(data, text_type):
kwargs[DATA_KEY] = unicode2utf8(data)
data = kwargs[DATA_KEY] = unicode2utf8(data)
elif hasattr(data, str("__iter__")):
pass # ASSUME THIS IS AN ITERATOR OVER BYTES
else:
Log.error("data must be utf8 encoded string")
if self.debug:
sample = kwargs.get(DATA_KEY, b"")[:300]
Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)
if isinstance(data, binary_type):
sample = kwargs.get(DATA_KEY, b"")[:300]
Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)
else:
Log.note("{{url}}:\n\t<stream>", url=url)
if self.debug:
Log.note("POST {{url}}", url=url)
self.debug and Log.note("POST {{url}}", url=url)
response = http.post(url, **kwargs)
if response.status_code not in [200, 201]:
Log.error(text_type(response.reason) + ": " + strings.limit(response.content.decode("latin1"), 100 if self.debug else 10000))
if self.debug:
Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
self.debug and Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
details = json2value(utf82unicode(response.content))
if details.error:
Log.error(convert.quote2string(details.error))
@ -900,7 +902,7 @@ class Cluster(object):
return details
except Exception as e:
e = Except.wrap(e)
if url[0:4] != "http":
if url.scheme != "http":
suggestion = " (did you forget \"http://\" prefix on the host name?)"
else:
suggestion = ""
@ -909,7 +911,7 @@ class Cluster(object):
Log.error(
"Problem with call to {{url}}" + suggestion + "\n{{body|left(10000)}}",
url=url,
body=strings.limit(kwargs[DATA_KEY], 100 if self.debug else 10000),
body=strings.limit(utf82unicode(kwargs[DATA_KEY]), 100 if self.debug else 10000),
cause=e
)
else:
@ -921,8 +923,7 @@ class Cluster(object):
response = http.delete(url, **kwargs)
if response.status_code not in [200]:
Log.error(response.reason+": "+response.all_content)
if self.debug:
Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
details = wrap(json2value(utf82unicode(response.all_content)))
if details.error:
Log.error(details.error)
@ -933,13 +934,11 @@ class Cluster(object):
def get(self, path, **kwargs):
url = self.settings.host + ":" + text_type(self.settings.port) + path
try:
if self.debug:
Log.note("GET {{url}}", url=url)
self.debug and Log.note("GET {{url}}", url=url)
response = http.get(url, **kwargs)
if response.status_code not in [200]:
Log.error(response.reason + ": " + response.all_content)
if self.debug:
Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
details = wrap(json2value(utf82unicode(response.all_content)))
if details.error:
Log.error(details.error)
@ -953,8 +952,7 @@ class Cluster(object):
response = http.head(url, **kwargs)
if response.status_code not in [200]:
Log.error(response.reason+": "+response.all_content)
if self.debug:
Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
if response.all_content:
details = wrap(json2value(utf82unicode(response.all_content)))
if details.error:
@ -988,9 +986,8 @@ class Cluster(object):
try:
response = http.put(url, **kwargs)
if response.status_code not in [200]:
Log.error(response.reason + ": " + utf82unicode(response.all_content))
if self.debug:
Log.note("response: {{response}}", response=utf82unicode(response.all_content)[0:300:])
Log.error(response.reason + ": " + utf82unicode(response.content))
self.debug and Log.note("response: {{response}}", response=utf82unicode(response.content)[0:300:])
details = json2value(utf82unicode(response.content))
if details.error:
@ -1038,7 +1035,7 @@ def _scrub(r):
return convert.value2number(r)
elif isinstance(r, Mapping):
if isinstance(r, Data):
r = object.__getattribute__(r, "_dict")
r = object.__getattribute__(r, SLOT)
output = {}
for k, v in r.items():
v = _scrub(v)
@ -1079,8 +1076,7 @@ class Alias(Features):
kwargs=None
):
self.debug = debug
if self.debug:
Log.alert("Elasticsearch debugging on {{index|quote}} is on", index= kwargs.index)
self.debug and Log.alert("Elasticsearch debugging on {{index|quote}} is on", index= kwargs.index)
if alias == None:
Log.error("Alias can not be None")
self.settings = kwargs
@ -1112,7 +1108,7 @@ class Alias(Features):
@property
def url(self):
return self.cluster.path.rstrip("/") + "/" + self.path.lstrip("/")
return self.cluster.url / self.path
def get_snowflake(self, retry=True):
if self.settings.explore_metadata:
@ -1172,8 +1168,7 @@ class Alias(Features):
else:
raise NotImplementedError
if self.debug:
Log.note("Delete documents:\n{{query}}", query=query)
self.debug and Log.note("Delete documents:\n{{query}}", query=query)
keep_trying = True
while keep_trying:
@ -1269,8 +1264,15 @@ def parse_properties(parent_index_name, parent_name, esProperties):
continue
if not property.type:
continue
cardinality = 0 if not property.store and not name != '_id' else None
if property.fields:
child_columns = parse_properties(index_name, column_name, property.fields)
if cardinality is None:
for cc in child_columns:
cc.cardinality = None
columns.extend(child_columns)
if property.type in es_type_to_json_type.keys():
@ -1279,6 +1281,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
es_column=column_name,
names={".": jx_name},
nested_path=ROOT_PATH,
cardinality=cardinality,
es_type=property.type
))
if property.index_name and name != property.index_name:
@ -1287,6 +1290,7 @@ def parse_properties(parent_index_name, parent_name, esProperties):
es_column=column_name,
names={".": jx_name},
nested_path=ROOT_PATH,
cardinality=0 if property.store else None,
es_type=property.type
))
elif property.enabled == None or property.enabled == False:
@ -1295,10 +1299,11 @@ def parse_properties(parent_index_name, parent_name, esProperties):
es_column=column_name,
names={".": jx_name},
nested_path=ROOT_PATH,
cardinality=0 if property.store else None,
es_type="source" if property.enabled == False else "object"
))
else:
Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=query_path)
Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=parent_name)
return columns
@ -1482,6 +1487,8 @@ def diff_schema(A, B):
output =[]
def _diff_schema(path, A, B):
for k, av in A.items():
if k == "_id" and path == ".":
continue # DO NOT ADD _id TO ANY SCHEMA DIFF
bv = B[k]
if bv == None:
output.append((concat_field(path, k), av))
@ -1534,6 +1541,24 @@ DEFAULT_DYNAMIC_TEMPLATES = wrap([
"mapping": {"type": "keyword", "store": True},
"match_mapping_type": "string"
}
},
{
"default_long": {
"mapping": {"type": "long", "store": True},
"match_mapping_type": "long"
}
},
{
"default_double": {
"mapping": {"type": "double", "store": True},
"match_mapping_type": "double"
}
},
{
"default_integer": {
"mapping": {"type": "integer", "store": True},
"match_mapping_type": "integer"
}
}
])
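The three new templates make dynamically mapped numeric fields stored, mirroring the existing string rule. As a hedged sketch only (the type name, settings, and exact schema shape are assumptions, not taken from this commit), such templates sit in an index schema like this:

    from mo_dots import wrap
    from pyLibrary.env.elasticsearch import DEFAULT_DYNAMIC_TEMPLATES

    # hypothetical schema; any new long/double/integer/string field in an
    # indexed document is then mapped by the matching template with "store": true
    schema = wrap({
        "settings": {"index": {"number_of_shards": 1}},
        "mappings": {"test_result": {
            "dynamic_templates": DEFAULT_DYNAMIC_TEMPLATES
        }}
    })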

56
vendor/pyLibrary/env/flask_wrappers.py поставляемый
Просмотреть файл

@ -12,9 +12,12 @@ from __future__ import unicode_literals
import flask
from flask import Response
from mo_dots import coalesce
from mo_future import binary_type
from mo_dots import coalesce
from mo_files import File
from mo_json import value2json
from mo_logs import Log
from mo_logs.strings import unicode2utf8
from pyLibrary.env.big_data import ibytes2icompressed
TOO_SMALL_TO_COMPRESS = 510 # DO NOT COMPRESS DATA WITH LESS THAN THIS NUMBER OF BYTES
@ -63,4 +66,53 @@ def cors_wrapper(func):
return output
def dockerflow(flask_app, backend_check):
"""
ADD ROUTING TO HANDLE DOCKERFLOW APP REQUIREMENTS
(see https://github.com/mozilla-services/Dockerflow#containerized-app-requirements)
:param flask_app: THE (Flask) APP
:param backend_check: METHOD THAT WILL CHECK THE BACKEND IS WORKING AND RAISE AN EXCEPTION IF NOT
:return:
"""
global VERSION_JSON
try:
VERSION_JSON = File("version.json").read_bytes()
@cors_wrapper
def version():
return Response(
VERSION_JSON,
status=200,
headers={
"Content-Type": "application/json"
}
)
@cors_wrapper
def heartbeat():
try:
backend_check()
return Response(status=200)
except Exception as e:
Log.warning("heartbeat failure", cause=e)
return Response(
unicode2utf8(value2json(e)),
status=500,
headers={
"Content-Type": "application/json"
}
)
@cors_wrapper
def lbheartbeat():
return Response(status=200)
flask_app.add_url_rule(str('/__version__'), None, version, defaults={}, methods=[str('GET'), str('POST')])
flask_app.add_url_rule(str('/__heartbeat__'), None, heartbeat, defaults={}, methods=[str('GET'), str('POST')])
flask_app.add_url_rule(str('/__lbheartbeat__'), None, lbheartbeat, defaults={}, methods=[str('GET'), str('POST')])
except Exception as e:
Log.error("Problem setting up listeners for dockerflow", cause=e)
VERSION_JSON = None
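A minimal, hedged sketch of wiring this in (the backend probe is hypothetical):

    from flask import Flask
    from pyLibrary.env.flask_wrappers import dockerflow

    flask_app = Flask(__name__)

    def backend_check():
        # hypothetical probe; raising here makes /__heartbeat__ answer 500
        if not database_is_reachable():  # placeholder for a real check
            raise Exception("backend not reachable")

    dockerflow(flask_app, backend_check)
    # /__version__, /__heartbeat__ and /__lbheartbeat__ are now served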

31
vendor/pyLibrary/env/git.py поставляемый
Просмотреть файл

@ -8,18 +8,17 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import mo_threads
from mo_logs.exceptions import suppress_exception
from pyLibrary.meta import cache
from mo_threads import Process
from pyLibrary.meta import cache
@cache
def get_git_revision():
def get_revision():
"""
GET THE CURRENT GIT REVISION
"""
@ -36,13 +35,12 @@ def get_git_revision():
with suppress_exception:
proc.join()
@cache
def get_remote_revision(url, branch):
"""
GET REVISION OF A REMOTE BRANCH
"""
mo_threads.DEBUG = True
proc = Process("git remote revision", ["git", "ls-remote", url, "refs/heads/" + branch])
try:
@ -58,5 +56,22 @@ def get_remote_revision(url, branch):
except Exception:
pass
return None
@cache
def get_branch():
"""
GET THE CURRENT GIT BRANCH
"""
proc = Process("git status", ["git", "status"])
try:
while True:
raw_line = proc.stdout.pop()
line = raw_line.decode('utf8').strip()
if line.startswith("On branch "):
return line[10:]
finally:
try:
proc.join()
except Exception:
pass
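A hedged usage sketch for the three cached helpers (the remote URL and branch are placeholders):

    from pyLibrary.env.git import get_revision, get_remote_revision, get_branch

    print(get_revision())   # revision of the local checkout
    print(get_branch())     # current branch, parsed from `git status`
    print(get_remote_revision("https://github.com/example/repo.git", "master"))  # tip of a remote branch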

61
vendor/pyLibrary/env/http.py поставляемый
Просмотреть файл

@ -29,6 +29,7 @@ from requests import sessions, Response
from jx_python import jx
from mo_dots import Data, coalesce, wrap, set_default, unwrap, Null
from mo_files.url import URL
from mo_future import text_type, PY2
from mo_json import value2json, json2value
from mo_logs import Log
@ -54,14 +55,13 @@ DEFAULTS = {
"verify": True,
"timeout": 600,
"zip": False,
"retry": {"times": 1, "sleep": 0}
"retry": {"times": 1, "sleep": 0, "http": False}
}
_warning_sent = False
request_count = 0
def request(method, url, zip=None, retry=None, **kwargs):
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
"""
JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
DEMANDS data IS ONE OF:
@ -81,14 +81,14 @@ def request(method, url, zip=None, retry=None, **kwargs):
global _warning_sent
global request_count
if not default_headers and not _warning_sent:
_warning_sent = True
if not _warning_sent and not default_headers:
Log.warning(text_type(
"The pyLibrary.env.http module was meant to add extra " +
"default headers to all requests, specifically the 'Referer' " +
"header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
"function to set `pyLibrary.env.http.default_headers`"
))
_warning_sent = True
if isinstance(url, list):
# TRY MANY URLS
@ -111,42 +111,42 @@ def request(method, url, zip=None, retry=None, **kwargs):
sess = Null
else:
sess = session = sessions.Session()
session.headers.update(default_headers)
with closing(sess):
if PY2 and isinstance(url, text_type):
# httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
url = url.encode('ascii')
if retry == None:
retry = Data(times=1, sleep=0)
elif isinstance(retry, Number):
retry = Data(times=retry, sleep=1)
else:
retry = wrap(retry)
try:
set_default(kwargs, {"zip":zip, "retry": retry}, DEFAULTS)
_to_ascii_dict(kwargs)
# HEADERS
headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers))
_to_ascii_dict(headers)
del kwargs['headers']
# RETRY
retry = wrap(kwargs['retry'])
if isinstance(retry, Number):
retry = set_default({"times":retry}, DEFAULTS['retry'])
if isinstance(retry.sleep, Duration):
retry.sleep = retry.sleep.seconds
set_default(retry, {"times": 1, "sleep": 0})
del kwargs['retry']
_to_ascii_dict(kwargs)
set_default(kwargs, DEFAULTS)
# JSON
if 'json' in kwargs:
kwargs['data'] = value2json(kwargs['json']).encode('utf8')
del kwargs['json']
if 'json' in kwargs:
kwargs['data'] = value2json(kwargs['json']).encode('utf8')
del kwargs['json']
try:
headers = kwargs['headers'] = unwrap(coalesce(kwargs.get('headers'), {}))
# ZIP
set_default(headers, {'Accept-Encoding': 'compress, gzip'})
if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
compressed = convert.bytes2zip(kwargs['data'])
headers['content-encoding'] = 'gzip'
kwargs['data'] = compressed
_to_ascii_dict(headers)
else:
_to_ascii_dict(headers)
del kwargs['zip']
except Exception as e:
Log.error(u"Request setup failure on {{url}}", url=url, cause=e)
@ -158,12 +158,13 @@ def request(method, url, zip=None, retry=None, **kwargs):
try:
DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
request_count += 1
del kwargs['retry']
del kwargs['zip']
return session.request(method=method, url=url, **kwargs)
return session.request(method=method, headers=headers, url=str(url), **kwargs)
except Exception as e:
errors.append(Except.wrap(e))
e = Except.wrap(e)
if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
url = URL("http://" + str(url)[8:])
Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
errors.append(e)
if " Read timed out." in errors[0]:
Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])

2
vendor/pyLibrary/env/pulse.py поставляемый
Просмотреть файл

@ -214,7 +214,7 @@ class ModifiedGenericConsumer(GenericConsumer):
while True:
try:
self.connection.drain_events(timeout=self.timeout)
except socket_timeout, e:
except socket_timeout as e:
Log.warning("timeout! Restarting {{name}} pulse consumer.", name=self.exchange, cause=e)
try:
self.disconnect()

4
vendor/pyLibrary/env/rollover_index.py поставляемый
Просмотреть файл

@ -120,7 +120,7 @@ class RolloverIndex(object):
self.cluster.delete_index(c.index)
except Exception as e:
Log.warning("could not delete index {{index}}", index=c.index, cause=e)
for t, q in list(self.known_queues.items()):
for t, q in items(self.known_queues):
if unix2Date(t) + self.rollover_interval < Date.today() - self.rollover_max:
with self.locker:
del self.known_queues[t]
@ -189,7 +189,7 @@ class RolloverIndex(object):
queue = None
pending = [] # FOR WHEN WE DO NOT HAVE QUEUE YET
for key in keys:
timer = Timer("Process {{key}}", param={"key": key}, debug=DEBUG)
timer = Timer("Process {{key}}", param={"key": key}, silent=not DEBUG)
try:
with timer:
for rownum, line in enumerate(source.read_lines(strip_extension(key))):

3
vendor/pyLibrary/env/typed_inserter.py поставляемый
Просмотреть файл

@ -13,14 +13,13 @@ from __future__ import unicode_literals
from collections import Mapping
from jx_base import NESTED, OBJECT
from jx_python.expressions import jx_expression_to_function
from mo_dots import Data, unwrap
from pyLibrary.env.elasticsearch import parse_properties, random_id
from mo_json import json2value
from mo_json.encoder import UnicodeBuilder
from mo_json.typed_encoder import typed_encode
from mo_json.typed_encoder import typed_encode, OBJECT, NESTED
class TypedInserter(object):

43
vendor/pyLibrary/graphs/__init__.py поставляемый
Просмотреть файл

@ -1,43 +0,0 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
class Graph(object):
def __init__(self, node_type=None):
self.nodes = []
self.edges = []
self.node_type = node_type
def add_edge(self, edge):
self.edges.append(edge)
def remove_children(self, node):
self.edges = [e for e in self.edges if e[0] != node]
def get_children(self, node):
#FIND THE REVISION
#
return [c for p, c in self.edges if p == node]
def get_parents(self, node):
return [p for p, c in self.edges if c == node]
def get_edges(self, node):
return [(p, c) for p, c in self.edges if p == node or c == node]
def get_family(self, node):
"""
RETURN ALL ADJACENT NODES
"""
return set([p if c == node else c for p, c in self.edges])

101
vendor/pyLibrary/graphs/algorithms.py поставляемый
Просмотреть файл

@ -1,101 +0,0 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from collections import deque
from mo_math import INTERSECT
from pyLibrary.graphs.paths import Step, Path
from mo_dots import Data
def dfs(graph, func, head, reverse=None):
"""
DEPTH FIRST SEARCH
IF func RETURNS FALSE, THEN PATH IS NO LONGER TAKEN
IT'S EXPECTED func TAKES 3 ARGUMENTS
node - THE CURRENT NODE IN THE
path - PATH FROM head TO node
graph - THE WHOLE GRAPH
"""
todo = deque()
todo.append(head)
path = deque()
done = set()
while todo:
node = todo.popleft()
if node in done:
path.pop()
continue
done.add(node)
path.append(node)
result = func(node, path, graph)
if result:
if reverse:
children = graph.get_parents(node)
else:
children = graph.get_children(node)
todo.extend(children)
def bfs(graph, func, head, reverse=None):
"""
BREADTH FIRST SEARCH
IF func RETURNS FALSE, THEN NO MORE PATHS DOWN THE BRANCH ARE TAKEN
IT'S EXPECTED func TAKES 3 ARGUMENTS
node - THE CURRENT NODE IN THE
path - PATH FROM head TO node
graph - THE WHOLE GRAPH
todo - WHAT'S IN THE QUEUE TO BE DONE
"""
todo = deque() # LIST OF PATHS
todo.append(Step(None, head))
while True:
path = todo.popleft()
keep_going = func(path.node, Path(path), graph, todo)
if keep_going:
todo.extend(Step(path, c) for c in graph.get_children(path.node))
def dominator(graph, head):
# WE WOULD NEED DOMINATORS IF WE DO NOT KNOW THE TOPOLOGICAL ORDERING
# DOMINATORS ALLOW US TO USE A REFERENCE TEST RESULT: EVERYTHING BETWEEN
# dominator(node) AND node CAN BE TREATED AS PARALLEL-APPLIED CHANGESETS
#
# INSTEAD OF DOMINATORS, WE COULD USE MANY PERF RESULTS, FROM EACH OF THE
# PARENT BRANCHES, AND AS LONG AS THEY ALL ARE PART OF A LONG LINE OF
# STATISTICALLY IDENTICAL PERF RESULTS, WE CAN ASSUME THEY ARE A DOMINATOR
visited = set()
dom = Data(output=None)
def find_dominator(node, path, graph, todo):
if dom.output:
return False
if not todo:
dom.output = node
return False
if node in visited:
common = INTERSECT(p[1::] for p in todo) # DO NOT INCLUDE head
if node in common:
dom.output = node #ALL REMAINING PATHS HAVE node IN COMMON TOO
return False
return True
bfs(graph, find_dominator, head)
return dom.output

123
vendor/pyLibrary/graphs/paths.py поставляемый
Просмотреть файл

@ -1,123 +0,0 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from collections import namedtuple, deque
from mo_logs import Log
Step = namedtuple("Step", ["parent", "node"])
class Path(list):
"""
USES Steps TO DEFINE A LIST
Steps POINT TO parent, SO THIS CLASS HANDLES THE REVERSE NATURE
"""
def __init__(self, last_step):
self.last = last_step
self.list = None
def _build_list(self):
output = deque()
s = self.last
while s:
output.appendleft(s.node)
s = s.parent
self.list = list(output)
def __getitem__(self, index):
if index < 0:
return None
if not self.list:
self._build_list()
if index>=len(self.list):
return None
return self.list[index]
def __setitem__(self, i, y):
if not self.list:
self._build_list()
self.list[i]=y
def __iter__(self):
if not self.list:
self._build_list()
return self.list.__iter__()
def __contains__(self, item):
if not self.list:
self._build_list()
return item in self.list
def append(self, val):
Log.error("not implemented")
def __str__(self):
Log.error("not implemented")
def __len__(self):
if not self.list:
self._build_list()
return len(self.list)
def __getslice__(self, i, j):
Log.error("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step]")
def copy(self):
if not self.list:
self._build_list()
return self.list.copy()
def remove(self, x):
Log.error("not implemented")
def extend(self, values):
Log.error("not implemented")
def pop(self):
Log.error("not implemented")
def right(self, num=None):
"""
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:]
"""
if num == None:
return self.last.node
if num <= 0:
return []
if not self.list:
self._build_list()
return self.list[-num:]
def not_right(self, num):
"""
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
"""
if not self.list:
self._build_list()
if num == None:
return self.list[:-1:]
if num <= 0:
return []
return self.list[:-num:]
def last(self):
"""
RETURN LAST ELEMENT IN FlatList [-1]
"""
return self.last.node

20
vendor/pyLibrary/meta.py поставляемый
Просмотреть файл

@ -11,6 +11,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from collections import namedtuple
from types import FunctionType
import mo_json
@ -135,7 +136,10 @@ def wrap_function(cache_store, func_):
using_self = False
func = lambda self, *args: func_(*args)
def output(*args):
def output(*args, **kwargs):
if kwargs:
Log.error("Sorry, caching only works with ordered parameter, not keyword arguments")
with cache_store.locker:
if using_self:
self = args[0]
@ -152,7 +156,7 @@ def wrap_function(cache_store, func_):
if Random.int(100) == 0:
# REMOVE OLD CACHE
_cache = {k: v for k, v in _cache.items() if v[0]==None or v[0] > now}
_cache = {k: v for k, v in _cache.items() if v.timeout == None or v.timeout > now}
setattr(self, attr_name, _cache)
timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))
@ -160,7 +164,7 @@ def wrap_function(cache_store, func_):
if now >= timeout:
value = func(self, *args)
with cache_store.locker:
_cache[args] = (now + cache_store.timeout, args, value, None)
_cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
return value
if value == None:
@ -168,12 +172,12 @@ def wrap_function(cache_store, func_):
try:
value = func(self, *args)
with cache_store.locker:
_cache[args] = (now + cache_store.timeout, args, value, None)
_cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
return value
except Exception as e:
e = Except.wrap(e)
with cache_store.locker:
_cache[args] = (now + cache_store.timeout, args, None, e)
_cache[args] = CacheElement(now + cache_store.timeout, args, None, e)
raise e
else:
raise exception
@ -183,9 +187,10 @@ def wrap_function(cache_store, func_):
return output
CacheElement = namedtuple("CacheElement", ("timeout", "key", "value", "exception"))
class _FakeLock():
def __enter__(self):
pass
@ -193,7 +198,6 @@ class _FakeLock():
pass
def value2quote(value):
# RETURN PRETTY PYTHON CODE FOR THE SAME
if isinstance(value, text_type):
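A hedged sketch of the decorator whose internals changed above; positional arguments only, as the new guard enforces:

    from pyLibrary.meta import cache

    @cache
    def lookup(key):
        # hypothetical expensive call; results are memoized per positional arguments
        return expensive_remote_call(key)  # placeholder

    lookup("abc")      # computed once, then served from the cache until the timeout
    lookup(key="abc")  # raises: the new guard rejects keyword arguments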

10
vendor/pyLibrary/sql/__init__.py поставляемый
Просмотреть файл

@ -12,10 +12,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from itertools import groupby
from operator import itemgetter
from mo_future import text_type, PY3
from mo_logs import Log
from mo_logs.strings import expand_template
import pyLibrary.sql
class SQL(text_type):
"""
@ -127,9 +132,14 @@ def sql_concat(list_):
return SQL(" || ").join(sql_iso(l) for l in list_)
def quote_set(list_):
return sql_iso(sql_list(map(pyLibrary.sql.sqlite.quote_value, list_)))
def sql_alias(value, alias):
return SQL(value.template + " AS " + alias.template)
def sql_coalesce(list_):
return "COALESCE(" + SQL_COMMA.join(list_) + ")"

226
vendor/pyLibrary/sql/mysql.py поставляемый
Просмотреть файл

@ -15,23 +15,20 @@ from __future__ import unicode_literals
import subprocess
from collections import Mapping
from datetime import datetime
from zipfile import ZipFile
from pymysql import connect, InterfaceError, cursors
import mo_json
from jx_python import jx
from mo_dots import coalesce, wrap, listwrap, unwrap
from mo_dots import coalesce, wrap, listwrap, unwrap, split_field
from mo_files import File
from mo_future import text_type, utf8_json_encoder, binary_type
from mo_future import text_type, utf8_json_encoder, binary_type, transpose
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import Except, suppress_exception
from mo_logs.strings import expand_template, indent, outdent
from mo_math import Math
from mo_times import Date
from pyLibrary.convert import zip2bytes
from pyLibrary.env.big_data import ibytes2ilines
from pyLibrary.sql import SQL, SQL_NULL, SQL_SELECT, SQL_LIMIT, SQL_WHERE, SQL_LEFT_JOIN, SQL_FROM, SQL_AND, sql_list, sql_iso, SQL_ASC, SQL_TRUE, SQL_ONE, SQL_DESC, SQL_IS_NULL, sql_alias
from pyLibrary.sql.sqlite import join_column
@ -106,11 +103,12 @@ class MySQL(object):
)
except Exception as e:
if self.settings.host.find("://") == -1:
Log.error(u"Failure to connect to {{host}}:{{port}}",
host=self.settings.host,
port=self.settings.port,
cause=e
)
Log.error(
u"Failure to connect to {{host}}:{{port}}",
host=self.settings.host,
port=self.settings.port,
cause=e
)
else:
Log.error(u"Failure to connect. PROTOCOL PREFIX IS PROBABLY BAD", e)
self.cursor = None
@ -180,7 +178,8 @@ class MySQL(object):
try:
self.db.close()
except Exception as e:
if e.message.find("Already closed") >= 0:
e = Except.wrap(e)
if "Already closed" in e:
return
Log.warning("can not close()", e)
@ -262,8 +261,7 @@ class MySQL(object):
if param:
sql = expand_template(sql, quote_param(param))
sql = self.preamble + outdent(sql)
if self.debug:
Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
self.cursor.execute(sql)
if row_tuples:
@ -301,13 +299,12 @@ class MySQL(object):
if param:
sql = expand_template(sql, quote_param(param))
sql = self.preamble + outdent(sql)
if self.debug:
Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
self.cursor.execute(sql)
grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
# columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
result = zip(*grid)
result = transpose(*grid)
if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS
self.cursor.close()
@ -333,8 +330,7 @@ class MySQL(object):
if param:
sql = expand_template(sql, quote_param(param))
sql = self.preamble + outdent(sql)
if self.debug:
Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
self.cursor.execute(sql)
columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
@ -362,85 +358,6 @@ class MySQL(object):
if self.debug or len(self.backlog) >= MAX_BATCH_SIZE:
self._execute_backlog()
@staticmethod
@override
def execute_sql(
host,
username,
password,
sql,
schema=None,
param=None,
kwargs=None
):
"""EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?"""
kwargs.schema = coalesce(kwargs.schema, kwargs.database)
if param:
with MySQL(kwargs) as temp:
sql = expand_template(sql, quote_param(param))
# We have no way to execute an entire SQL file in bulk, so we
# have to shell out to the commandline client.
args = [
"mysql",
"-h{0}".format(host),
"-u{0}".format(username),
"-p{0}".format(password)
]
if schema:
args.append("{0}".format(schema))
try:
proc = subprocess.Popen(
args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=-1
)
if isinstance(sql, text_type):
sql = sql.encode("utf8")
(output, _) = proc.communicate(sql)
except Exception as e:
raise Log.error("Can not call \"mysql\"", e)
if proc.returncode:
if len(sql) > 10000:
sql = "<" + text_type(len(sql)) + " bytes of sql>"
Log.error(
"Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n",
sql=indent(sql),
return_code=proc.returncode,
output=output
)
@staticmethod
@override
def execute_file(
filename,
host,
username,
password,
schema=None,
param=None,
ignore_errors=False,
kwargs=None
):
# MySQLdb provides no way to execute an entire SQL file in bulk, so we
# have to shell out to the commandline client.
file = File(filename)
if file.extension == 'zip':
sql = file.read_zipfile()
else:
sql = File(filename).read()
if ignore_errors:
with suppress_exception:
MySQL.execute_sql(sql=sql, param=param, kwargs=kwargs)
else:
MySQL.execute_sql(sql=sql, param=param, kwargs=kwargs)
def _execute_backlog(self):
if not self.backlog: return
@ -451,8 +368,7 @@ class MySQL(object):
for b in backlog:
sql = self.preamble + b
try:
if self.debug:
Log.note("Execute SQL:\n{{sql|indent}}", sql=sql)
self.debug and Log.note("Execute SQL:\n{{sql|indent}}", sql=sql)
self.cursor.execute(b)
except Exception as e:
Log.error("Can not execute sql:\n{{sql}}", sql=sql, cause=e)
@ -463,8 +379,7 @@ class MySQL(object):
for i, g in jx.groupby(backlog, size=MAX_BATCH_SIZE):
sql = self.preamble + ";\n".join(g)
try:
if self.debug:
Log.note("Execute block of SQL:\n{{sql|indent}}", sql=sql)
self.debug and Log.note("Execute block of SQL:\n{{sql|indent}}", sql=sql)
self.cursor.execute(sql)
self.cursor.close()
self.cursor = self.db.cursor()
@ -567,19 +482,93 @@ class MySQL(object):
sort = jx.normalize_sort_parameters(sort)
return sql_list([quote_column(s.field) + (SQL_DESC if s.sort == -1 else SQL_ASC) for s in sort])
@override
def execute_sql(
host,
username,
password,
sql,
schema=None,
param=None,
kwargs=None
):
"""EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?"""
kwargs.schema = coalesce(kwargs.schema, kwargs.database)
if param:
with MySQL(kwargs) as temp:
sql = expand_template(sql, quote_param(param))
# We have no way to execute an entire SQL file in bulk, so we
# have to shell out to the commandline client.
args = [
"mysql",
"-h{0}".format(host),
"-u{0}".format(username),
"-p{0}".format(password)
]
if schema:
args.append("{0}".format(schema))
try:
proc = subprocess.Popen(
args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=-1
)
if isinstance(sql, text_type):
sql = sql.encode("utf8")
(output, _) = proc.communicate(sql)
except Exception as e:
raise Log.error("Can not call \"mysql\"", e)
if proc.returncode:
if len(sql) > 10000:
sql = "<" + text_type(len(sql)) + " bytes of sql>"
Log.error(
"Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n",
sql=indent(sql),
return_code=proc.returncode,
output=output
)
@override
def execute_file(
filename,
host,
username,
password,
schema=None,
param=None,
ignore_errors=False,
kwargs=None
):
# MySQLdb provides no way to execute an entire SQL file in bulk, so we
# have to shell out to the commandline client.
file = File(filename)
if file.extension == 'zip':
sql = file.read_zipfile()
else:
sql = File(filename).read()
if ignore_errors:
with suppress_exception:
execute_sql(sql=sql, kwargs=kwargs)
else:
execute_sql(sql=sql, kwargs=kwargs)
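A hedged usage sketch; after the move these appear to be module-level helpers (the unqualified execute_sql call above suggests so), and every value below is a placeholder:

    from pyLibrary.sql.mysql import execute_file

    # shells out to the `mysql` command-line client, which must be on the PATH
    execute_file(
        filename="resources/schema.sql",  # plain .sql, or a .zip containing the dump
        host="localhost",
        username="dev",
        password="dev",
        schema="unittest",
        ignore_errors=False
    )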
ESCAPE_DCT = {
u"\\": u"\\\\",
# u"\0": u"\\0",
# u"\"": u'\\"',
u"\0": u"\\0",
u"\"": u'\\"',
u"\'": u"''",
# u"\b": u"\\b",
# u"\f": u"\\f",
# u"\n": u"\\n",
# u"\r": u"\\r",
# u"\t": u"\\t",
# u"%": u"\\%",
# u"_": u"\\_"
u"\b": u"\\b",
u"\f": u"\\f",
u"\n": u"\\n",
u"\r": u"\\r",
u"\t": u"\\t"
}
@ -615,13 +604,12 @@ def quote_column(column_name, table=None):
if column_name == None:
Log.error("missing column_name")
elif isinstance(column_name, text_type):
if table:
column_name = join_column(table, column_name)
return SQL("`" + column_name.replace(".", "`.`") + "`") # MY SQL QUOTE OF COLUMN NAMES
elif isinstance(column_name, binary_type):
if table:
column_name = join_column(table, column_name)
return SQL("`" + column_name.decode('utf8').replace(".", "`.`") + "`")
return join_column(table, column_name)
else:
return SQL("`" + '`.`'.join(split_field(column_name)) + "`") # MYSQL QUOTE OF COLUMN NAMES
elif isinstance(column_name, binary_type):
return quote_column(column_name.decode('utf8'), table)
elif isinstance(column_name, list):
if table:
return sql_list(join_column(table, c) for c in column_name)
@ -631,10 +619,6 @@ def quote_column(column_name, table=None):
return SQL(sql_alias(column_name.value, quote_column(column_name.name)))
def quote_list(value):
return sql_iso(sql_list(map(quote_value, value)))
def quote_sql(value, param=None):
"""
USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT
@ -650,7 +634,7 @@ def quote_sql(value, param=None):
elif isinstance(value, Mapping):
return quote_value(json_encode(value))
elif hasattr(value, '__iter__'):
return sql_iso(sql_list(map(quote_value, value)))
return quote_list(value)
else:
return text_type(value)
except Exception as e:
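A hedged sketch of the simplified quoting (output shown approximately):

    from pyLibrary.sql.mysql import quote_column, quote_list

    quote_column("build.date")   # -> `build`.`date`  (dotted names split and backtick-quoted)
    quote_column(b"build.date")  # bytes are decoded and handled the same way
    quote_list(["x", 1])         # -> ('x', 1)  values quoted via quote_value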

133
vendor/pyLibrary/sql/sqlite.py поставляемый
Просмотреть файл

@ -17,7 +17,8 @@ import re
import sys
from collections import Mapping, namedtuple
from mo_dots import Data, coalesce, unwraplist
from jx_base.expressions import jx_expression
from mo_dots import Data, coalesce, unwraplist, Null
from mo_files import File
from mo_future import allocate_lock as _allocate_lock, text_type
from mo_kwargs import override
@ -25,21 +26,23 @@ from mo_logs import Log
from mo_logs.exceptions import Except, extract_stack, ERROR, format_trace
from mo_logs.strings import quote
from mo_math.stats import percentile
from mo_threads import Queue, Signal, Thread, Lock, Till
from mo_threads import Queue, Thread, Lock, Till
from mo_times import Date, Duration
from mo_times.timer import Timer
from pyLibrary import convert
from pyLibrary.sql import DB, SQL, SQL_TRUE, SQL_FALSE, SQL_NULL, SQL_SELECT, sql_iso
from pyLibrary.sql import DB, SQL, SQL_TRUE, SQL_FALSE, SQL_NULL, SQL_SELECT, sql_iso, sql_list
DEBUG = False
TRACE = True
FORMAT_COMMAND = "Running command\n{{command|limit(100)|indent}}"
DOUBLE_TRANSACTION_ERROR = "You can not query outside a transaction you have open already"
TOO_LONG_TO_HOLD_TRANSACTION = 10
sqlite3 = None
_load_extension_warning_sent = False
_upgraded = False
known_databases = {Null: None}
class Sqlite(DB):
@ -63,12 +66,18 @@ class Sqlite(DB):
self.settings = kwargs
self.filename = File(filename).abspath
if known_databases.get(self.filename):
Log.error("Not allowed to create more than one Sqlite instance for {{file}}", file=self.filename)
# SETUP DATABASE
DEBUG and Log.note("Sqlite version {{version}}", version=sqlite3.sqlite_version)
try:
if db == None:
self.db = sqlite3.connect(coalesce(self.filename, ':memory:'), check_same_thread=False, isolation_level=None)
self.db = sqlite3.connect(
database=coalesce(self.filename, ":memory:"),
check_same_thread=False,
isolation_level=None
)
else:
self.db = db
except Exception as e:
@ -76,7 +85,7 @@ class Sqlite(DB):
load_functions and self._load_functions()
self.locker = Lock()
self.available_transactions = []
self.available_transactions = [] # LIST OF ALL THE TRANSACTIONS BEING MANAGED
self.queue = Queue("sql commands") # HOLD (command, result, signal, stacktrace) TUPLES
self.get_trace = coalesce(get_trace, TRACE)
@ -136,8 +145,17 @@ class Sqlite(DB):
signal.acquire()
result = Data()
trace = extract_stack(1) if self.get_trace else None
if self.get_trace:
current_thread = Thread.current()
with self.locker:
for t in self.available_transactions:
if t.thread is current_thread:
Log.error(DOUBLE_TRANSACTION_ERROR)
self.queue.add(CommandItem(command, result, signal, trace, None))
signal.acquire()
if result.exception:
Log.error("Problem with Sqlite call", cause=result.exception)
return result
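A hedged sketch of the rule the new guard enforces: while a thread holds an open transaction, queries must go through that transaction object, not through the database directly (the file name is a placeholder, and the transaction's execute method is assumed from the callers shown elsewhere in this commit):

    db = Sqlite(filename="metadata.sqlite")

    with db.transaction() as t:
        t.execute("CREATE TABLE IF NOT EXISTS example (id INTEGER PRIMARY KEY, name TEXT)")
        t.execute("INSERT INTO example (name) VALUES ('demo')")
        # calling db.query(...) from this same thread here would raise
        # DOUBLE_TRANSACTION_ERROR; route the work through `t` instead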
@ -151,7 +169,7 @@ class Sqlite(DB):
self.closed = True
signal = _allocate_lock()
signal.acquire()
self.queue.add((COMMIT, None, signal, None))
self.queue.add(CommandItem(COMMIT, None, signal, None, None))
signal.acquire()
self.worker.please_stop.go()
return
@ -189,44 +207,62 @@ class Sqlite(DB):
self.db.create_function("REGEXP", 2, regexp)
def show_warning(self):
blocked = (self.delayed_queries+self.delayed_transactions)[0]
def show_transactions_blocked_warning(self):
blocker = self.last_command_item
blocked = (self.delayed_queries+self.delayed_transactions)[0]
Log.warning(
"Query for thread {{blocked_thread|quote}} at\n{{blocked_trace|indent}}is blocked by {{blocker_thread|quote}} at\n{{blocker_trace|indent}}this message brought to you by....",
blocker_thread=blocker.thread.name,
"Query on thread {{blocked_thread|json}} at\n"
"{{blocked_trace|indent}}"
"is blocked by {{blocker_thread|json}} at\n"
"{{blocker_trace|indent}}"
"this message brought to you by....",
blocker_trace=format_trace(blocker.trace),
blocked_thread=blocked.thread.name,
blocked_trace=format_trace(blocked.trace)
blocked_trace=format_trace(blocked.trace),
blocker_thread=blocker.transaction.thread.name if blocker.transaction is not None else None,
blocked_thread=blocked.transaction.thread.name if blocked.transaction is not None else None
)
def _close_transaction(self, command_item):
query, result, signal, trace, transaction = command_item
transaction.end_of_life = True
DEBUG and Log.note(FORMAT_COMMAND, command=query)
with self.locker:
self.available_transactions.remove(transaction)
assert transaction not in self.available_transactions
old_length = len(self.transaction_stack)
old_trans = self.transaction_stack[-1]
del self.transaction_stack[-1]
assert old_length - 1 == len(self.transaction_stack)
assert old_trans
assert old_trans not in self.transaction_stack
if not self.transaction_stack:
# NESTED TRANSACTIONS NOT ALLOWED IN sqlite3
DEBUG and Log.note(FORMAT_COMMAND, command=query)
self.db.execute(query)
# PUT delayed BACK ON THE QUEUE, IN THE ORDER FOUND, BUT WITH QUERIES FIRST
if self.too_long is not None:
with self.too_long.lock:
self.too_long.job_queue.clear()
self.too_long = None
has_been_too_long = False
with self.locker:
if self.too_long is not None:
self.too_long, too_long = None, self.too_long
# WE ARE CHEATING HERE: WE REACH INTO THE Signal MEMBERS AND REMOVE WHAT WE ADDED TO THE INTERNAL job_queue
with too_long.lock:
has_been_too_long = bool(too_long)
too_long.job_queue = None
if self.delayed_transactions:
for c in reversed(self.delayed_transactions):
self.queue.push(c)
del self.delayed_transactions[:]
if self.delayed_queries:
for c in reversed(self.delayed_queries):
self.queue.push(c)
del self.delayed_queries[:]
# PUT delayed BACK ON THE QUEUE, IN THE ORDER FOUND, BUT WITH QUERIES FIRST
if self.delayed_transactions:
for c in reversed(self.delayed_transactions):
self.queue.push(c)
del self.delayed_transactions[:]
if self.delayed_queries:
for c in reversed(self.delayed_queries):
self.queue.push(c)
del self.delayed_queries[:]
if has_been_too_long:
Log.note("Transaction blockage cleared")
def _worker(self, please_stop):
try:
@ -235,11 +271,14 @@ class Sqlite(DB):
command_item = self.queue.pop(till=please_stop)
if command_item is None:
break
self._process_command_item(command_item)
try:
self._process_command_item(command_item)
except Exception as e:
Log.warning("worker can not execute command", cause=e)
except Exception as e:
e = Except.wrap(e)
if not please_stop:
Log.warning("Problem with sql thread", cause=e)
Log.warning("Problem with sql", cause=e)
finally:
self.closed = True
DEBUG and Log.note("Database is closed")
@ -248,31 +287,34 @@ class Sqlite(DB):
def _process_command_item(self, command_item):
query, result, signal, trace, transaction = command_item
with Timer("SQL Timing", debug=DEBUG):
with Timer("SQL Timing", silent=not DEBUG):
if transaction is None:
# THIS IS A TRANSACTIONLESS QUERY, DELAY IT IF THERE IS A CURRENT TRANSACTION
if self.transaction_stack:
if self.too_long is None:
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
self.too_long.on_go(self.show_warning)
self.delayed_queries.append(command_item)
with self.locker:
if self.too_long is None:
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
self.too_long.on_go(self.show_transactions_blocked_warning)
self.delayed_queries.append(command_item)
return
elif self.transaction_stack and self.transaction_stack[-1] not in [transaction, transaction.parent]:
# THIS TRANSACTION IS NOT THE CURRENT TRANSACTION, DELAY IT
if self.too_long is None:
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
self.too_long.on_go(self.show_warning)
self.delayed_transactions.append(command_item)
with self.locker:
if self.too_long is None:
self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
self.too_long.on_go(self.show_transactions_blocked_warning)
self.delayed_transactions.append(command_item)
return
else:
# ENSURE THE CURRENT TRANSACTION IS UP TO DATE FOR THIS query
if not self.transaction_stack:
# sqlite3 ALLOWS ONLY ONE TRANSACTION AT A TIME
DEBUG and Log.note(FORMAT_COMMAND, command=BEGIN)
self.db.execute(BEGIN)
self.transaction_stack.append(transaction)
elif transaction != self.transaction_stack[-1]:
elif transaction is not self.transaction_stack[-1]:
self.transaction_stack.append(transaction)
elif transaction.exception:
elif transaction.exception and query is not ROLLBACK:
result.exception = Except(
type=ERROR,
template="Not allowed to continue using a transaction that failed",
@ -374,10 +416,12 @@ class Transaction(object):
def do_all(self):
# ENSURE PARENT TRANSACTION IS UP TO DATE
if self.parent:
self.parent.do_all()
c = None
try:
if self.parent == self:
Log.warning("Transactions parent is equal to itself.")
if self.parent:
self.parent.do_all()
# GET THE REMAINING COMMANDS
with self.locker:
todo = self.todo[self.complete:]
@ -387,8 +431,6 @@ class Transaction(object):
for c in todo:
DEBUG and Log.note(FORMAT_COMMAND, command=c.command)
self.db.db.execute(c.command)
if c.command in [COMMIT, ROLLBACK]:
Log.error("logic error")
except Exception as e:
Log.error("problem running commands", current=c, cause=e)
@ -453,6 +495,9 @@ def quote_value(value):
return SQL(text_type(value))
def quote_list(list):
return sql_iso(sql_list(map(quote_value, list)))
def join_column(a, b):
a = quote_column(a)
b = quote_column(b)

63
vendor/pyLibrary/testing/elasticsearch.py поставляемый
Просмотреть файл

@ -12,57 +12,58 @@ from __future__ import division
from __future__ import unicode_literals
import mo_json
from mo_files import File
from mo_logs import Log
from mo_dots import Data
from mo_dots import unwrap, wrap
from pyLibrary import convert
from pyLibrary.env.elasticsearch import Index, Cluster
from mo_kwargs import override
from jx_python import jx
from mo_dots import Data, Null, unwrap, wrap
from mo_files import File
from mo_kwargs import override
from mo_logs import Log
from pyLibrary.env.elasticsearch import Cluster
def make_test_instance(name, settings):
if settings.filename:
File(settings.filename).delete()
return open_test_instance(name, settings)
@override
def make_test_instance(name, filename=None, kwargs=None):
if filename != None:
File(filename).delete()
return open_test_instance(kwargs)
def open_test_instance(name, settings):
if settings.filename:
@override
def open_test_instance(name, filename=None, es=None, kwargs=None):
if filename != None:
Log.note(
"Using {{filename}} as {{type}}",
filename=settings.filename,
filename=filename,
type=name
)
return FakeES(settings)
return FakeES(filename=filename)
else:
Log.note(
"Using ES cluster at {{host}} as {{type}}",
host=settings.host,
host=es.host,
type=name
)
cluster = Cluster(settings)
cluster = Cluster(es)
try:
old_index = cluster.get_index(kwargs=settings)
old_index = cluster.get_index(es)
cluster.delete_index(old_index.settings.index)
except Exception as e:
if "Can not find index" not in e:
Log.error("unexpected", cause=e)
es = cluster.create_index(limit_replicas=True, limit_replicas_warning=False, kwargs=settings)
es.delete_all_but_self()
es.add_alias(settings.index)
return es
output = cluster.create_index(limit_replicas=True, limit_replicas_warning=False, kwargs=es)
output.delete_all_but_self()
output.add_alias(es.index)
return output
class FakeES():
@override
def __init__(self, filename, host="fake", index="fake", kwargs=None):
self.settings = kwargs
self.filename = filename
self.file = File(filename)
self.cluster= Null
try:
self.data = mo_json.json2value(File(self.filename).read())
self.data = mo_json.json2value(self.file.read())
except Exception as e:
self.data = Data()
@ -85,11 +86,8 @@ class FakeES():
}
unwrap(self.data).update(records)
data_as_json = mo_json.value2json(self.data, pretty=True)
File(self.filename).write(data_as_json)
Log.note("{{num}} documents added", num= len(records))
self.refresh()
Log.note("{{num}} documents added", num=len(records))
def add(self, record):
if isinstance(record, list):
@ -97,9 +95,14 @@ class FakeES():
return self.extend([record])
def delete_record(self, filter):
f = convert.esfilter2where(filter)
f = esfilter2where(filter)
self.data = wrap({k: v for k, v in self.data.items() if not f(v)})
def refresh(self, *args, **kwargs):
data_as_json = mo_json.value2json(self.data, pretty=True)
self.file.write(data_as_json)
def set_refresh_interval(self, seconds):
pass
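A hedged sketch of the two call shapes the rewritten helpers accept (paths and cluster settings are placeholders):

    from mo_dots import wrap
    from pyLibrary.testing.elasticsearch import open_test_instance

    # file-backed FakeES, for tests with no cluster available
    fake = open_test_instance("testing", filename="tests/resources/fake_es.json")

    # real cluster; `es` carries the cluster and index settings
    real = open_test_instance("testing", es=wrap({"host": "http://localhost", "port": 9200, "index": "testing"}))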

41
vendor/tuid/app.py поставляемый
Просмотреть файл

@ -17,7 +17,7 @@ from flask import Flask, Response
from mo_dots import listwrap, coalesce, unwraplist
from mo_json import value2json, json2value
from mo_logs import Log, constants, startup
from mo_logs import Log, constants, startup, Except
from mo_logs.strings import utf82unicode, unicode2utf8
from mo_times import Timer, Date
from pyLibrary.env.flask_wrappers import cors_wrapper
@ -26,6 +26,8 @@ from tuid.util import map_to_array
OVERVIEW = None
QUERY_SIZE_LIMIT = 10 * 1000 * 1000
EXPECTING_QUERY = b"expecting query\r\n"
TOO_BUSY = 10
class TUIDApp(Flask):
@ -51,7 +53,7 @@ def tuid_endpoint(path):
if flask.request.headers.get("content-length", "") in ["", "0"]:
# ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
return Response(
unicode2utf8("expecting query"),
EXPECTING_QUERY,
status=400,
headers={
"Content-Type": "text/html"
@ -85,19 +87,31 @@ def tuid_endpoint(path):
rev = None
paths = None
branch_name = None
for a in ands:
rev = coalesce(rev, a.eq.revision)
paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
branch_name = coalesce(rev, a.eq.branch)
branch_name = coalesce(branch_name, a.eq.branch)
paths = listwrap(paths)
if len(paths) <= 0:
Log.warning("Can't find file paths found in request: {{request}}", request=request_body)
response = [("Error in app.py - no paths found", [])]
if len(paths) == 0:
response, completed = [], True
elif service.conn.pending_transactions > TOO_BUSY: # CHECK IF service IS VERY BUSY
# TODO: BE SURE TO UPDATE STATS TOO
Log.note("Too many open transactions")
response, completed = [], False
else:
# RETURN TUIDS
with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
response = service.get_tuids_from_files(revision=rev, files=paths, going_forward=True, repo=branch_name)
response, completed = service.get_tuids_from_files(
revision=rev, files=paths, going_forward=True, repo=branch_name
)
if not completed:
Log.note(
"Request for {{num}} files is incomplete for revision {{rev}}.",
num=len(paths), rev=rev
)
if query.meta.format == 'list':
formatter = _stream_list
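A hedged sketch of a request body that satisfies the parsing above (revision, paths, and branch are placeholders):

    from mo_json import value2json

    request_body = value2json({
        "from": "files",
        "where": {"and": [
            {"eq": {"revision": "abcdef012345"}},
            {"in": {"path": ["gfx/thebes/gfxPrefs.h", "dom/base/nsDocument.cpp"]}},
            {"eq": {"branch": "mozilla-central"}}
        ]},
        "meta": {"format": "list"}
    })
    # POST this to the endpoint; a 202 status means the TUIDs are not yet complete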
@ -106,12 +120,13 @@ def tuid_endpoint(path):
return Response(
formatter(response),
status=200,
status=200 if completed else 202,
headers={
"Content-Type": "application/json"
}
)
except Exception as e:
e = Except.wrap(e)
Log.warning("could not handle request", cause=e)
return Response(
unicode2utf8(value2json(e, pretty=True)),
@ -130,6 +145,10 @@ def _stream_table(files):
def _stream_list(files):
if not files:
yield b'{"format":"list", "data":[]}'
return
sep = b'{"format":"list", "data":['
for f, pairs in files:
yield sep
@ -168,7 +187,7 @@ if __name__ in ("__main__",):
Log.start(config.debug)
service = TUIDService(config.tuid)
Log.note("Started TUID Service.")
Log.note("Started TUID Service")
except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
try:
Log.error("Serious problem with TUID service construction! Shutdown!", cause=e)
@ -178,7 +197,7 @@ if __name__ in ("__main__",):
if config.flask:
if config.flask.port and config.args.process_num:
config.flask.port += config.args.process_num
Log.note("Running Service.")
Log.note("Running Flask...")
flask_app.run(**config.flask)

6
vendor/tuid/client.py поставляемый
Просмотреть файл

@ -19,7 +19,7 @@ from mo_times import Timer, Date
from pyLibrary import aws
from pyLibrary.env import http
from pyLibrary.sql import sql_iso, sql_list
from pyLibrary.sql.sqlite import Sqlite, quote_value
from pyLibrary.sql.sqlite import Sqlite, quote_value, quote_list
DEBUG = True
SLEEP_ON_ERROR = 30
@ -84,7 +84,7 @@ class TuidClient(object):
):
response = self.db.query(
"SELECT file, tuids FROM tuid WHERE revision=" + quote_value(revision) +
" AND file IN " + sql_iso(sql_list(map(quote_value, files)))
" AND file IN " + quote_list(files)
)
found = {file: json2value(tuids) for file, tuids in response.data}
@ -124,7 +124,7 @@ class TuidClient(object):
with self.db.transaction() as transaction:
command = "INSERT INTO tuid (revision, file, tuids) VALUES " + sql_list(
sql_iso(sql_list(map(quote_value, (revision, r.path, value2json(r.tuids)))))
quote_list((revision, r.path, value2json(r.tuids)))
for r in new_response.data
if r.tuids != None
)

764
vendor/tuid/clogger.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,764 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import time
# Use import as follows to prevent
# circular dependency conflict for
# TUIDService, which makes use of the
# Clogger
import tuid.service
from jx_python import jx
from mo_dots import Null, coalesce
from mo_hg.hg_mozilla_org import HgMozillaOrg
from mo_logs import Log
from mo_threads import Till, Thread, Lock, Queue, Signal
from mo_times.durations import DAY
from pyLibrary.env import http
from pyLibrary.sql import sql_list, quote_set
from tuid import sql
RETRY = {"times": 3, "sleep": 5}
SQL_CSET_BATCH_SIZE = 500
CSET_TIP_WAIT_TIME = 5 * 60 # seconds
CSET_BACKFILL_WAIT_TIME = 1 * 60 # seconds
CSET_MAINTENANCE_WAIT_TIME = 30 * 60 # seconds
CSET_DELETION_WAIT_TIME = 1 * 60 # seconds
TUID_EXISTENCE_WAIT_TIME = 1 * 60 # seconds
TIME_TO_KEEP_ANNOTATIONS = 5 * DAY
MAX_TIPFILL_CLOGS = 60 # changeset logs
MAX_BACKFILL_CLOGS = 200 # changeset logs
CHANGESETS_PER_CLOG = 20 # changesets
BACKFILL_REVNUM_TIMEOUT = int(MAX_BACKFILL_CLOGS * 2.5) # Assume 2.5 seconds per clog
MINIMUM_PERMANENT_CSETS = 1000 # changesets
MAXIMUM_NONPERMANENT_CSETS = 20000 # changesets
SIGNAL_MAINTENACE_CSETS = MAXIMUM_NONPERMANENT_CSETS + (0.1 * MAXIMUM_NONPERMANENT_CSETS)
UPDATE_VERY_OLD_FRONTIERS = False
class Clogger:
def __init__(self, conn=None, tuid_service=None, kwargs=None):
try:
self.config = kwargs
self.conn = conn if conn else sql.Sql(self.config.database.name)
self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null
self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
database=None, hg=None, kwargs=self.config, conn=self.conn, clogger=self
)
self.rev_locker = Lock()
self.working_locker = Lock()
self.init_db()
self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
self.deletions_todo = Queue(name="Clogger.deletions_todo")
self.maintenance_signal = Signal(name="Clogger.maintenance_signal")
self.config = self.config.tuid
self.disable_backfilling = False
self.disable_tipfilling = False
self.disable_deletion = False
self.disable_maintenance = False
# Make sure we are filled before allowing queries
numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
if numrevs < MINIMUM_PERMANENT_CSETS:
Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
oldest_rev = 'tip'
with self.conn.transaction() as t:
tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
if tmp:
oldest_rev = tmp
self._fill_in_range(
MINIMUM_PERMANENT_CSETS - numrevs,
oldest_rev,
timestamp=False
)
Log.note(
"Table is filled with atleast {{minim}} entries. Starting workers...",
minim=MINIMUM_PERMANENT_CSETS
)
Thread.run('clogger-tip', self.fill_forward_continuous)
Thread.run('clogger-backfill', self.fill_backward_with_list)
Thread.run('clogger-maintenance', self.csetLog_maintenance)
Thread.run('clogger-deleter', self.csetLog_deleter)
Log.note("Started clogger workers.")
except Exception as e:
Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
def init_db(self):
with self.conn.transaction() as t:
t.execute('''
CREATE TABLE IF NOT EXISTS csetLog (
revnum INTEGER PRIMARY KEY,
revision CHAR(12) NOT NULL,
timestamp INTEGER
);''')
def revnum(self):
"""
:return: max revnum that was added
"""
return coalesce(self.conn.get_one("SELECT max(revnum) as revnum FROM csetLog")[0], 0)
def get_tip(self, transaction):
return transaction.get_one(
"SELECT max(revnum) as revnum, revision FROM csetLog"
)
def get_tail(self, transaction):
return transaction.get_one(
"SELECT min(revnum) as revnum, revision FROM csetLog"
)
def _get_clog(self, clog_url):
try:
Log.note("Searching through changelog {{url}}", url=clog_url)
clog_obj = http.get_json(clog_url, retry=RETRY)
return clog_obj
except Exception as e:
Log.error(
"Unexpected error getting changset-log for {{url}}: {{error}}",
url=clog_url,
error=e
)
def _get_one_revision(self, transaction, cset_entry):
# Returns a single revision if it exists
_, rev, _ = cset_entry
return transaction.get_one("SELECT revision FROM csetLog WHERE revision=?", (rev,))
def _get_one_revnum(self, transaction, rev):
# Returns a single revnum if it exists
return transaction.get_one("SELECT revnum FROM csetLog WHERE revision=?", (rev,))
def _get_revnum_range(self, transaction, revnum1, revnum2):
# Returns a range of revision numbers (that is inclusive)
high_num = max(revnum1, revnum2)
low_num = min(revnum1, revnum2)
return transaction.query(
"SELECT revnum, revision FROM csetLog WHERE "
"revnum >= " + str(low_num) + " AND revnum <= " + str(high_num)
).data
def recompute_table_revnums(self):
'''
Recomputes the revnums for the csetLog table
by creating a new table, and copying csetLog to
it. The INTEGER PRIMARY KEY in the temp table auto increments
as rows are added.
IMPORTANT: Only call this after acquiring the
lock `self.working_locker`.
:return:
'''
with self.conn.transaction() as t:
t.execute('''
CREATE TABLE temp (
revnum INTEGER PRIMARY KEY,
revision CHAR(12) NOT NULL,
timestamp INTEGER
);''')
t.execute(
"INSERT INTO temp (revision, timestamp) "
"SELECT revision, timestamp FROM csetlog ORDER BY revnum ASC"
)
t.execute("DROP TABLE csetLog;")
t.execute("ALTER TABLE temp RENAME TO csetLog;")
def check_for_maintenance(self):
'''
Returns True if the maintenance worker should be run now,
and False otherwise.
:return:
'''
numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
if numrevs >= SIGNAL_MAINTENACE_CSETS:
return True
return False
def add_cset_entries(self, ordered_rev_list, timestamp=False, number_forward=True):
'''
Adds a list of revisions to the table. Assumes ordered_rev_list is an ordered list
based on how changesets are found in the changelog. Going forwards or backwards is dealt
with by flipping the list.
:param ordered_rev_list: Order given from changeset log searching.
:param timestamp: If false, records are kept indefinitely
but if holes exist: (delete, None, delete, None)
those delete's with None's around them
will not be deleted.
:param number_forward: If True, this function will number the revision list
by going forward from max(revNum), else it'll go backwards
from revNum, then add X to all revnums and self.next_revnum
where X is the length of ordered_rev_list
:return:
'''
with self.conn.transaction() as t:
current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
if not current_min or not current_max:
current_min = 0
current_max = 0
direction = -1
start = current_min - 1
if number_forward:
direction = 1
start = current_max + 1
ordered_rev_list = ordered_rev_list[::-1]
insert_list = [
(
start + direction * count,
rev,
int(time.time()) if timestamp else -1
)
for count, rev in enumerate(ordered_rev_list)
]
# In case of overlapping requests
fmt_insert_list = []
for cset_entry in insert_list:
tmp = self._get_one_revision(t, cset_entry)
if not tmp:
fmt_insert_list.append(cset_entry)
for _, tmp_insert_list in jx.groupby(fmt_insert_list, size=SQL_CSET_BATCH_SIZE):
t.execute(
"INSERT INTO csetLog (revnum, revision, timestamp)" +
" VALUES " +
sql_list(
quote_set((revnum, revision, timestamp))
for revnum, revision, timestamp in tmp_insert_list
)
)
# Move the revision numbers forward if needed
self.recompute_table_revnums()
# Start a maintenance run if needed
if self.check_for_maintenance():
self.maintenance_signal.go()
def _fill_in_range(self, parent_cset, child_cset, timestamp=False, number_forward=True):
'''
Fills cset logs in a certain range. 'parent_cset' can be an int and in that case,
we get that many changesets instead. If parent_cset is an int, then we consider
that we are going backwards (number_forward is False) and we ignore the first
changeset of the first log, and we ignore the setting for number_forward.
Otherwise, we continue until we find the given 'parent_cset'.
:param parent_cset:
:param child_cset:
:param timestamp:
:param number_forward:
:return:
'''
csets_to_add = []
found_parent = False
find_parent = False
if type(parent_cset) != int:
find_parent = True
elif parent_cset >= MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG:
Log.warning(
"Requested number of new changesets {{num}} is too high. "
"Max number that can be requested is {{maxnum}}.",
num=parent_cset,
maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
)
return None
csets_found = 0
clogs_seen = 0
final_rev = child_cset
while not found_parent and clogs_seen < MAX_BACKFILL_CLOGS:
clog_url = self.tuid_service.hg_url / self.config.hg.branch / 'json-log' / final_rev
clog_obj = self._get_clog(clog_url)
clog_csets_list = list(clog_obj['changesets'])
for clog_cset in clog_csets_list[:-1]:
if not number_forward and csets_found <= 0:
# Skip this entry; it already exists
csets_found += 1
continue
nodes_cset = clog_cset['node'][:12]
if find_parent:
if nodes_cset == parent_cset:
found_parent = True
if not number_forward:
# When going backwards, include this entry;
# it is the given parent
csets_to_add.append(nodes_cset)
break
else:
if csets_found + 1 > parent_cset:
found_parent = True
if not number_forward:
# When going backwards, include this entry;
# it is the given parent (which is supposed
# to already exist)
csets_to_add.append(nodes_cset)
break
csets_found += 1
csets_to_add.append(nodes_cset)
if found_parent:
break
clogs_seen += 1
final_rev = clog_csets_list[-1]['node'][:12]
if found_parent:
self.add_cset_entries(csets_to_add, timestamp=timestamp, number_forward=number_forward)
else:
Log.warning(
"Couldn't find the end of the request for {{request}}. "
"Max number that can be requested through _fill_in_range is {{maxnum}}.",
request={
'parent_cset': parent_cset,
'child_cset':child_cset,
'number_forward': number_forward
},
maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
)
return None
return csets_to_add
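# Illustrative example (not from the original source; revisions are hypothetical):
# _fill_in_range(50, 'abc123def456', number_forward=False) pages backwards through
# json-log from 'abc123def456' until 50 changesets are collected, while
# _fill_in_range('0f1e2d3c4b5a', 'abc123def456') pages backwards until the revision
# '0f1e2d3c4b5a' itself is reached.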
def fill_backward_with_list(self, please_stop=None):
'''
Expects requests of the tuple form: (parent_cset, timestamp)
parent_cset can be an int X to go back by X changesets, or
a string to search for going backwards in time. If timestamp
is false, no timestamps will be added to the entries.
:param please_stop:
:return:
'''
while not please_stop:
try:
request = self.csets_todo_backwards.pop(till=please_stop)
if please_stop:
break
# If backfilling is disabled, all requests
# are ignored.
if self.disable_backfilling:
Till(till=CSET_BACKFILL_WAIT_TIME).wait()
continue
if request:
parent_cset, timestamp = request
else:
continue
with self.working_locker:
with self.conn.transaction() as t:
parent_revnum = self._get_one_revnum(t, parent_cset)
if parent_revnum:
continue
with self.conn.transaction() as t:
_, oldest_revision = self.get_tail(t)
self._fill_in_range(
parent_cset,
oldest_revision,
timestamp=timestamp,
number_forward=False
)
Log.note("Finished {{cset}}", cset=parent_cset)
except Exception as e:
Log.warning("Unknown error occurred during backfill: ", cause=e)
def update_tip(self):
'''
Returns False if the tip is already at the newest, or True
if an update has taken place.
:return:
'''
clog_obj = self._get_clog(self.tuid_service.hg_url / self.config.hg.branch / 'json-log' / 'tip')
# Get current tip in DB
with self.conn.transaction() as t:
_, newest_known_rev = self.get_tip(t)
# If we are still at the newest, return False; the caller waits
# CSET_TIP_WAIT_TIME seconds before checking again.
first_clog_entry = clog_obj['changesets'][0]['node'][:12]
if newest_known_rev == first_clog_entry:
return False
csets_to_gather = None
if not newest_known_rev:
Log.note(
"No revisions found in table, adding {{minim}} entries...",
minim=MINIMUM_PERMANENT_CSETS
)
csets_to_gather = MINIMUM_PERMANENT_CSETS
found_newest_known = False
csets_to_add = []
csets_found = 0
clogs_seen = 0
Log.note("Found new revisions. Updating csetLog tip to {{rev}}...", rev=first_clog_entry)
while not found_newest_known and clogs_seen < MAX_TIPFILL_CLOGS:
clog_csets_list = list(clog_obj['changesets'])
for clog_cset in clog_csets_list[:-1]:
nodes_cset = clog_cset['node'][:12]
if not csets_to_gather:
if nodes_cset == newest_known_rev:
found_newest_known = True
break
else:
if csets_found >= csets_to_gather:
found_newest_known = True
break
csets_found += 1
csets_to_add.append(nodes_cset)
if not found_newest_known:
# Get the next page
clogs_seen += 1
final_rev = clog_csets_list[-1]['node'][:12]
clog_url = self.tuid_service.hg_url / self.config.hg.branch / 'json-log' / final_rev
clog_obj = self._get_clog(clog_url)
if clogs_seen >= MAX_TIPFILL_CLOGS:
Log.error(
"Too many changesets, can't find last tip or the number is too high: {{rev}}. "
"Maximum possible to request is {{maxnum}}",
rev=coalesce(newest_known_rev, csets_to_gather),
maxnum=MAX_TIPFILL_CLOGS * CHANGESETS_PER_CLOG
)
return False
with self.working_locker:
Log.note("Adding {{csets}}", csets=csets_to_add)
self.add_cset_entries(csets_to_add, timestamp=False)
return True
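# Illustrative summary (not part of the original code): update_tip() compares the
# newest csetLog revision against the first entry of the branch's json-log 'tip' page.
# If they differ, it pages through json-log until the known tip is found (or until
# MINIMUM_PERMANENT_CSETS entries are gathered when the table is empty), then inserts
# the gathered changesets via add_cset_entries() while holding working_locker.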
def fill_forward_continuous(self, please_stop=None):
while not please_stop:
try:
waiting_a_bit = False
if self.disable_tipfilling:
waiting_a_bit = True
if not waiting_a_bit:
# If an update was done, check if there are
# more changesets that have arrived just in case,
# otherwise, we wait.
did_an_update = self.update_tip()
if not did_an_update:
waiting_a_bit = True
if waiting_a_bit:
(please_stop | Till(seconds=CSET_TIP_WAIT_TIME)).wait()
continue
except Exception as e:
Log.warning("Unknown error occurred during tip maintenance:", cause=e)
def csetLog_maintenance(self, please_stop=None):
'''
Handles deleting old csetLog entries, and timestamps
revisions once they fall outside the permanent-storage
window so they can be deleted later.
:param please_stop:
:return:
'''
while not please_stop:
try:
# Wait until something signals the maintenance cycle
# to begin (or end).
(self.maintenance_signal | please_stop).wait()
if please_stop:
break
if self.disable_maintenance:
continue
# Reset signal so we don't request
# maintenance infinitely.
with self.maintenance_signal.lock:
self.maintenance_signal._go = False
with self.working_locker:
all_data = None
with self.conn.transaction() as t:
all_data = sorted(
t.get("SELECT revnum, revision, timestamp FROM csetLog"),
key=lambda x: int(x[0])
)
# Keep the newest MINIMUM_PERMANENT_CSETS permanent (timestamp -1) and ensure everything older has a timestamp
new_data = []
modified = False
for count, (revnum, revision, timestamp) in enumerate(all_data[::-1]):
if count < MINIMUM_PERMANENT_CSETS:
if timestamp != -1:
modified = True
new_data.append((revnum, revision, -1))
else:
new_data.append((revnum, revision, timestamp))
elif not isinstance(timestamp, int) or timestamp == -1:
modified = True
new_data.append((revnum, revision, int(time.time())))
else:
new_data.append((revnum, revision, timestamp))
# Delete annotations at revisions with timestamps
# that are too old. The csetLog entries will have
# their timestamps reset here.
new_data1 = []
annrevs_to_del = []
current_time = time.time()
for count, (revnum, revision, timestamp) in enumerate(new_data[::-1]):
new_timestamp = timestamp
if timestamp != -1:
if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
modified = True
new_timestamp = current_time
annrevs_to_del.append(revision)
new_data1.append((revnum, revision, new_timestamp))
if len(annrevs_to_del) > 0:
# Delete any latestFileMod and annotation entries
# that are too old.
Log.note(
"Deleting annotations and latestFileMod for revisions for being "
"older than {{oldest}}: {{revisions}}",
oldest=TIME_TO_KEEP_ANNOTATIONS,
revisions=annrevs_to_del
)
with self.conn.transaction() as t:
t.execute(
"DELETE FROM latestFileMod WHERE revision IN " +
quote_set(annrevs_to_del)
)
t.execute(
"DELETE FROM annotations WHERE revision IN " +
quote_set(annrevs_to_del)
)
# Delete any overflowing entries
new_data2 = new_data1
reved_all_data = all_data[::-1]
deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
delete_overflowing_revstart = None
if len(deleted_data) > 0:
_, delete_overflowing_revstart, _ = deleted_data[0]
new_data2 = set(all_data) - set(deleted_data)
# Update old frontiers if requested, otherwise
# they will all get deleted by the csetLog_deleter
# worker
if UPDATE_VERY_OLD_FRONTIERS:
_, max_revision, _ = all_data[-1]
for _, revision, _ in deleted_data:
with self.conn.transaction() as t:
old_files = t.get(
"SELECT file FROM latestFileMod WHERE revision=?",
(revision,)
)
if old_files is None or len(old_files) <= 0:
continue
self.tuid_service.get_tuids_from_files(
old_files,
max_revision,
going_forward=True,
)
still_exist = True
while still_exist and not please_stop:
Till(seconds=TUID_EXISTENCE_WAIT_TIME).wait()
with self.conn.transaction() as t:
old_files = t.get(
"SELECT file FROM latestFileMod WHERE revision=?",
(revision,)
)
if old_files is None or len(old_files) <= 0:
still_exist = False
# Update table and schedule a deletion
if modified:
with self.conn.transaction() as t:
t.execute(
"INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES " +
sql_list(
quote_set(cset_entry)
for cset_entry in new_data2
)
)
if not deleted_data:
continue
Log.note("Scheduling {{num_csets}} for deletion", num_csets=len(deleted_data))
self.deletions_todo.add(delete_overflowing_revstart)
except Exception as e:
Log.warning("Unexpected error occured while maintaining csetLog, continuing to try: ", cause=e)
return
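# Maintenance pass, summarized (descriptive comment, derived from the code above):
#   1. keep the newest MINIMUM_PERMANENT_CSETS rows permanent (timestamp -1)
#   2. timestamp everything older so it can expire after TIME_TO_KEEP_ANNOTATIONS
#   3. delete annotations and latestFileMod rows whose timestamp has expired
#   4. queue csets beyond MAXIMUM_NONPERMANENT_CSETS on deletions_todo for csetLog_deleter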
def csetLog_deleter(self, please_stop=None):
'''
Deletes changesets from the csetLog table, along with
annotation entries whose revisions match those changesets.
Accepts csets to delete from self.deletions_todo.
:param please_stop:
:return:
'''
while not please_stop:
try:
request = self.deletions_todo.pop(till=please_stop)
if please_stop:
break
# If deletion is disabled, ignore the current
# request - it will need to be re-requested.
if self.disable_deletion:
Till(till=CSET_DELETION_WAIT_TIME).wait()
continue
with self.working_locker:
first_cset = request
# Since we are deleting and moving stuff around in the
# TUID tables, we need everything to be contained in
# one transaction with no interruptions.
with self.conn.transaction() as t:
revnum = self._get_one_revnum(t, first_cset)[0]
csets_to_del = t.get(
"SELECT revnum, revision FROM csetLog WHERE revnum <= ?", (revnum,)
)
csets_to_del = [cset for _, cset in csets_to_del]
existing_frontiers = t.query(
"SELECT revision FROM latestFileMod WHERE revision IN " +
quote_set(csets_to_del)
).data
existing_frontiers = [row[0] for row in existing_frontiers]
Log.note(
"Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
csets=csets_to_del
)
if len(existing_frontiers) > 0:
# This handles files which no longer exist in
# the main branch.
Log.note(
"Deleting existing frontiers for revisions: {{revisions}}",
revisions=existing_frontiers
)
t.execute(
"DELETE FROM latestFileMod WHERE revision IN " +
quote_set(existing_frontiers)
)
Log.note("Deleting annotations...")
t.execute(
"DELETE FROM annotations WHERE revision IN " +
quote_set(csets_to_del)
)
Log.note(
"Deleting {{num_entries}} csetLog entries...",
num_entries=len(csets_to_del)
)
t.execute(
"DELETE FROM csetLog WHERE revision IN " +
quote_set(csets_to_del)
)
# Recalculate the revnums
self.recompute_table_revnums()
except Exception as e:
Log.warning("Unexpected error occured while deleting from csetLog:", cause=e)
Till(seconds=CSET_DELETION_WAIT_TIME).wait()
return
def get_old_cset_revnum(self, revision):
self.csets_todo_backwards.add((revision, True))
revnum = None
timeout = Till(seconds=BACKFILL_REVNUM_TIMEOUT)
while not timeout:
with self.conn.transaction() as t:
revnum = self._get_one_revnum(t, revision)
if revnum and revnum[0] >= 0:
break
elif revnum and revnum[0] < 0:
Log.note("Waiting for table to recompute...")
else:
Log.note("Waiting for backfill to complete...")
Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()
if timeout:
Log.error(
"Cannot find revision {{rev}} after waiting {{timeout}} seconds",
rev=revision,
timeout=BACKFILL_REVNUM_TIMEOUT
)
return revnum
def get_revnnums_from_range(self, revision1, revision2):
with self.conn.transaction() as t:
revnum1 = self._get_one_revnum(t, revision1)
revnum2 = self._get_one_revnum(t, revision2)
if not revnum1 or not revnum2:
did_an_update = self.update_tip()
if did_an_update:
with self.conn.transaction() as t:
revnum1 = self._get_one_revnum(t, revision1)
revnum2 = self._get_one_revnum(t, revision2)
if not revnum1:
revnum1 = self.get_old_cset_revnum(revision1)
# Refresh the second entry
with self.conn.transaction() as t:
revnum2 = self._get_one_revnum(t, revision2)
if not revnum2:
revnum2 = self.get_old_cset_revnum(revision2)
# The first revnum might change also
with self.conn.transaction() as t:
revnum1 = self._get_one_revnum(t, revision1)
with self.conn.transaction() as t:
result = self._get_revnum_range(t, revnum1[0], revnum2[0])
return sorted(
result,
key=lambda x: int(x[0])
)
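# A minimal usage sketch (illustrative only; the enclosing class and its constructor
# are defined earlier in this file and not shown here, so `clogger` is an assumed,
# already-constructed instance, the revisions are made up, and each returned row is
# assumed to be a (revnum, revision) pair):
#
#   revnums = clogger.get_revnnums_from_range('aaaaaaaaaaaa', 'bbbbbbbbbbbb')
#   for revnum, revision in revnums:
#       print(revnum, revision)
#
# If either revision is missing from csetLog, get_revnnums_from_range() first tries
# update_tip() and then falls back to a backfill through get_old_cset_revnum().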

50
vendor/tuid/pclogger.py vendored Normal file
View file

@@ -0,0 +1,50 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from mo_logs import Log
from mo_threads import Till, Lock, Thread
from mo_times.durations import MINUTE
DAEMON_WAIT_FOR_PC = 5 * MINUTE  # Time between percent-complete progress log messages.
class PercentCompleteLogger:
def __init__(self):
self.total_locker = Lock()
self.total_files_requested = 0
self.total_tuids_mapped = 0
Thread.run("pc-daemon", self.run_daemon)
def update_totals(self, num_files_req, num_tuids_mapped):
with self.total_locker:
self.total_files_requested += num_files_req
self.total_tuids_mapped += num_tuids_mapped
def reset_totals(self):
with self.total_locker:
self.total_files_requested = 0
self.total_tuids_mapped = 0
def run_daemon(self, please_stop=None):
while not please_stop:
try:
with self.total_locker:
requested = self.total_files_requested
if requested != 0:
mapped = self.total_tuids_mapped
Log.note(
"Percent complete {{mapped}}/{{requested}} = {{percent|percent(0)}}",
requested=requested,
mapped=mapped,
percent=mapped/requested
)
(Till(seconds=DAEMON_WAIT_FOR_PC.seconds) | please_stop).wait()
except Exception as e:
Log.warning("Unexpected error in pc-daemon: {{cause}}", cause=e)

1428
vendor/tuid/service.py vendored

The diff for this file is not shown because it is too large.

Some files were not shown because too many files changed in this diff.