This commit is contained in:
Kyle Lahnakoski 2014-08-06 17:07:08 -04:00
Родитель 748e5bc164
Коммит 142becee34
96 изменённых файлов: 19725 добавлений и 2838 удалений

Просмотреть файл

@ -0,0 +1 @@

Просмотреть файл

@ -10,7 +10,7 @@
"threaded":true,
"processes":1
},
"whitelist":["bugs"],
"whitelist":["public_bugs"],
"debug":{
"log":[{
"filename": "./tests/results/logs/test_slow_server.log",

Просмотреть файл

@ -1,15 +1,12 @@
{
"elasticsearch":[{
"host":"http://elasticsearch4.metrics.scl3.mozilla.com",
"host":"http://elasticsearch1.bugs.scl3.mozilla.com",
"port":9200
},{
"host":"http://elasticsearch5.metrics.scl3.mozilla.com",
"host":"http://elasticsearch2.bugs.scl3.mozilla.com",
"port":9200
},{
"host":"http://elasticsearch7.metrics.scl3.mozilla.com",
"port":9200
},{
"host":"http://elasticsearch8.metrics.scl3.mozilla.com",
"host":"http://elasticsearch3.bugs.scl3.mozilla.com",
"port":9200
}],
"flask":{
@ -19,7 +16,7 @@
"threaded":true,
"processes":1
},
"whitelist":["bugs", "org_chart", "bug_summary", "reviews"],
"whitelist":["public_bugs"],
"debug":{
"log":[{
"filename": "./tests/results/logs/app.log",

Просмотреть файл

@ -1,5 +1,5 @@
from tests_by_bug_id import all_tests
from util.logs import Log
from util.env.logs import Log
url = "http://klahnakoski-es.corp.tor1.mozilla.com:9201"

Просмотреть файл

@ -18,13 +18,15 @@ import requests
from werkzeug.exceptions import abort
from esFrontLine.app import stream
from util.cnv import CNV
from util.logs import Log
from util.env.logs import Log
from util.strings import expand_template
from util.threads import Thread, Signal
from util.thread.threads import Thread, Signal
app = Flask(__name__)
PATH = '/bugs/_mapping'
WHITELISTED = "public_bugs" # ENSURE THIS IS IN THE slow_server_settings.json WHITELIST
PATH = '/'+WHITELISTED+'/_mapping'
SLOW_PORT = 9299
PROXY_PORT = 9298
RATE = 4.0 # per second
@ -37,15 +39,15 @@ server_is_ready = Signal()
def serve_slowly(path):
def octoberfest():
for bb in range(99, 2, -1):
yield ("0"*65535)+"\n" #BUG ENOUGH TO FILL THE INCOMING BUFFER
yield ("0"*65535)+"\n" # ENOUGH TO FILL THE INCOMING BUFFER
Thread.sleep(1.0/RATE)
yield CNV.unicode2utf8(expand_template("{{num}} bottles of beer on the wall! {{num}} bottles of beer! Take one down, pass it around! {{less}} bottles of beer on he wall!\n", {
"num": bb,
"less": bb - 1
}))
yield ("0"*65535)+"\n" #BUG ENOUGH TO FILL THE INCOMING BUFFER
yield ("0"*65535)+"\n" # ENOUGH TO FILL THE INCOMING BUFFER
yield CNV.unicode2utf8(u"2 bottles of beer on the wall! 2 bottles of beer! Take one down, pass it around! 1 bottle of beer on he wall!\n")
yield ("0"*65535)+"\n" #BUG ENOUGH TO FILL THE INCOMING BUFFER
yield ("0"*65535)+"\n" # ENOUGH TO FILL THE INCOMING BUFFER
yield CNV.unicode2utf8(u"1 bottle of beer on the wall! 1 bottle of beer! Take one down, pass it around! 0 bottles of beer on he wall.\n")
try:
@ -101,13 +103,13 @@ def run_proxy(please_stop):
proc.send_signal(signal.CTRL_C_EVENT)
def test_slow_server():
def test_slow_streaming():
"""
TEST THAT THE app ACTUALLY STREAMS. WE SHOULD GET A RESPONSE BEFORE THE SERVER
FINISHES DELIVERING
"""
slow_server_thread = Thread.run("run slow server", run_slow_server)
proxy_thread = Thread.run("run slow server", run_proxy)
proxy_thread = Thread.run("run proxy", run_proxy)
try:
proxy_is_ready.wait_for_go()
@ -118,7 +120,7 @@ def test_slow_server():
for i, data in enumerate(stream(response.raw)):
Log.note("CLIENT GOT RESPONSE:\n{{data|indent}}", {"data": data})
end = time.clock()
if i == 0 and end - start > 5: # IF WE GET DATA BEFORE 5sec, THEN WE KNOW WE ARE STREAMING
if i == 0 and end - start > 10: # IF WE GET DATA BEFORE 10sec, THEN WE KNOW WE ARE STREAMING
Log.error("should have something by now")
if response.status_code != 200:
Log.error("Expecting a positive response")

Просмотреть файл

@ -2,10 +2,12 @@ from _subprocess import CREATE_NEW_PROCESS_GROUP
import subprocess
import requests
import signal
import test_slow_server
from test_slow_server import test_slow_server
from util.logs import Log
from util.threads import Thread, Signal
from util.env.logs import Log
from util.thread.threads import Signal, Thread
from test_slow_server import test_slow_streaming
WHITELISTED = "public_bugs" # ENSURE THIS IS IN THE test_settings.json WHITELIST
NOT_WHITELISTED = "bug_hierarchy"
def test_943465(url):
@ -21,14 +23,14 @@ def test_943465(url):
def test_943472(url):
#https://bugzilla.mozilla.org/show_bug.cgi?id=943472
response = request("GET", url + "/bugs/_stats/")
response = request("GET", url + "/" + WHITELISTED + "/_stats/")
if response.status_code != 400:
Log.error("should not allow")
def test_943478(url):
#https://bugzilla.mozilla.org/show_bug.cgi?id=943478
response = request("POST", url + "/telemetry_agg_valid_201302/_search", data="""
response = request("POST", url + "/" + NOT_WHITELISTED + "/_search", data="""
{
"query":{"filtered":{
"query":{"match_all":{}}
@ -42,7 +44,7 @@ def test_943478(url):
Log.error("should not allow")
# VERIFY ALLOWED INDEX GETS THROUGH
response = request("POST", url + "/bugs/_search", data="""
response = request("POST", url + "/" + WHITELISTED + "/_search", data="""
{
"query":{"filtered":{
"query":{"match_all":{}},
@ -60,11 +62,25 @@ def test_943478(url):
def test_allow_3path_mapping(url):
#WE SHOULD ALLOW -mapping WITH INDEX AND TYPE IN PATH
#http://klahnakoski-es.corp.tor1.mozilla.com:9204/bugs/bug_version/_mapping
response = request("GET", url + "/bugs/bug_version/_mapping")
response = request("GET", url + "/" + WHITELISTED + "/bug_version/_mapping")
if response.status_code != 200:
Log.error("should be allowed")
def test_allow_head_request(url):
#WE SHOULD ALLOW HEAD REQUESTS TO /
response = request("HEAD", url + "/")
if response.status_code != 200:
Log.error("should be allowed")
response = request("HEAD", url)
if response.status_code != 200:
Log.error("should be allowed")
# EVEN HEAD REQUESTS TO WHITELISTED INDEXES WILL BE DENIED
response = request("HEAD", url + "/" + WHITELISTED + "/bug_version/_mapping")
if response.status_code == 200:
Log.error("should NOT be allowed")
def request(type, url, data=None, **kwargs):
@ -107,11 +123,14 @@ def run_app(please_stop):
def all_tests(url):
# test_943465(url)
# test_943472(url)
# test_943478(url)
# test_allow_3path_mapping(url)
test_slow_server()
test_allow_head_request(url)
test_943465(url)
test_943472(url)
test_943478(url)
test_allow_3path_mapping(url)
test_slow_streaming()
Log.note("ALL TESTS PASS")

Просмотреть файл

@ -0,0 +1 @@
from .maths import stats

Просмотреть файл

@ -1,6 +1,5 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
@ -8,17 +7,27 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import StringIO
import base64
import datetime
import json
import re
import time
from .multiset import Multiset
from .jsons import json_decoder, json_encoder
from .logs import Log
import struct
from .strings import expand_template, indent
from .struct import StructList, Null
from urllib import urlencode
from . import struct
from . import jsons
from .jsons import json_encoder
from .collections.multiset import Multiset
from .env.profiles import Profiler
from .env.logs import Log
from .strings import expand_template
from .structs.wraps import wrap, wrap_dot
json_decoder = json.JSONDecoder().decode
class CNV:
@ -29,71 +38,97 @@ class CNV:
@staticmethod
def object2JSON(obj, pretty=False):
try:
return json_encoder.encode(obj, pretty=pretty)
json = json_encoder(obj, pretty=pretty)
if json == None:
Log.note(str(type(obj)) + " is not valid{{type}}JSON", {"type": " (pretty) " if pretty else " "})
Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
return json
except Exception, e:
Log.error("Can not encode into JSON: {{value}}", {"value": repr(obj)}, e)
@staticmethod
def JSON2object(json_string, params=None, flexible=False):
try:
#REMOVE """COMMENTS""", #COMMENTS, //COMMENTS, AND \n \r
if flexible: json_string = re.sub(r"\"\"\".*?\"\"\"|\s+//.*\n|#.*?\n|\n|\r", r" ",
json_string) #DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
def JSON2object(json_string, params=None, flexible=False, paths=False):
with Profiler("JSON2Object"):
try:
#REMOVE """COMMENTS""", #COMMENTS, //COMMENTS, AND \n \r
if flexible:
#DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
json_string = re.sub(r"\"\"\".*?\"\"\"|[ \t]+//.*\n|^//.*\n|#.*?\n", r"\n", json_string)
json_string = re.sub(r"\n//.*\n", r"\n\n", json_string)
if params:
params = dict([(k, CNV.value2quote(v)) for k, v in params.items()])
json_string = expand_template(json_string, params)
if isinstance(json_string, str):
Log.error("only unicode json accepted")
if params:
params = dict([(k, CNV.value2quote(v)) for k, v in params.items()])
json_string = expand_template(json_string, params)
value = wrap(json_decoder(json_string))
obj = json_decoder.decode(json_string)
if isinstance(obj, list): return StructList(obj)
return struct.wrap(obj)
except Exception, e:
Log.error("Can not decode JSON:\n\t" + json_string, e)
if paths:
value = wrap_dot(value)
return value
except Exception, e:
Log.error("Can not decode JSON:\n\t" + str(json_string), e)
@staticmethod
def string2datetime(value, format):
## http://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
if value == None:
return None
try:
return datetime.datetime.strptime(value, format)
except Exception, e:
Log.error("Can not format {{value}} with {{format}}", {"value": value, "format": format}, e)
@staticmethod
def datetime2string(value, format):
def datetime2string(value, format="%Y-%m-%d %H:%M:%S"):
try:
return value.strftime(format)
except Exception, e:
Log.error("Can not format {{value}} with {{format}}", {"value": value, "format": format}, e)
@staticmethod
def datetime2unix(d):
if d == None:
return None
return long(time.mktime(d.timetuple()))
@staticmethod
def datetime2milli(d):
try:
epoch = datetime.datetime(1970, 1, 1)
if d == None:
return None
elif isinstance(d, datetime.datetime):
epoch = datetime.datetime(1970, 1, 1)
elif isinstance(d, datetime.date):
epoch = datetime.date(1970, 1, 1)
else:
Log.error("Can not convert {{value}} of type {{type}}", {"value": d, "type": d.__class__})
diff = d - epoch
return (diff.days * 86400000) + \
(diff.seconds * 1000) + \
(diff.microseconds / 1000) # 86400000=24*3600*1000
return long(diff.total_seconds()) * 1000L + long(diff.microseconds / 1000)
except Exception, e:
Log.error("Can not convert {{value}}", {"value": d})
Log.error("Can not convert {{value}}", {"value": d}, e)
@staticmethod
def timedelta2milli(v):
    """
    CONVERT A datetime.timedelta TO MILLISECONDS

    v - THE timedelta TO CONVERT
    RETURNS THE DURATION IN MILLISECONDS, AS A FLOAT
    """
    # total_seconds() IS IN SECONDS; SCALE TO MATCH THE OTHER *2milli CONVERTERS
    return v.total_seconds() * 1000
@staticmethod
def unix2datetime(u):
return datetime.datetime.utcfromtimestamp(u)
try:
if u == None:
return None
if u == 9999999999: # PYPY BUG https://bugs.pypy.org/issue1697
return datetime.datetime(2286, 11, 20, 17, 46, 39)
return datetime.datetime.utcfromtimestamp(u)
except Exception, e:
Log.error("Can not convert {{value}} to datetime", {"value": u}, e)
@staticmethod
def milli2datetime(u):
return datetime.datetime.utcfromtimestamp(u / 1000)
return CNV.unix2datetime(u / 1000.0)
@staticmethod
def dict2Multiset(dic):
@ -113,14 +148,25 @@ class CNV:
return None
return dict(value.dic)
@staticmethod
def table2list(
column_names, #tuple of columns names
rows #list of tuples
):
return StructList([dict(zip(column_names, r)) for r in rows])
return wrap([dict(zip(column_names, r)) for r in rows])
@staticmethod
def list2tab(rows):
columns = set()
for r in rows:
columns |= set(r.keys())
keys = list(columns)
output = []
for r in rows:
output.append("\t".join(CNV.object2JSON(r[k]) for k in keys))
return "\t".join(keys) + "\n" + "\n".join(output)
#PROPER NULL HANDLING
@staticmethod
@ -140,8 +186,18 @@ class CNV:
@staticmethod
def string2quote(value):
# return repr(value)
return "\"" + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\""
return jsons.quote(value)
@staticmethod
def value2url(value):
return urlencode(value)
@staticmethod
def quote2string(value):
if value[0] == "\"" and value[-1] == "\"":
value = value[1:-1]
return value.replace("\\\\", "\\").replace("\\\"", "\"").replace("\\'", "'").replace("\\\n", "\n").replace("\\\t", "\t")
#RETURN PYTHON CODE FOR THE SAME
@staticmethod
@ -170,11 +226,26 @@ class CNV:
def latin12hex(value):
return value.encode("hex")
@staticmethod
def int2hex(value, size):
return (("0" * size) + hex(value)[2:])[-size:]
@staticmethod
def hex2bytearray(value):
return bytearray(value.decode("hex"))
@staticmethod
def bytearray2hex(value):
return value.decode("latin1").encode("hex")
@staticmethod
def base642bytearray(value):
return bytearray(base64.b64decode(value))
@staticmethod
def bytearray2base64(value):
return base64.b64encode(value)
@staticmethod
def value2intlist(value):
if value == None:
@ -187,7 +258,6 @@ class CNV:
else:
return [int(value)]
@staticmethod
def value2int(value):
if value == None:
@ -195,7 +265,6 @@ class CNV:
else:
return int(value)
@staticmethod
def value2number(v):
try:
@ -219,79 +288,126 @@ class CNV:
@staticmethod
def latin12unicode(value):
return unicode(value.decode('iso-8859-1'))
if isinstance(value, unicode):
Log.error("can not convert unicode from latin1")
try:
return unicode(value.decode('iso-8859-1'))
except Exception, e:
Log.error("Can not convert {{value|quote}} to unicode", {"value": value})
@staticmethod
def esfilter2where(esfilter):
"""
CONVERT esfilter TO FUNCTION THAT WILL PERFORM THE FILTER
WILL ADD row, rownum, AND rows AS CONTEXT VARIABLES FOR {"script":} IF NEEDED
"""
output = None
condition = CNV._esfilter2where(esfilter)
exec \
"def result(row, rownum, rows):\n" + \
" if " + condition + ":\n" + \
" return True\n" + \
" return False" + \
"output = result"
def output(row, rownum=None, rows=None):
return _filter(esfilter, row, rownum, rows)
return output
@staticmethod
def _esfilter2where(esfilter):
def isolate(separator, list):
if len(list) > 1:
return u"(\n" + indent((" " + separator + " \\\n").join(list)) + u"\n)"
else:
return list[0]
def pipe2value(value):
type = value[0]
if type == '0':
return None
if type == 'n':
return CNV.value2number(value[1::])
esfilter = struct.wrap(esfilter)
if type != 's' and type != 'a':
Log.error("unknown pipe type ({{type}}) in {{value}}", {"type": type, "value": value})
if esfilter[u"and"]:
return isolate(u"and", [CNV._esfilter2where(a) for a in esfilter[u"and"]])
elif esfilter[u"or"]:
return isolate(u"or", [CNV._esfilter2where(a) for a in esfilter[u"or"]])
elif esfilter[u"not"]:
return u"not (" + CNV._esfilter2where(esfilter[u"not"]) + u")"
elif esfilter.term:
return isolate(u"and", [u"row." + col + u" == " + CNV.value2quote(val) for col, val in esfilter.term.items()])
elif esfilter.terms:
def single(col, vals):
has_null = False
for val in vals:
if val == None:
has_null = True
break
if has_null:
return u"(row." + col + u" == None or row." + col + u" in " + CNV.value2quote(v for v in vals if v != None)
else:
return u"row." + col + u" in " + CNV.value2quote(vals)
return isolate(u"and", [single(col, vals) for col, vals in esfilter.terms])
elif esfilter.script:
return u"(" + esfilter.script + u")"
elif esfilter.range:
name2sign = {
u"gt": u">",
u"gte": u">=",
u"lte": u"<=",
u"lt": u"<"
}
def single(col, ranges):
return u" and ".join(u"row." + col + name2sign[sign] + CNV.value2quote(value) for sign, value in ranges.items())
output = isolate(u"and", [single(col, ranges) for col, ranges in esfilter.range.items()])
# EXPECTING MOST STRINGS TO NOT HAVE ESCAPED CHARS
output = unPipe(value)
if type == 's':
return output
elif esfilter.missing:
if isinstance(esfilter.missing, basestring):
return esfilter.missing + u" == None"
else:
return esfilter.missing.field + u" == None"
elif esfilter.exists:
if isinstance(esfilter.exists, basestring):
return esfilter.exists + u" != None"
else:
return esfilter.exists.field + u" != None"
return [CNV.pipe2value(v) for v in output.split("|")]
def unPipe(value):
s = value.find("\\", 1)
if s < 0:
return value[1::]
result = ""
e = 1
while True:
c = value[s + 1]
if c == 'p':
result = result + value[e:s] + '|'
s += 2
e = s
elif c == '\\':
result = result + value[e:s] + '\\'
s += 2
e = s
else:
Log.error(u"Can not convert esfilter to SQL: {{esfilter}}", {u"esfilter": esfilter})
s += 1
s = value.find("\\", s)
if s < 0:
break
return result + value[e::]
def _filter(esfilter, row, rownum, rows):
    """
    EVALUATE AN ElasticSearch-STYLE FILTER AGAINST A SINGLE row

    esfilter - THE FILTER; ONE OF and, or, not, term, terms, range,
               missing, exists
    row      - THE RECORD BEING TESTED (FIELDS ARE READ AS row[name])
    rownum   - ONLY PASSED ALONG TO NESTED FILTERS
    rows     - ONLY PASSED ALONG TO NESTED FILTERS

    RETURNS True IF row SATISFIES THE FILTER, False OTHERWISE.
    ANY OTHER FILTER TYPE IS REPORTED THROUGH Log.error
    """
    esfilter = wrap(esfilter)

    if esfilter[u"and"]:
        # EVERY SUB-FILTER MUST PASS
        for a in esfilter[u"and"]:
            if not _filter(a, row, rownum, rows):
                return False
        return True
    elif esfilter[u"or"]:
        # ANY SUB-FILTER MAY PASS (MUST WALK THE "or" LIST, NOT THE "and" LIST)
        for a in esfilter[u"or"]:
            if _filter(a, row, rownum, rows):
                return True
        return False
    elif esfilter[u"not"]:
        return not _filter(esfilter[u"not"], row, rownum, rows)
    elif esfilter.term:
        # ALL LISTED COLUMNS MUST EQUAL THEIR VALUE
        for col, val in esfilter.term.items():
            if row[col] != val:
                return False
        return True
    elif esfilter.terms:
        # ALL LISTED COLUMNS MUST BE ONE OF THEIR ALLOWED VALUES
        for col, vals in esfilter.terms.items():
            if not row[col] in vals:
                return False
        return True
    elif esfilter.range:
        # EACH BOUND IS TESTED BY LOOKING FOR A VIOLATION
        for col, ranges in esfilter.range.items():
            for sign, val in ranges.items():
                if sign in ("gt", ">") and row[col] <= val:
                    return False
                if sign == "gte" and row[col] < val:
                    return False
                if sign == "lte" and row[col] > val:
                    return False
                if sign == "lt" and row[col] >= val:
                    return False
        return True
    elif esfilter.missing:
        # ACCEPT BOTH {"missing": "name"} AND {"missing": {"field": "name"}}
        if isinstance(esfilter.missing, basestring):
            field = esfilter.missing
        else:
            field = esfilter.missing.field

        if row[field] == None:
            return True
        return False
    elif esfilter.exists:
        # ACCEPT BOTH {"exists": "name"} AND {"exists": {"field": "name"}}
        # THE FIELD NAME MUST COME FROM THE exists CLAUSE, NOT FROM missing
        if isinstance(esfilter.exists, basestring):
            field = esfilter.exists
        else:
            field = esfilter.exists.field

        if row[field] != None:
            return True
        return False
    else:
        Log.error(u"Can not convert esfilter to SQL: {{esfilter}}", {u"esfilter": esfilter})

Просмотреть файл

@ -0,0 +1,143 @@
import types
import math
from ..collections.multiset import Multiset
from ..struct import Null
__author__ = 'klahnakoski'
def reverse(values):
    """
    RETURN A NEW LIST WITH THE MEMBERS OF values IN OPPOSITE ORDER
    (THE ARGUMENT ITSELF IS NOT MODIFIED)
    """
    # COPY FIRST, SO ANY ITERABLE IS ACCEPTED AND THE CALLER'S DATA IS UNTOUCHED
    return list(values)[::-1]
def MIN(*values):
    """
    RETURN THE SMALLEST VALUE, IGNORING None AND NaN

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE COMPARED INSTEAD.
    RETURNS Null WHEN NO USABLE VALUE IS FOUND
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    smallest = Null
    for candidate in values:
        if candidate == None or (isinstance(candidate, float) and math.isnan(candidate)):
            continue  # NOT COMPARABLE
        smallest = candidate if smallest == None else min(smallest, candidate)
    return smallest
def MAX(*values):
    """
    RETURN THE LARGEST VALUE, IGNORING None AND NaN

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE COMPARED INSTEAD.
    RETURNS Null WHEN NO USABLE VALUE IS FOUND
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    largest = Null
    for candidate in values:
        if candidate == None or (isinstance(candidate, float) and math.isnan(candidate)):
            continue  # NOT COMPARABLE
        largest = candidate if largest == None else max(largest, candidate)
    return largest
def PRODUCT(*values):
    """
    MULTIPLY ALL VALUES TOGETHER, IGNORING None AND NaN

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE MULTIPLIED INSTEAD.
    RETURNS Null WHEN NO USABLE VALUE IS FOUND
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    running = Null
    for factor in values:
        if factor == None or (isinstance(factor, float) and math.isnan(factor)):
            continue  # DOES NOT CONTRIBUTE
        if running == None:
            # FIRST USABLE VALUE SEEDS THE RESULT
            running = factor
        else:
            running *= factor
    return running
def SUM(*values):
    """
    ADD ALL VALUES TOGETHER, IGNORING None AND NaN

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE ADDED INSTEAD.
    RETURNS Null WHEN NO USABLE VALUE IS FOUND
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    total = Null
    for term in values:
        if term == None or (isinstance(term, float) and math.isnan(term)):
            continue  # DOES NOT CONTRIBUTE
        if total == None:
            # FIRST USABLE VALUE SEEDS THE RESULT
            total = term
        else:
            total += term
    return total
def COUNT(*values):
    """
    RETURN THE NUMBER OF VALUES THAT ARE NOT None

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE COUNTED INSTEAD
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    return sum(1 for member in values if member != None)
# NOTE: THIS IS A SECOND DEFINITION OF SUM IN THE SAME MODULE; BEING LATER, IT
# IS THE ONE BOUND TO THE NAME AFTER IMPORT
def SUM(*values):
    """
    ADD ALL VALUES TOGETHER, IGNORING None AND NaN

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE ADDED INSTEAD.
    RETURNS Null WHEN NO USABLE VALUE IS FOUND
    """
    single_collection = len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType))
    if single_collection:
        values = values[0]

    accumulated = Null
    for item in values:
        if item == None:
            continue
        if isinstance(item, float) and math.isnan(item):
            continue
        if accumulated == None:
            accumulated = item
            continue
        accumulated += item
    return accumulated
def AND(*values):
    """
    RETURN False IF ANY VALUE IS FALSEY, True OTHERWISE (None IS IGNORED)

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE TESTED INSTEAD
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    return all(member for member in values if not (member == None))
def OR(*values):
    """
    RETURN True IF ANY VALUE IS TRUTHY, False OTHERWISE (None IS IGNORED)

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE TESTED INSTEAD
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    return any(member for member in values if not (member == None))
def UNION(*values):
    """
    RETURN A set HOLDING EVERY MEMBER FOUND IN values (None IS IGNORED)

    ACCEPTS MANY ARGUMENTS, OR ONE COLLECTION (list, set, tuple, Multiset
    OR GENERATOR) WHOSE MEMBERS ARE USED INSTEAD.
    EACH MEMBER THAT IS A list OR set HAS ITS CONTENTS MERGED IN; ANY OTHER
    MEMBER IS ADDED AS A SINGLE ELEMENT
    """
    if len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        # A SINGLE COLLECTION WAS GIVEN, LOOK INSIDE IT
        values = values[0]

    output = set()
    for v in values:
        # TEST THE MEMBER, NOT THE WHOLE COLLECTION, SO None NEVER REACHES THE RESULT
        if v == None:
            continue
        if isinstance(v, (list, set)):
            output.update(v)
        else:
            output.add(v)
    return output

Просмотреть файл

@ -0,0 +1,276 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..collections import PRODUCT, reverse, MAX, MIN
from ..cnv import CNV
from ..env.logs import Log
from ..struct import Null, Struct
from ..structs.wraps import wrap
class Matrix(object):
    """
    SIMPLE n-DIMENSIONAL ARRAY OF OBJECTS

    num  - NUMBER OF DIMENSIONS (ZERO MEANS A SINGLE VALUE IS HELD)
    dims - TUPLE WITH THE SIZE OF EACH DIMENSION
    cube - NESTED LISTS OF VALUES (OR THE VALUE ITSELF WHEN num == 0)
    """
    ZERO = None

    def __init__(self, *dims, **kwargs):
        """
        Matrix(d0, d1, ...) - CUBE OF THE GIVEN SIZE, EVERY CELL IS Null
        Matrix(list=x)      - USE THE (NON-EMPTY) LIST x AS A ONE-DIMENSION CUBE
        Matrix(value=v)     - HOLD THE SINGLE VALUE v (ZERO DIMENSIONS)
        """
        kwargs = wrap(kwargs)

        values = kwargs.list
        if values:
            self.num = 1
            self.dims = (len(values), )
            self.cube = values
            return

        value = kwargs.value
        if value != None:
            self.num = 0
            self.dims = tuple()
            self.cube = value
            return

        self.num = len(dims)
        self.dims = tuple(dims)
        if self.num == 0:
            self.cube = Null
        else:
            self.cube = _null(*dims)

    @staticmethod
    def wrap(array):
        """
        RETURN A ONE-DIMENSION Matrix THAT USES array (NOT A COPY) AS ITS cube
        """
        output = Matrix()
        output.num = 1
        output.dims = (len(array), )
        output.cube = array
        return output

    def __getitem__(self, index):
        """
        index - LIST (OR TUPLE) OF COORDINATES, ONE PER DIMENSION
        RETURN THE VALUE FOUND BY FOLLOWING THE COORDINATES INTO THE cube
        """
        if isinstance(index, (list, tuple)):
            m = self.cube
            for k in index:
                m = m[k]
            return m

        if isinstance(index, slice):
            # NOTE: SLICING IS NOT IMPLEMENTED, None IS RETURNED
            pass

    def __setitem__(self, key, value):
        """
        key   - SEQUENCE OF COORDINATES, EXACTLY ONE PER DIMENSION
        value - WHAT TO STORE AT THOSE COORDINATES
        ANY PROBLEM IS REPORTED THROUGH Log.error
        """
        try:
            if len(key) != self.num:
                Log.error("Expecting coordinates to match the number of dimensions")
            last = self.num - 1
            m = self.cube
            # WALK DOWN TO THE INNERMOST LIST, THEN ASSIGN INTO IT
            for k in key[0:last:]:
                m = m[k]
            m[key[last]] = value
        except Exception as e:
            Log.error("can not set item", e)

    def __bool__(self):
        return self.cube != None

    def __nonzero__(self):
        return self.cube != None

    def __len__(self):
        """
        TOTAL NUMBER OF CELLS (ZERO FOR A ZERO-DIMENSION MATRIX)
        """
        if self.num == 0:
            return 0
        return PRODUCT(self.dims)

    @property
    def value(self):
        """
        THE SINGLE VALUE HELD; ONLY ALLOWED WHEN THERE ARE NO DIMENSIONS
        """
        if self.num:
            Log.error("can not get value of with dimension")
        return self.cube

    # COMPARISON AND ARITHMETIC ALL OPERATE ON THE SINGLE VALUE HELD
    def __lt__(self, other):
        return self.value < other

    def __gt__(self, other):
        return self.value > other

    def __eq__(self, other):
        if other == None:
            return False
        return self.value == other

    def __add__(self, other):
        return self.value + other

    def __radd__(self, other):
        return other + self.value

    def __sub__(self, other):
        return self.value - other

    def __rsub__(self, other):
        return other - self.value

    def __mul__(self, other):
        return self.value * other

    def __rmul__(self, other):
        return other * self.value

    def __div__(self, other):
        return self.value / other

    def __rdiv__(self, other):
        return other / self.value

    def __iter__(self):
        """
        ITERATE OVER EVERY CELL VALUE
        """
        return (self[c] for c in self._all_combos())

    def __float__(self):
        # PYTHON REQUIRES __float__ TO RETURN A REAL float
        return float(self.value)

    def groupby(self, io_select):
        """
        SLICE THIS MATRIX INTO ONES WITH LESS DIMENSIONALITY

        io_select - ONE ENTRY PER DIMENSION; TRUTHY MEANS GROUP BY THAT DIMENSION
        """
        #offsets WILL SERVE TO MASK DIMS WE ARE NOT GROUPING BY, AND SERVE AS RELATIVE INDEX FOR EACH COORDINATE
        offsets = []
        new_dim = []
        acc = 1
        for i, d in reverse(enumerate(self.dims)):
            if not io_select[i]:
                new_dim.insert(0, d)
            offsets.insert(0, acc * io_select[i])
            acc *= d

        if not new_dim:
            # WHEN groupby ALL DIMENSIONS, ONLY THE VALUES REMAIN
            # RETURN AN ITERATOR OF PAIRS (c, v), WHERE
            # c - COORDINATES INTO THE CUBE
            # v - VALUE AT GIVEN COORDINATES
            return ((c, self[c]) for c in self._all_combos())
        else:
            output = [[None, Matrix(*new_dim)] for i in range(acc)]
            _groupby(self.cube, 0, offsets, 0, output, tuple(), [])

        return output

    def aggregate(self, type):
        """
        type - NAME OF THE AGGREGATE, LOOKED UP IN THE MODULE'S aggregates TABLE
        RETURN THE AGGREGATE OVER ALL CELLS
        """
        func = aggregates[type]
        # VERIFY THE LOOKUP FOUND SOMETHING, RATHER THAN RE-TESTING THE NAME
        # (presumably an unknown name yields a falsey Null from the Struct - TODO confirm)
        if not func:
            Log.error("Aggregate of type {{type}} is not supported yet", {"type": type})

        return func(self.num, self.cube)

    def forall(self, method):
        """
        IT IS EXPECTED THE method ACCEPTS (value, coord, cube), WHERE
        value - VALUE FOUND AT ELEMENT
        coord - THE COORDINATES OF THE ELEMENT (PLEASE, READ ONLY)
        cube - THE WHOLE CUBE, FOR USE IN WINDOW FUNCTIONS
        """
        for c in self._all_combos():
            method(self[c], c, self.cube)

    def _all_combos(self):
        """
        RETURN AN ITERATOR OF ALL COORDINATES

        THE SAME LIST OBJECT IS YIELDED EVERY TIME, AND UPDATED IN PLACE
        BETWEEN YIELDS; COPY IT IF IT MUST BE KEPT
        """
        num = self.num
        dim = self.dims

        combos = PRODUCT(dim)
        if not combos:
            return

        c = [0]*num  # THE CORRECT SIZE
        while True:
            yield c

            # INCREMENT LIKE AN ODOMETER, LAST DIMENSION MOVES FASTEST
            for i in range(num-1, -1, -1):
                c[i] += 1
                if c[i] < dim[i]:
                    break
                c[i] = 0
            else:
                # EVERY DIMENSION ROLLED OVER, SO ALL COORDINATES HAVE BEEN SEEN
                break

    def __str__(self):
        return "Matrix " + CNV.object2JSON(self.dims) + ": " + str(self.cube)

    def __json__(self):
        return CNV.object2JSON(self.cube)
Matrix.ZERO = Matrix(value=None)
def _max(depth, cube):
if depth == 0:
return cube
elif depth == 1:
return MAX(cube)
else:
return MAX(_max(depth - 1, c) for c in cube)
def _min(depth, cube):
if depth == 0:
return cube
elif depth == 1:
return MIN(cube)
else:
return MIN(_min(depth - 1, c) for c in cube)
aggregates = Struct(
max=_max,
maximum=_max,
min=_min,
minimum=_min
)
def _iter(cube, depth):
if depth == 1:
return cube.__iter__()
else:
def iterator():
for c in cube:
for b in _iter(c, depth - 1):
yield b
return iterator()
def _null(*dims):
    """
    BUILD NESTED LISTS OF THE GIVEN DIMENSIONS, WITH EVERY CELL SET TO Null
    """
    size = dims[0]
    if size == 0:
        Log.error("Zero dimensions not allowed")

    remaining = dims[1::]
    if len(dims) == 1:
        # INNERMOST LEVEL HOLDS THE CELLS
        return [Null for _ in range(size)]
    # EACH MEMBER IS ITS OWN, SEPARATELY BUILT, SUB-CUBE
    return [_null(*remaining) for _ in range(size)]
def _groupby(cube, depth, intervals, offset, output, group, new_coord):
if depth == len(intervals):
output[offset][0] = group
output[offset][1][new_coord] = cube
return
interval = intervals[depth]
if interval:
for i, c in enumerate(cube):
_groupby(c, depth + 1, intervals, offset + i * interval, output, group + ( i, ), new_coord)
else:
for i, c in enumerate(cube):
_groupby(c, depth + 1, intervals, offset, output, group + (-1, ), new_coord + [i])

Просмотреть файл

@ -8,17 +8,42 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
def Multiset(list=None, key_field=None, count_field=None, allow_negative=False):
if allow_negative:
return _NegMultiset(list, key_field, count_field)
else:
return _Multiset(list, key_field, count_field)
from __future__ import unicode_literals
class _Multiset(object):
class Multiset(object):
"""
Multiset IS ONE MEMBER IN A FAMILY OF USEFUL CONTAINERS
def __init__(self, list=None, key_field=None, count_field=None):
+------------+---------+----------+
| Uniqueness | Ordered | Type |
+------------+---------+----------+
| Yes | Yes | Queue |
| Yes | No | Set |
| No | Yes | List |
| No | No | Multiset |
+------------+---------+----------+
"""
def __new__(cls, list=None, key_field=None, count_field=None, allow_negative=False):
try:
if allow_negative:
return _NegMultiset(list, key_field, count_field)
else:
return _Multiset(list, key_field, count_field)
except Exception, e:
from ..env.logs import Log
Log.error("Not expected", e)
class _Multiset(Multiset):
def __new__(cls, *args):
return object.__new__(cls, *args)
def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
if not key_field and not count_field:
self.dic = dict()
if list:
@ -26,8 +51,8 @@ class _Multiset(object):
self.add(i)
return
else:
self.dic={i[key_field]:i[count_field] for i in list}
self.dic = {i[key_field]: i[count_field] for i in list}
def __iter__(self):
for k, m in self.dic.items():
@ -43,9 +68,9 @@ class _Multiset(object):
def add(self, value):
if value in self.dic:
self.dic[value]+=1
self.dic[value] += 1
else:
self.dic[value]=1
self.dic[value] = 1
return self
def extend(self, values):
@ -54,37 +79,43 @@ class _Multiset(object):
def remove(self, value):
if value not in self.dic:
from .logs import Log
Log.error("{{value}} is not in multiset", {"value":value})
from ..env.logs import Log
Log.error("{{value}} is not in multiset", {"value": value})
self._remove(value)
def copy(self):
output = _Multiset()
output.dic=self.dic.copy()
output.dic = self.dic.copy()
return output
def _remove(self, value):
count=self.dic.get(value, None)
count = self.dic.get(value, None)
if count == None:
return
count-=1
if count==0:
del(self.dic[value])
count -= 1
if count == 0:
del (self.dic[value])
else:
self.dic[value]=count
self.dic[value] = count
def __sub__(self, other):
output=self.copy()
output = self.copy()
for o in other:
output._remove(o)
return output
def __add__(self, other):
output=self.copy()
for o in other:
output.add(o)
output = self.copy()
if isinstance(other, Multiset):
for k, c in other.dic.items():
output.dic[k] = output.dic.get(k, 0) + c
else:
for o in other:
output.add(o)
return output
def __set__(self, other):
@ -106,10 +137,11 @@ class _Multiset(object):
return 0
class _NegMultiset(Multiset):
def __new__(cls, *args, **kwargs):
return object.__new__(cls, *args, **kwargs)
class _NegMultiset(object):
def __init__(self, list=None, key_field=None, count_field=None):
def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
if not key_field and not count_field:
self.dic = dict()
if list:
@ -117,7 +149,7 @@ class _NegMultiset(object):
self.add(i)
return
else:
self.dic={i[key_field]:i[count_field] for i in list}
self.dic = {i[key_field]: i[count_field] for i in list}
# def __iter__(self):
@ -158,12 +190,12 @@ class _NegMultiset(object):
def copy(self):
output = _NegMultiset()
output.dic=self.dic.copy()
output.dic = self.dic.copy()
return output
def __add__(self, other):
output=self.copy()
output = self.copy()
if isinstance(other, _NegMultiset):
for k, c in other.dic.items():
@ -178,7 +210,7 @@ class _NegMultiset(object):
if not other:
return self
output=self.copy()
output = self.copy()
for o in other:
output.remove(o)
return output

Просмотреть файл

@ -0,0 +1,52 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
class Queue(object):
    """
    A SET WITH ADDED ORDER MAINTAINED

    +------------+---------+----------+
    | Uniqueness | Ordered | Type     |
    +------------+---------+----------+
    |     Yes    |   Yes   | Queue    |
    |     Yes    |   No    | Set      |
    |     No     |   Yes   | List     |
    |     No     |   No    | Multiset |
    +------------+---------+----------+

    VALUES ARE REMOVED (pop) IN THE SAME ORDER THEY WERE FIRST ADDED
    """
    def __init__(self):
        # MEMBERS, OLDEST FIRST
        self.list = []

    def __nonzero__(self):
        return len(self.list) > 0

    # PYTHON 3 SPELLING OF THE TRUTH TEST
    __bool__ = __nonzero__

    def __len__(self):
        return len(self.list)

    def add(self, value):
        """
        APPEND value UNLESS IT IS ALREADY A MEMBER
        RETURNS self IN BOTH CASES, SO CALLS CAN BE CHAINED
        """
        if value not in self.list:
            self.list.append(value)
        return self

    def extend(self, values):
        """
        ADD EACH OF values, IN ORDER, SKIPPING ANY ALREADY PRESENT
        """
        for v in values:
            self.add(v)

    def pop(self):
        """
        REMOVE AND RETURN THE OLDEST MEMBER, OR None IF EMPTY
        """
        if len(self.list) == 0:
            return None

        output = self.list.pop(0)
        return output

Просмотреть файл

@ -0,0 +1,97 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..collections import SUM
class Relation_usingList(object):
    """
    A MANY-TO-MANY RELATION, KEPT AS A SINGLE set OF (key, value) PAIRS
    """
    def __init__(self):
        self.all = set()

    def len(self):
        """
        NUMBER OF DISTINCT (key, value) PAIRS
        """
        return len(self.all)

    def add(self, key, value):
        """
        RECORD THAT key MAPS TO value (NO EFFECT IF ALREADY RECORDED)
        """
        pair = (key, value)
        if pair in self.all:
            return
        self.all.add(pair)

    def testAndAdd(self, key, value):
        """
        RETURN TRUE IF THIS RELATION IS NET-NEW
        """
        pair = (key, value)
        if pair in self.all:
            return False
        self.all.add(pair)
        return True

    def extend(self, key, values):
        """
        RECORD THAT key MAPS TO EACH OF values
        """
        for value in values:
            self.add(key, value)

    def __getitem__(self, key):
        """
        RETURN AN ARRAY OF OBJECTS THAT key MAPS TO
        """
        matches = []
        for k, v in self.all:
            if k == key:
                matches.append(v)
        return matches
class Relation(object):
    """
    A MANY-TO-MANY RELATION, KEPT AS A dict FROM key TO THE set OF ITS VALUES
    """
    def __init__(self):
        self.map = dict()

    def len(self):
        """
        TOTAL NUMBER OF (key, value) PAIRS, AS CALCULATED BY SUM
        """
        return SUM(len(members) for _, members in self.map.items())

    def add(self, key, value):
        """
        RECORD THAT key MAPS TO value
        """
        members = self.map.get(key, None)
        if members is None:
            # FIRST VALUE FOR THIS key
            members = self.map[key] = set()
        members.add(value)

    def testAndAdd(self, key, value):
        """
        RETURN TRUE IF THIS RELATION IS NET-NEW
        """
        members = self.map.get(key, None)
        if members is None:
            # FIRST VALUE FOR THIS key, SO IT MUST BE NEW
            members = self.map[key] = set()
        elif value in members:
            return False
        members.add(value)
        return True

    def extend(self, key, values):
        """
        RECORD THAT key MAPS TO EACH OF values
        """
        members = self.map.get(key, None)
        if members:
            members.update(values)
        else:
            # NOTHING RECORDED YET (OR ONLY AN EMPTY set), START FROM values
            self.map[key] = set(values)

    def __getitem__(self, key):
        """
        RETURN THE set OF OBJECTS THAT key MAPS TO (A NEW EMPTY set IF NONE)
        """
        return self.map.get(key, None) or set()

    def domain(self):
        """
        RETURN THE KEYS OF THIS RELATION
        """
        return self.map.keys()

Просмотреть файл

@ -1,344 +0,0 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from datetime import datetime
import re
import sha
import time
import requests
from .threads import ThreadedQueue
import struct
from .maths import Math
from .queries import Q
from .cnv import CNV
from .logs import Log
from .struct import nvl, Null
from .struct import Struct, StructList
DEBUG = False
class ElasticSearch(object):
    """
    CONVENIENCE WRAPPER FOR ONE ElasticSearch INDEX/TYPE, TALKING TO THE REST
    INTERFACE DIRECTLY (VIA requests)
    """

    def __init__(self, settings):
        """
        settings - MUST HAVE host, index AND type
                   port IS OPTIONAL (SET TO 9200 WHEN MISSING)
                   debug IS OPTIONAL (DEFAULTS TO THE MODULE-LEVEL DEBUG)
        """
        assert settings.host
        assert settings.index
        assert settings.type

        if settings.index == settings.alias:
            Log.error("must have a unique index name")
        self.metadata = None  # CACHE, FILLED BY get_metadata()
        if not settings.port:
            settings.port = 9200
        self.debug = nvl(settings.debug, DEBUG)
        # THE STATIC HTTP HELPERS ONLY SEE THE MODULE FLAG, SO ONE DEBUGGING
        # INSTANCE TURNS IT ON FOR THE WHOLE MODULE
        globals()["DEBUG"] = DEBUG or self.debug
        self.settings = settings
        self.path = settings.host + ":" + unicode(settings.port) + "/" + settings.index + "/" + settings.type

    @staticmethod
    def create_index(settings, schema):
        """
        CREATE settings.index USING schema (JSON STRING OR OBJECT)
        RETURN AN ElasticSearch INSTANCE FOR THE NEW INDEX
        """
        if isinstance(schema, basestring):
            schema = CNV.JSON2object(schema)

        ElasticSearch.post(
            settings.host + ":" + unicode(settings.port) + "/" + settings.index,
            data=CNV.object2JSON(schema),
            headers={"Content-Type": "application/json"}
        )
        # FIXED PAUSE BEFORE THE INDEX IS USED
        time.sleep(2)
        es = ElasticSearch(settings)
        return es

    @staticmethod
    def delete_index(settings, index=None):
        """
        DELETE index (DEFAULTS TO settings.index)
        """
        index = nvl(index, settings.index)

        ElasticSearch.delete(
            settings.host + ":" + unicode(settings.port) + "/" + index,
        )

    def get_aliases(self):
        """
        RETURN LIST OF {"alias":a, "index":i} PAIRS
        ALL INDEXES INCLUDED, EVEN IF NO ALIAS {"alias":Null}
        """
        data = self.get_metadata().indices
        output = []
        for index, desc in data.items():
            if not desc["aliases"]:
                output.append({"index": index, "alias": None})
            else:
                for a in desc["aliases"]:
                    output.append({"index": index, "alias": a})
        return struct.wrap(output)

    def get_metadata(self):
        """
        RETURN THE metadata PART OF /_cluster/state (FETCHED ONCE, THEN CACHED)
        """
        if not self.metadata:
            response = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/_cluster/state")
            self.metadata = response.metadata
        return self.metadata

    def get_schema(self):
        """
        RETURN THE CLUSTER METADATA ENTRY FOR settings.index
        """
        # WAS ".indicies" (TYPO); get_aliases() READS THE SAME OBJECT AS ".indices"
        return self.get_metadata().indices[self.settings.index]

    def delete_all_but(self, prefix, name):
        """
        DELETE ALL INDEXES WITH GIVEN prefix, EXCEPT name
        """
        if prefix == name:
            Log.note("{{index_name}} will not be deleted", {"index_name": prefix})
        for a in self.get_aliases():
            # MATCH prefix + YYYYMMDD_HHMMSS FORMAT
            if re.match(re.escape(prefix) + "\\d{8}_\\d{6}", a.index) and a.index != name:
                ElasticSearch.delete_index(self.settings, a.index)

    @staticmethod
    def proto_name(prefix, timestamp=None):
        """
        RETURN prefix WITH A YYYYMMDD_HHMMSS SUFFIX (DEFAULT: CURRENT UTC TIME)
        """
        if not timestamp:
            timestamp = datetime.utcnow()
        return prefix + CNV.datetime2string(timestamp, "%Y%m%d_%H%M%S")

    def add_alias(self, alias):
        """
        POINT alias AT settings.index
        """
        self.metadata = None  # CACHED ALIASES ARE NOW STALE
        requests.post(
            self.settings.host + ":" + unicode(self.settings.port) + "/_aliases",
            CNV.object2JSON({
                "actions": [
                    {"add": {"index": self.settings.index, "alias": alias}}
                ]
            })
        )

    def get_proto(self, alias):
        """
        RETURN ALL INDEXES THAT ARE INTENDED TO BE GIVEN alias, BUT HAVE NO
        ALIAS YET BECAUSE INCOMPLETE
        """
        output = Q.sort([
            a.index
            for a in self.get_aliases()
            if re.match(re.escape(alias) + "\\d{8}_\\d{6}", a.index) and not a.alias
        ])
        return output

    def get_index(self, alias):
        """
        RETURN THE INDEX USED BY THIS alias
        """
        output = Q.sort([
            a.index
            for a in self.get_aliases()
            if a.alias == alias
        ])

        if len(output) > 1:
            Log.error("only one index with given alias==\"{{alias}}\" expected", {"alias": alias})

        if not output:
            return Null

        return output.last()

    def is_proto(self, index):
        """
        RETURN True IF THIS INDEX HAS NOT BEEN ASSIGNED ITS ALIAS
        """
        for a in self.get_aliases():
            if a.index == index and a.alias:
                return False
        return True

    def delete_record(self, query):
        """
        query - A dict IS SENT AS A DELETE-BY-QUERY, ANYTHING ELSE IS TREATED
                AS THE ID OF THE ONE RECORD TO DELETE
        """
        if isinstance(query, dict):
            ElasticSearch.delete(
                self.path + "/_query",
                data=CNV.object2JSON(query)
            )
        else:
            ElasticSearch.delete(
                self.path + "/" + query
            )

    def extend(self, records):
        """
        BULK-LOAD records; EACH MUST HAVE id, AND EITHER json (A STRING) OR
        value (AN OBJECT).  AN id OF None IS REPLACED WITH THE SHA OF THE JSON
        """
        # ADD LINE WITH COMMAND
        lines = []
        for r in records:
            id = r["id"]
            if "json" in r:
                json = r["json"]
            elif "value" in r:
                json = CNV.object2JSON(r["value"])
            else:
                Log.error("Expecting every record given to have \"value\" or \"json\" property")

            if id == None:
                id = sha.new(json).hexdigest()

            lines.append(u'{"index":{"_id":' + CNV.object2JSON(id) + '}}')
            lines.append(json)

        if not lines:
            return
        response = ElasticSearch.post(
            self.path + "/_bulk",
            data="\n".join(lines).encode("utf8") + "\n",
            headers={"Content-Type": "text"}
        )
        items = response["items"]

        for i, item in enumerate(items):
            if not item.index.ok:
                # EVERY RECORD TOOK TWO LINES: COMMAND, THEN DOCUMENT
                Log.error("{{error}} while loading line:\n{{line}}", {
                    "error": item.index.error,
                    "line": lines[i * 2 + 1]
                })

        if self.debug:
            Log.note("{{num}} items added", {"num": len(lines) // 2})

    def add(self, record):
        """
        RECORDS MUST HAVE id AND json AS A STRING OR
        HAVE id AND value AS AN OBJECT
        """
        if isinstance(record, list):
            Log.error("add() has changed to only accept one record, no lists")
        self.extend([record])

    def set_refresh_interval(self, seconds):
        """
        seconds - ZERO, OR LESS, FOR NO REFRESH
        """
        if seconds <= 0:
            interval = "-1"
        else:
            interval = unicode(seconds) + "s"

        response = ElasticSearch.put(
            self.settings.host + ":" + unicode(
                self.settings.port) + "/" + self.settings.index + "/_settings",
            data="{\"index.refresh_interval\":\"" + interval + "\"}"
        )

        if response.content != '{"ok":true}':
            Log.error("Can not set refresh interval ({{error}})", {
                "error": response.content
            })

    def search(self, query):
        """
        POST query TO THE _search ENDPOINT, RETURN THE PARSED RESPONSE
        """
        try:
            if DEBUG:
                Log.note("Query:\n{{query|indent}}", {"query": query})
            return ElasticSearch.post(self.path + "/_search", data=CNV.object2JSON(query))
        except Exception as e:
            Log.error("Problem with search (path={{path}}):\n{{query|indent}}", {
                "path": self.path + "/_search",
                "query": query
            }, e)

    def threaded_queue(self, size):
        """
        RETURN A ThreadedQueue THAT FEEDS THIS INDEX
        """
        return ThreadedQueue(self, size)

    @staticmethod
    def post(*args, **kwargs):
        """
        requests.post(), WITH THE JSON RESPONSE PARSED AND ES ERRORS RAISED
        """
        try:
            response = requests.post(*args, **kwargs)
            if DEBUG:
                Log.note(response.content[:130])
            details = CNV.JSON2object(response.content)
            if details.error:
                Log.error(details.error)
            return details
        except Exception as e:
            if args[0][0:4] != "http":
                suggestion = " (did you forget \"http://\" prefix on the host name?)"
            else:
                suggestion = ""

            Log.error("Problem with call to {{url}}" + suggestion, {"url": args[0]}, e)

    @staticmethod
    def get(*list, **args):
        """
        requests.get(), WITH THE JSON RESPONSE PARSED AND ES ERRORS RAISED
        """
        try:
            response = requests.get(*list, **args)
            if DEBUG:
                Log.note(response.content[:130])
            details = CNV.JSON2object(response.content)
            if details.error:
                Log.error(details.error)
            return details
        except Exception as e:
            Log.error("Problem with call to {{url}}", {"url": list[0]}, e)

    @staticmethod
    def put(*list, **args):
        """
        requests.put(); THE RAW RESPONSE IS RETURNED (NOT PARSED)
        """
        try:
            response = requests.put(*list, **args)
            if DEBUG:
                Log.note(response.content)
            return response
        except Exception as e:
            Log.error("Problem with call to {{url}}", {"url": list[0]}, e)

    @staticmethod
    def delete(*args, **kwargs):
        """
        requests.delete(); THE RAW RESPONSE IS RETURNED (NOT PARSED)
        """
        try:
            response = requests.delete(*args, **kwargs)
            if DEBUG:
                Log.note(response.content)
            return response
        except Exception as e:
            Log.error("Problem with call to {{url}}", {"url": args[0]}, e)

    @staticmethod
    def scrub(r):
        """
        REMOVE KEYS OF DEGENERATE VALUES (EMPTY STRINGS, EMPTY LISTS, AND NULLS)
        KEYS ARE CONVERTED TO LOWER CASE
        NUMBERS ARE PASSED THROUGH CNV.value2number
        RETURNS **COPY**, DOES NOT CHANGE ORIGINAL
        """
        return struct.wrap(_scrub(r))
def _scrub(r):
try:
if r == None:
return None
elif isinstance(r, basestring):
if r == "":
return None
return r
elif Math.is_number(r):
return CNV.value2number(r)
elif isinstance(r, dict):
if isinstance(r, Struct):
r = r.dict
output = {}
for k, v in r.items():
v = _scrub(v)
if v != None:
output[k.lower()] = v
if len(output) == 0:
return None
return output
elif hasattr(r, '__iter__'):
if isinstance(r, StructList):
r = r.list
output = []
for v in r:
v = _scrub(v)
if v != None:
output.append(v)
if not output:
return None
try:
return Q.sort(output)
except Exception:
return output
else:
return r
except Exception, e:
Log.warning("Can not scrub: {{json}}", {"json": r})

1
tests/util/env/__init__.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
__author__ = 'klahnakoski'

477
tests/util/env/elasticsearch.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,477 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime
import re
import time
import requests
from ..maths.randoms import Random
from ..thread.threads import ThreadedQueue
from ..maths import Math
from ..cnv import CNV
from ..env.logs import Log
from ..struct import nvl, Null
from ..structs.wraps import wrap, unwrap
from ..struct import Struct, StructList
DEBUG = False
class ElasticSearch(object):
    """
    AN ElasticSearch INDEX LIFETIME MANAGEMENT TOOL

    ElasticSearch'S REST INTERFACE WORKS WELL WITH PYTHON AND JAVASCRIPT
    SO HARDLY ANY LIBRARY IS REQUIRED.  IT IS SIMPLER TO MAKE HTTP CALLS
    DIRECTLY TO ES USING YOUR FAVORITE HTTP LIBRARY.  I HAVE SOME
    CONVENIENCE FUNCTIONS HERE, BUT IT'S BETTER TO MAKE YOUR OWN.

    THIS CLASS IS TO HELP DURING ETL, CREATING INDEXES, MANAGING ALIASES
    AND REMOVING INDEXES WHEN THEY HAVE BEEN REPLACED.  IT USES A STANDARD
    SUFFIX (YYYYMMDD_HHMMSS) TO TRACK AGE AND RELATIONSHIP TO THE ALIAS,
    IF ANY YET.
    """

    def __init__(self, settings=None):
        """
        settings.explore_metadata == True - IF PROBING THE CLUSTER FOR METADATA IS ALLOWED
        settings.timeout == NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)

        settings == None MAKES A BARE INSTANCE (ONLY debug IS SET), WHICH IS
        ENOUGH TO USE THE HTTP HELPER METHODS
        """
        if settings is None:
            self.debug = DEBUG
            return

        settings = wrap(settings)
        assert settings.host
        assert settings.index
        assert settings.type
        settings.setdefault("explore_metadata", True)

        if settings.index == settings.alias:
            Log.error("must have a unique index name")

        self.cluster_metadata = None
        if not settings.port:
            settings.port = 9200
        self.debug = nvl(settings.debug, DEBUG)
        self.settings = settings

        try:
            # settings.index MAY ACTUALLY BE AN ALIAS; RESOLVE IT TO THE REAL INDEX
            index = self.get_index(settings.index)
            if index:
                settings.alias = settings.index
                settings.index = index
        except Exception as e:
            # EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
            pass

        self.path = settings.host + ":" + unicode(settings.port) + "/" + settings.index + "/" + settings.type

    @staticmethod
    def get_or_create_index(settings, schema, limit_replicas=False):
        """
        RETURN AN INSTANCE FOR settings.index, CREATING THE INDEX (WITH schema)
        IF THE CLUSTER DOES NOT LIST IT
        """
        es = ElasticSearch(settings)
        aliases = es.get_aliases()
        if settings.index not in [a.index for a in aliases]:
            schema = CNV.JSON2object(CNV.object2JSON(schema), paths=True)
            es = ElasticSearch.create_index(settings, schema, limit_replicas=limit_replicas)
        return es

    @staticmethod
    def create_index(settings, schema=None, limit_replicas=False):
        """
        CREATE settings.index
        schema         - JSON STRING OR OBJECT; WHEN MISSING, settings.schema_file IS READ
        limit_replicas - True TO KEEP number_of_replicas BELOW THE NUMBER OF NODES
        """
        if not schema and settings.schema_file:
            from .files import File

            schema = CNV.JSON2object(File(settings.schema_file).read(), flexible=True, paths=True)
        else:
            schema = wrap(schema)
            if isinstance(schema, basestring):
                schema = CNV.JSON2object(schema)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = DUMMY.get(settings.host + ":" + unicode(settings.port) + "/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized", {
                    "from": schema.settings.index.number_of_replicas,
                    "to": health.number_of_nodes - 1
                })
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        DUMMY._post(
            settings.host + ":" + unicode(settings.port) + "/" + settings.index,
            data=CNV.object2JSON(schema).encode("utf8"),
            headers={"Content-Type": "application/json"}
        )
        # FIXED PAUSE BEFORE THE INDEX IS USED
        time.sleep(2)
        es = ElasticSearch(settings)
        return es

    @staticmethod
    def delete_index(settings, index=None):
        """
        DELETE index (DEFAULTS TO settings.index)
        """
        index = nvl(index, settings.index)

        DUMMY.delete(
            settings.host + ":" + unicode(settings.port) + "/" + index,
        )

    def get_aliases(self):
        """
        RETURN LIST OF {"alias":a, "index":i} PAIRS
        ALL INDEXES INCLUDED, EVEN IF NO ALIAS {"alias":Null}
        """
        data = self.get_metadata().indices
        output = []
        for index, desc in data.items():
            if not desc["aliases"]:
                output.append({"index": index, "alias": None})
            else:
                for a in desc["aliases"]:
                    output.append({"index": index, "alias": a})
        return wrap(output)

    def get_metadata(self):
        """
        RETURN THE metadata PART OF /_cluster/state (FETCHED ONCE, THEN CACHED)
        THE NODE INFORMATION (FROM "/") IS FETCHED AT THE SAME TIME
        """
        if self.settings.explore_metadata:
            if not self.cluster_metadata:
                response = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/_cluster/state")
                self.cluster_metadata = response.metadata
                self.node_metatdata = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/")
        else:
            Log.error("Metadata exploration has been disabled")
        return self.cluster_metadata

    def get_schema(self):
        """
        RETURN THE MAPPING OF settings.type IN settings.index
        (FROM THE CLUSTER METADATA WHEN ALLOWED, OTHERWISE FROM _mapping)
        """
        if self.settings.explore_metadata:
            indices = self.get_metadata().indices
            index = indices[self.settings.index]
            if not index.mappings[self.settings.type]:
                Log.error("ElasticSearch index ({{index}}) does not have type ({{type}})", self.settings)
            return index.mappings[self.settings.type]
        else:
            mapping = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/" + self.settings.index + "/" + self.settings.type + "/_mapping")
            if not mapping[self.settings.type]:
                Log.error("{{index}} does not have type {{type}}", self.settings)
            return wrap({"mappings": mapping[self.settings.type]})

    def delete_all_but(self, prefix, name):
        """
        DELETE ALL INDEXES WITH GIVEN prefix, EXCEPT name
        """
        if prefix == name:
            Log.note("{{index_name}} will not be deleted", {"index_name": prefix})
        for a in self.get_aliases():
            # MATCH prefix + YYYYMMDD_HHMMSS FORMAT
            if re.match(re.escape(prefix) + "\\d{8}_\\d{6}", a.index) and a.index != name:
                ElasticSearch.delete_index(self.settings, a.index)

    @staticmethod
    def proto_name(prefix, timestamp=None):
        """
        RETURN prefix WITH A YYYYMMDD_HHMMSS SUFFIX (DEFAULT: CURRENT UTC TIME)
        """
        if not timestamp:
            timestamp = datetime.utcnow()
        return prefix + CNV.datetime2string(timestamp, "%Y%m%d_%H%M%S")

    def add_alias(self, alias):
        """
        POINT alias AT settings.index
        """
        self.cluster_metadata = None  # CACHED ALIASES ARE NOW STALE
        requests.post(
            self.settings.host + ":" + unicode(self.settings.port) + "/_aliases",
            CNV.object2JSON({
                "actions": [
                    {"add": {"index": self.settings.index, "alias": alias}}
                ]
            }),
            timeout=nvl(self.settings.timeout, 30)
        )

    def get_proto(self, alias):
        """
        RETURN ALL INDEXES THAT ARE INTENDED TO BE GIVEN alias, BUT HAVE NO
        ALIAS YET BECAUSE INCOMPLETE
        """
        output = sort([
            a.index
            for a in self.get_aliases()
            if re.match(re.escape(alias) + "\\d{8}_\\d{6}", a.index) and not a.alias
        ])
        return output

    def get_index(self, alias):
        """
        RETURN THE INDEX USED BY THIS alias
        """
        output = sort([
            a.index
            for a in self.get_aliases()
            if a.alias == alias
        ])

        if len(output) > 1:
            Log.error("only one index with given alias==\"{{alias}}\" expected", {"alias": alias})

        if not output:
            return Null

        return output.last()

    def is_proto(self, index):
        """
        RETURN True IF THIS INDEX HAS NOT BEEN ASSIGNED ITS ALIAS
        """
        for a in self.get_aliases():
            if a.index == index and a.alias:
                return False
        return True

    def delete_record(self, filter):
        """
        DELETE ALL RECORDS MATCHING filter (DELETE-BY-QUERY)
        THE REQUEST BODY DEPENDS ON THE ES VERSION REPORTED BY THE NODE
        """
        self.get_metadata()

        if self.node_metatdata.version.number.startswith("0.90"):
            query = filter
        elif self.node_metatdata.version.number.startswith("1.0"):
            query = {"query": filter}
        else:
            Log.error("not implemented yet")

        if self.debug:
            Log.note("Delete bugs:\n{{query}}", {"query": query})

        self.delete(
            self.path + "/_query",
            data=CNV.object2JSON(query)
        )

    def extend(self, records):
        """
        records - MUST HAVE FORM OF
            [{"value":value}, ... {"value":value}] OR
            [{"json":json}, ... {"json":json}]
            OPTIONAL "id" PROPERTY IS ALSO ACCEPTED
        """
        lines = []
        try:
            for r in records:
                id = r.get("id", None)
                if id == None:
                    id = Random.hex(40)

                if "json" in r:
                    json = r["json"]
                elif "value" in r:
                    json = CNV.object2JSON(r["value"])
                else:
                    json = None
                    Log.error("Expecting every record given to have \"value\" or \"json\" property")

                lines.append('{"index":{"_id": ' + CNV.object2JSON(id) + '}}')
                lines.append(json)

            if not lines:
                return

            try:
                data_bytes = "\n".join(lines) + "\n"
                data_bytes = data_bytes.encode("utf8")
            except Exception as e:
                Log.error("can not make request body from\n{{lines|indent}}", {"lines": lines}, e)

            response = self._post(
                self.path + "/_bulk",
                data=data_bytes,
                headers={"Content-Type": "text"},
                timeout=self.settings.timeout
            )
            items = response["items"]

            for i, item in enumerate(items):
                if not item.index.ok:
                    # EVERY RECORD TOOK TWO LINES: COMMAND, THEN DOCUMENT
                    Log.error("{{error}} while loading line:\n{{line}}", {
                        "error": item.index.error,
                        "line": lines[i * 2 + 1]
                    })

            if self.debug:
                Log.note("{{num}} items added", {"num": len(lines) // 2})
        except Exception as e:
            if e.message.startswith("sequence item "):
                # str.join() NAMES THE INDEX OF THE BAD ITEM; SHOW THAT LINE
                Log.error("problem with {{data}}", {"data": repr(lines[int(e.message[14:16].strip())])}, e)
            Log.error("problem sending to ES", e)

    def add(self, record):
        """
        RECORDS MUST HAVE id AND json AS A STRING OR
        HAVE id AND value AS AN OBJECT
        """
        if isinstance(record, list):
            Log.error("add() has changed to only accept one record, no lists")
        self.extend([record])

    def set_refresh_interval(self, seconds):
        """
        seconds - ZERO, OR LESS, FOR NO REFRESH
        """
        if seconds <= 0:
            interval = "-1"
        else:
            interval = unicode(seconds) + "s"

        response = self.put(
            self.settings.host + ":" + unicode(
                self.settings.port) + "/" + self.settings.index + "/_settings",
            data="{\"index.refresh_interval\":\"" + interval + "\"}"
        )

        result = CNV.JSON2object(response.content.decode("utf-8"))
        if not result.ok:
            Log.error("Can not set refresh interval ({{error}})", {
                "error": response.content.decode("utf-8")
            })

    def search(self, query, timeout=None):
        """
        POST query TO THE _search ENDPOINT, RETURN THE PARSED RESPONSE
        timeout - SECONDS (DEFAULTS TO settings.timeout)
        """
        query = wrap(query)
        try:
            if self.debug:
                if len(query.facets.keys()) > 20:
                    # TOO MANY FACETS TO LOG; SHOW ONLY THEIR NAMES
                    show_query = query.copy()
                    show_query.facets = {k: "..." for k in query.facets.keys()}
                else:
                    show_query = query
                Log.note("Query:\n{{query|indent}}", {"query": show_query})
            return self._post(
                self.path + "/_search",
                data=CNV.object2JSON(query).encode("utf8"),
                timeout=nvl(timeout, self.settings.timeout)
            )
        except Exception as e:
            Log.error("Problem with search (path={{path}}):\n{{query|indent}}", {
                "path": self.path + "/_search",
                "query": query
            }, e)

    def threaded_queue(self, size=None, period=None):
        """
        RETURN A ThreadedQueue THAT FEEDS THIS INDEX
        """
        return ThreadedQueue(self, size=size, period=period)

    def _post(self, *args, **kwargs):
        """
        requests.post(), WITH THE JSON RESPONSE PARSED; ES ERRORS AND SHARD
        FAILURES ARE RAISED.  data, IF GIVEN, MUST ALREADY BE utf8 ENCODED
        """
        if "data" in kwargs and not isinstance(kwargs["data"], str):
            Log.error("data must be utf8 encoded string")

        # REMEMBER THE BODY NOW, SO THE ERROR REPORT BELOW CAN NOT FAIL
        # (AND HIDE THE REAL CAUSE) WHEN NO data WAS GIVEN
        body = kwargs.get("data", "")

        try:
            kwargs = wrap(kwargs)
            kwargs.setdefault("timeout", 600)
            kwargs.headers["Accept-Encoding"] = "gzip,deflate"
            kwargs = unwrap(kwargs)
            response = requests.post(*args, **kwargs)
            if self.debug:
                Log.note(response.content.decode("utf-8")[:130])
            details = CNV.JSON2object(response.content.decode("utf-8"))
            if details.error:
                Log.error(CNV.quote2string(details.error))
            if details._shards.failed > 0:
                Log.error("Shard failure")
            return details
        except Exception as e:
            if args[0][0:4] != "http":
                suggestion = " (did you forget \"http://\" prefix on the host name?)"
            else:
                suggestion = ""

            Log.error("Problem with call to {{url}}" + suggestion + "\n{{body}}", {
                "url": args[0],
                "body": body if DEBUG else body[0:100]
            }, e)

    def get(self, *args, **kwargs):
        """
        requests.get(), WITH THE JSON RESPONSE PARSED AND ES ERRORS RAISED
        """
        try:
            kwargs = wrap(kwargs)
            kwargs.setdefault("timeout", 600)
            # HAND requests A PLAIN dict, SAME AS _post() DOES
            kwargs = unwrap(kwargs)
            response = requests.get(*args, **kwargs)
            if self.debug:
                Log.note(response.content.decode("utf-8")[:130])
            details = wrap(CNV.JSON2object(response.content.decode("utf-8")))
            if details.error:
                Log.error(details.error)
            return details
        except Exception as e:
            Log.error("Problem with call to {{url}}", {"url": args[0]}, e)

    def put(self, *args, **kwargs):
        """
        requests.put(); THE RAW RESPONSE IS RETURNED (NOT PARSED)
        """
        try:
            kwargs = wrap(kwargs)
            kwargs.setdefault("timeout", 60)
            # HAND requests A PLAIN dict, SAME AS _post() DOES
            kwargs = unwrap(kwargs)
            response = requests.put(*args, **kwargs)
            if self.debug:
                Log.note(response.content.decode("utf-8"))
            return response
        except Exception as e:
            Log.error("Problem with call to {{url}}", {"url": args[0]}, e)

    def delete(self, *args, **kwargs):
        """
        requests.delete(); THE RAW RESPONSE IS RETURNED (NOT PARSED)
        """
        try:
            kwargs.setdefault("timeout", 60)
            response = requests.delete(*args, **kwargs)
            if self.debug:
                Log.note(response.content.decode("utf-8"))
            return response
        except Exception as e:
            Log.error("Problem with call to {{url}}", {"url": args[0]}, e)

    @staticmethod
    def scrub(r):
        """
        REMOVE KEYS OF DEGENERATE VALUES (EMPTY STRINGS, EMPTY LISTS, AND NULLS)
        KEYS ARE CONVERTED TO LOWER CASE
        NUMBERS ARE PASSED THROUGH CNV.value2number
        RETURNS **COPY**, DOES NOT CHANGE ORIGINAL
        """
        return wrap(_scrub(r))
def _scrub(r):
try:
if r == None:
return None
elif isinstance(r, basestring):
if r == "":
return None
return r
elif Math.is_number(r):
return CNV.value2number(r)
elif isinstance(r, dict):
if isinstance(r, Struct):
r = object.__getattribute__(r, "__dict__")
output = {}
for k, v in r.items():
v = _scrub(v)
if v != None:
output[k.lower()] = v
if len(output) == 0:
return None
return output
elif hasattr(r, '__iter__'):
if isinstance(r, StructList):
r = r.list
output = []
for v in r:
v = _scrub(v)
if v != None:
output.append(v)
if not output:
return None
try:
return sort(output)
except Exception:
return output
else:
return r
except Exception, e:
Log.warning("Can not scrub: {{json}}", {"json": r})
def sort(values):
    """
    RETURN values IN ASCENDING ORDER, WRAPPED
    """
    ordered = sorted(values)
    return wrap(ordered)


# BARE INSTANCE (NO settings), USED BY THE STATIC METHODS FOR ITS HTTP HELPERS
DUMMY = ElasticSearch()

Просмотреть файл

@ -8,27 +8,39 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
import sys
from .struct import nvl
from .. import struct
from ..struct import nvl
class Emailer:
def __init__(self, settings):
self.settings=settings
"""
REQUIRES SETTINGS LIKE
"email": {
"from_address": "klahnakoski@mozilla.com", #DEFAULT
"to":"klahnakoski@mozilla.com", #DEFAULT
"subject": "catchy title", #DEFAULT
"host": "mail.mozilla.com",
"port": 465,
"username": "example@example.com",
"password": "password",
"use_ssl": 1
}
"""
self.settings = settings
def send_email(self,
from_address = None,
to_addrs = None,
subject='No Subject',
text_data = None,
html_data = None
from_address=None,
to_addrs=None,
subject=None,
text_data=None,
html_data=None
):
"""Sends an email.
@ -42,9 +54,10 @@ class Emailer:
viewer supports it; otherwise he'll see the text content.
"""
settings=self.settings
settings = self.settings
from_address=nvl(from_address, settings.from_address)
from_address = nvl(from_address, settings["from"], settings.from_address)
to_addrs = listwrap(nvl(to_addrs, settings.to, settings.to_addrs))
if not from_address or not to_addrs:
raise Exception("Both from_addr and to_addrs must be specified")
@ -63,14 +76,14 @@ class Emailer:
msg = MIMEText(text_data)
elif not text_data:
msg = MIMEMultipart()
msg.preamble = subject
msg.preamble = nvl(subject, settings.subject, 'No Subject')
msg.attach(MIMEText(html_data, 'html'))
else:
msg = MIMEMultipart('alternative')
msg.attach(MIMEText(text_data, 'plain'))
msg.attach(MIMEText(html_data, 'html'))
msg['Subject'] = subject
msg['Subject'] = nvl(subject, settings.subject)
msg['From'] = from_address
msg['To'] = ', '.join(to_addrs)
@ -79,7 +92,6 @@ class Emailer:
server.quit()
if sys.hexversion < 0x020603f0:
# versions earlier than 2.6.3 have a bug in smtplib when sending over SSL:
# http://bugs.python.org/issue4066
@ -90,7 +102,8 @@ if sys.hexversion < 0x020603f0:
import ssl
def _get_socket_fixed(self, host, port, timeout):
if self.debuglevel > 0: print>> sys.stderr, 'connect:', (host, port)
if self.debuglevel > 0:
print>> sys.stderr, 'connect:', (host, port)
new_socket = socket.create_connection((host, port), timeout)
new_socket = ssl.wrap_socket(new_socket, self.keyfile, self.certfile)
self.file = smtplib.SSLFakeFile(new_socket)

210
tests/util/env/files.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,210 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from datetime import datetime
import io
import os
import shutil
from ..maths import crypto
from ..struct import nvl
from ..structs.wraps import listwrap
from ..cnv import CNV
class File(object):
    """
    ASSUMES ALL FILE CONTENT IS UTF8 ENCODED STRINGS
    """

    def __init__(self, filename, buffering=2 ** 14, suffix=None):
        """
        YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES
        suffix - ADDED TO THE NAME, JUST BEFORE THE EXTENSION
        """
        if filename == None:
            from ..env.logs import Log

            Log.error("File must be given a filename")
        elif isinstance(filename, basestring):
            self.key = None
            self._filename = "/".join(filename.split(os.sep))  # USE UNIX STANDARD
            self.buffering = buffering
        else:
            self.key = CNV.base642bytearray(filename.key)
            self._filename = "/".join(filename.path.split(os.sep))  # USE UNIX STANDARD
            self.buffering = buffering

        if suffix:
            self._filename = File.add_suffix(self._filename, suffix)

    @property
    def filename(self):
        """
        THE PATH, USING THE SEPARATOR OF THE LOCAL OS
        """
        return self._filename.replace("/", os.sep)

    @property
    def abspath(self):
        return os.path.abspath(self._filename)

    @staticmethod
    def add_suffix(filename, suffix):
        """
        ADD suffix TO THE filename (NOT INCLUDING THE FILE EXTENSION)
        """
        path = filename.split("/")
        parts = path[-1].split(".")
        # THE PART JUST BEFORE THE LAST EXTENSION (OR THE WHOLE NAME, IF NO EXTENSION)
        i = max(len(parts) - 2, 0)
        parts[i] = parts[i] + suffix
        path[-1] = ".".join(parts)
        return "/".join(path)

    def backup_name(self, timestamp=None):
        """
        RETURN A FILENAME THAT CAN SERVE AS A BACKUP FOR THIS FILE
        """
        suffix = CNV.datetime2string(nvl(timestamp, datetime.now()), "%Y%m%d_%H%M%S")
        return File.add_suffix(self._filename, suffix)

    def read(self, encoding="utf8"):
        """
        RETURN THE WHOLE CONTENT AS UNICODE (DECRYPTED, IF THIS FILE HAS A KEY)
        """
        with open(self._filename, "rb") as f:
            content = f.read().decode(encoding)
            if self.key:
                return crypto.decrypt(content, self.key)
            else:
                return content

    def read_ascii(self):
        """
        RETURN THE WHOLE CONTENT, READ IN TEXT MODE, WITHOUT DECODING
        """
        # READING MUST NOT CREATE DIRECTORIES (THE OLD CODE MADE THE PARENT
        # DIRECTORY BEFORE FAILING TO OPEN THE MISSING FILE)
        with open(self._filename, "r") as f:
            return f.read()

    def write_ascii(self, content):
        """
        REPLACE THE FILE WITH content, WRITTEN IN TEXT MODE, WITHOUT ENCODING
        """
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "w") as f:
            f.write(content)

    def write(self, data):
        """
        REPLACE THE FILE WITH data: ONE UNICODE STRING, OR A LIST OF THEM
        (A LIST IS NOT ALLOWED FOR CRYPTO FILES)
        """
        # CHECK BEFORE OPENING, SO A REJECTED CALL DOES NOT TRUNCATE THE FILE
        if isinstance(data, list) and self.key:
            from ..env.logs import Log

            Log.error("list of data and keys are not supported, encrypt before sending to file")

        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "wb") as f:
            for d in listwrap(data):
                if not isinstance(d, unicode):
                    from ..env.logs import Log

                    Log.error("Expecting unicode data only")
                if self.key:
                    f.write(crypto.encrypt(d, self.key).encode("utf8"))
                else:
                    f.write(d.encode("utf8"))

    def __iter__(self):
        """
        RETURN A GENERATOR OF THE (utf8 DECODED) LINES OF THE FILE
        """
        #NOT SURE HOW TO MAXIMIZE FILE READ SPEED
        #http://stackoverflow.com/questions/8009882/how-to-read-large-file-line-by-line-in-python
        #http://effbot.org/zone/wide-finder.htm
        def output():
            try:
                with io.open(self._filename, "rb") as f:
                    for line in f:
                        yield line.decode("utf8")
            except Exception as e:
                from .logs import Log

                Log.error("Can not read line from {{filename}}", {"filename": self._filename}, e)

        return output()

    def append(self, content):
        """
        ADD content (UNICODE), PLUS A NEWLINE, TO THE END OF THE FILE
        """
        # CHECK BEFORE OPENING, SO A REJECTED CALL DOES NOT CREATE THE FILE
        if isinstance(content, str):
            from .logs import Log

            Log.error("expecting to write unicode only")

        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "ab") as output_file:
            output_file.write(content.encode("utf-8"))
            output_file.write(b"\n")

    def add(self, content):
        """
        SAME AS append()
        """
        return self.append(content)

    def extend(self, content):
        """
        append() EVERY ITEM OF content, OPENING THE FILE ONLY ONCE
        """
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "ab") as output_file:
            for c in content:
                if isinstance(c, str):
                    from .logs import Log

                    Log.error("expecting to write unicode only")

                output_file.write(c.encode("utf-8"))
                output_file.write(b"\n")

    def delete(self):
        """
        REMOVE THE FILE, OR THE WHOLE DIRECTORY TREE.  A MISSING PATH IS NOT
        AN ERROR.  RETURNS self
        """
        try:
            if os.path.isdir(self._filename):
                shutil.rmtree(self._filename)
            elif os.path.isfile(self._filename):
                os.remove(self._filename)
            return self
        except Exception as e:
            # NOT EVERY EXCEPTION HAS strerror; DO NOT LET THE LOOKUP HIDE THE REAL PROBLEM
            if getattr(e, "strerror", None) == "The system cannot find the path specified":
                return self
            from ..env.logs import Log

            Log.error("Could not remove file", e)

    def backup(self):
        # NOTE(review): UNFINISHED - ONLY THE NAME OF THE BACKUP IS MADE (AND
        # ONLY FOR NAMES WITHOUT AN EXTENSION); NOTHING IS COPIED OR RETURNED
        names = self._filename.split("/")[-1].split(".")
        if len(names) == 1:
            # WAS "%H%i%s", WHICH ARE NOT strftime DIRECTIVES
            backup = File(self._filename + ".backup " + datetime.utcnow().strftime("%Y%m%d %H%M%S"))

    def create(self):
        """
        MAKE A DIRECTORY (AND ANY MISSING PARENTS) AT THIS PATH
        """
        try:
            os.makedirs(self._filename)
        except Exception as e:
            from ..env.logs import Log

            Log.error("Could not make directory {{dir_name}}", {"dir_name": self._filename}, e)

    @property
    def parent(self):
        """
        THE File FOR THE DIRECTORY HOLDING THIS PATH
        """
        return File("/".join(self._filename.split("/")[:-1]))

    @property
    def exists(self):
        """
        True IF THE PATH EXISTS ("" AND "." ALWAYS EXIST)
        """
        if self._filename in ["", "."]:
            return True
        try:
            return os.path.exists(self._filename)
        except Exception as e:
            return False

    def __bool__(self):
        return self.__nonzero__()

    def __nonzero__(self):
        """
        USED FOR FILE EXISTENCE TESTING
        """
        return self.exists

134
tests/util/env/log_usingElasticSearch.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,134 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import timedelta, datetime
from ..cnv import CNV
from .elasticsearch import ElasticSearch
from ..structs.wraps import wrap
from ..thread.threads import Thread, Queue
from .logs import BaseLog, Log
class Log_usingElasticSearch(BaseLog):
    """
    A LOG THAT QUEUES ITS ENTRIES, AND HAS A THREAD PUSH THEM TO AN
    ElasticSearch INDEX IN BATCHES
    """

    def __init__(self, settings):
        """
        settings - PASSED TO ElasticSearch; THE INDEX IS CREATED (WITH SCHEMA)
                   WHEN THE CLUSTER DOES NOT LIST IT
        """
        settings = wrap(settings)
        self.es = ElasticSearch(settings)

        known_indexes = [a.index for a in self.es.get_aliases()]
        if settings.index not in known_indexes:
            schema = CNV.JSON2object(CNV.object2JSON(SCHEMA), paths=True)
            self.es = ElasticSearch.create_index(settings, schema, limit_replicas=True)

        self.queue = Queue()
        self.thread = Thread(
            "log to " + settings.index,
            time_delta_pusher,
            es=self.es,
            queue=self.queue,
            interval=timedelta(seconds=1)
        )
        self.thread.start()

    def write(self, template, params):
        """
        QUEUE ONE LOG ENTRY; RETURNS self
        """
        try:
            if params.get("template", None):
                #DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
                entry = params
            else:
                entry = {"template": template, "params": params}
            self.queue.add(entry)
            return self
        except Exception as e:
            raise e  #OH NO!

    def stop(self):
        """
        ASK THE PUSHER THREAD TO FINISH, WAIT FOR IT, THEN CLOSE THE QUEUE
        FAILURES ARE IGNORED
        """
        try:
            self.queue.add(Thread.STOP)  #BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            pass

        try:
            self.queue.close()
        except Exception as f:
            pass
def time_delta_pusher(please_stop, es, queue, interval):
    """
    please_stop - SIGNAL THAT ENDS THE LOOP
    es - RECEIVES EACH BATCH, VIA es.extend()
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - timedelta
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """
    if not isinstance(interval, timedelta):
        Log.error("Expecting interval to be a timedelta")

    next_run = datetime.utcnow() + interval

    while not please_stop:
        Thread.sleep(till=next_run)
        next_run = datetime.utcnow() + interval
        logs = queue.pop_all()
        if not logs:
            continue

        try:
            # ONLY THE ENTRIES BEFORE THE (LAST) STOP MARKER ARE SENT
            last = len(logs)
            for position, entry in enumerate(logs):
                if entry is Thread.STOP:
                    please_stop.go()
                    last = position
                    next_run = datetime.utcnow()
            if last > 0:
                batch = [{"value": v} for v in logs[0:last]]
                es.extend(batch)
        except Exception as e:
            # DO NOT KILL THREAD, WE MUST CONTINUE TO CONSUME MESSAGES
            Log.warning("problem logging to es", e)
# INDEX DEFINITION USED WHEN THE LOG INDEX MUST BE CREATED
SCHEMA = {
    "settings": {
        "index.number_of_shards": 3,
        "index.number_of_replicas": 2,
        "index.store.throttle.type": "merge",
        "index.store.throttle.max_bytes_per_sec": "2mb",
        "index.cache.filter.expire": "1m",
        "index.cache.field.type": "soft",
    },
    "mappings": {
        "_default_": {
            # ALL STRINGS ARE STORED AS-IS (NOT ANALYZED)
            "dynamic_templates": [
                {
                    "values_strings": {
                        "match": "*",
                        "match_mapping_type": "string",
                        "mapping": {
                            "type": "string",
                            "index": "not_analyzed"
                        }
                    }
                }
            ],
            "_all": {
                "enabled": False
            },
            "_source": {
                "compress": True,
                "enabled": True
            },
            "properties": {
                "timestamp": {
                    "type": "long",
                    "index": "not_analyzed",
                    "store": "yes"
                },
                # params IS KEPT, BUT NOT PARSED OR INDEXED
                "params": {
                    "type": "object",
                    "enabled": False,
                    "index": "no",
                    "store": "yes"
                }
            }
        }
    }
}

87
tests/util/env/log_usingLogger.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,87 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import timedelta
import logging
import sys
from .. import struct
from .log_usingStream import Log_usingStream, time_delta_pusher
from .logs import BaseLog, DEBUG_LOGGING, Log
from ..thread import threads
from ..thread.threads import Thread
#WRAP PYTHON CLASSIC logger OBJECTS
class Log_usingLogger(BaseLog):
    """
    WRAP PYTHON CLASSIC logger OBJECTS

    Entries are queued and pushed to the logger's info() in batches by a
    background thread running time_delta_pusher.
    """

    def __init__(self, settings):
        classic = logging.Logger("unique name", level=logging.INFO)
        self.logger = classic
        classic.addHandler(make_log_from_settings(settings))

        # TURNS OUT LOGGERS ARE REALLY SLOW TOO
        self.queue = threads.Queue(max=10000, silent=True)
        self.thread = Thread(
            "log to logger",
            time_delta_pusher,
            appender=self.logger.info,
            queue=self.queue,
            interval=timedelta(seconds=0.3)
        )
        self.thread.start()

    def write(self, template, params):
        """Queue one log entry for the background thread."""
        # http://docs.python.org/2/library/logging.html#logging.LogRecord
        entry = {"template": template, "params": params}
        self.queue.add(entry)

    def stop(self):
        """Flush and stop the background thread, then close the queue (best-effort)."""
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger sees stop, adding stop to queue\n")
            # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.queue.add(Thread.STOP)
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger done\n")
        except Exception:
            pass

        try:
            self.queue.close()
        except Exception:
            pass
def make_log_from_settings(settings):
    """
    Instantiate the handler class named by settings["class"].

    The remaining settings (with "class" cleared) are passed to the
    constructor as keyword arguments. When the class can not be imported and
    settings.stream is set, Log_usingStream is used instead; otherwise
    Log.error is raised. If settings.filename is set, its parent directory is
    created first.
    """
    assert settings["class"]

    # IMPORT MODULE FOR HANDLER
    parts = settings["class"].split(".")
    class_name = parts[-1]
    path = ".".join(parts[:-1])
    constructor = None
    try:
        module = __import__(path, globals(), locals(), [class_name], -1)
        constructor = object.__getattribute__(module, class_name)
    except Exception as e:
        if not settings.stream or constructor:
            Log.error("Can not find class {{class}}", {"class": path}, e)
        else:
            # PROVIDE A DEFAULT STREAM HANDLER
            constructor = Log_usingStream

    # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
    if settings.filename:
        from ..env.files import File

        directory = File(settings.filename).parent
        if not directory.exists:
            directory.create()

    settings['class'] = None
    return constructor(**struct.unwrap(settings))

122
tests/util/env/log_usingStream.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,122 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime, timedelta
import sys
from .logs import BaseLog, DEBUG_LOGGING, Log
from ..strings import expand_template
from ..thread.threads import Thread
class Log_usingStream(BaseLog):
    """
    Log to a stream through a background thread.

    stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
    WHICH WILL eval() TO ONE
    """

    def __init__(self, stream):
        assert stream

        use_UTF8 = False

        if isinstance(stream, basestring):
            if stream.startswith("sys."):
                use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
            # NOTE(review): eval() OF A SETTINGS-SUPPLIED STRING; ONLY SAFE WHILE
            # THE SETTINGS FILE IS TRUSTED
            self.stream = eval(stream)
            name = stream
        else:
            self.stream = stream
            name = "stream"

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from ..thread.threads import Queue

        if use_UTF8:
            def utf8_appender(value):
                if isinstance(value, unicode):
                    value = value.encode('utf-8')
                self.stream.write(value)

            appender = utf8_appender
        else:
            appender = self.stream.write

        self.queue = Queue(max=10000, silent=True)
        self.thread = Thread(
            "log to " + name,
            time_delta_pusher,
            appender=appender,
            queue=self.queue,
            interval=timedelta(seconds=0.3)
        )
        self.thread.start()

    def write(self, template, params):
        """Queue one log entry; returns self."""
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception:
            # BARE raise KEEPS THE ORIGINAL TRACEBACK (raise e WOULD REPLACE IT)
            raise  # OH NO!

    def stop(self):
        """
        Flush and stop the background thread, then close the queue.

        Errors are swallowed unless DEBUG_LOGGING is on, in which case they
        are re-raised with their original traceback.
        """
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingStream sees stop, adding stop to queue\n")
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingStream done\n")
        except Exception:
            if DEBUG_LOGGING:
                raise

        try:
            self.queue.close()
        except Exception:
            if DEBUG_LOGGING:
                raise
def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - timedelta

    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL. EACH BATCH IS EXPANDED,
    JOINED WITH NEWLINES AND HANDED TO appender IN ONE CALL.
    """
    if not isinstance(interval, timedelta):
        Log.error("Expecting interval to be a timedelta")

    wake_at = datetime.utcnow() + interval
    while not please_stop:
        Thread.sleep(till=wake_at)
        wake_at = datetime.utcnow() + interval
        batch = queue.pop_all()
        if not batch:
            continue

        lines = []
        for entry in batch:
            try:
                if entry is Thread.STOP:
                    please_stop.go()
                    wake_at = datetime.utcnow()
                    continue
                template = entry.get("template", None)
                params = entry.get("params", None)
                lines.append(expand_template(template, params))
            except Exception as e:
                # SWALLOW ERROR, GOT TO KEEP RUNNING
                Log.warning("Trouble formatting logs", e)

        try:
            if DEBUG_LOGGING and please_stop:
                sys.stdout.write("Call to appender with " + str(len(lines)) + " lines\n")
            appender(u"\n".join(lines) + u"\n")
            if DEBUG_LOGGING and please_stop:
                sys.stdout.write("Done call to appender with " + str(len(lines)) + " lines\n")
        except Exception as e:
            # SWALLOW ERROR, GOT TO KEEP RUNNING
            sys.stderr.write("Trouble with appender: " + str(e.message) + "\n")

523
tests/util/env/logs.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,523 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime
import sys
from .. import struct
from ..jsons import json_encoder
from ..thread import threads
from ..struct import nvl, Struct
from ..structs.wraps import listwrap, wrap
from ..strings import indent, expand_template
from ..thread.threads import Thread
# WHEN True, THE LOGGERS IN THIS MODULE PRINT SHUTDOWN PROGRESS TO stdout AND
# RE-RAISE ERRORS MET WHILE STOPPING
DEBUG_LOGGING = False
# SEVERITY LABELS; Except PREFIXES ITS TEXT WITH ITS type, WHICH IS ONE OF THESE
ERROR = "ERROR"
WARNING = "WARNING"
NOTE = "NOTE"
class Log(object):
    """
    FOR STRUCTURED LOGGING AND EXCEPTION CHAINING

    All state is kept on the class; call Log.start(settings) to configure the
    threaded loggers and Log.stop() to flush them.
    """
    trace = False
    main_log = None
    logging_multi = None
    profiler = None
    cprofiler = None  # screws up with pypy, but better than nothing
    error_mode = False  # prevent error loops

    @classmethod
    def new_instance(cls, settings):
        """
        Build one logger from a settings block.

        Returns None when the settings match no known logger type.
        """
        settings = wrap(settings)
        if settings["class"]:
            if settings["class"].startswith("logging.handlers."):
                from .log_usingLogger import Log_usingLogger

                return Log_usingLogger(settings)
            else:
                try:
                    from .log_usingLogger import make_log_from_settings

                    return make_log_from_settings(settings)
                except Exception:
                    pass  # OH WELL :(

        # FIX: THE ORIGINAL PASSED THE BUILTIN `file` TYPE INSTEAD OF THE
        # CONFIGURED PATH; ACCEPT EITHER settings.file OR settings.filename
        if settings.log_type == "file" or settings.file or settings.filename:
            return Log_usingFile(settings.file or settings.filename)
        if settings.log_type == "stream" or settings.stream:
            from .log_usingStream import Log_usingStream

            return Log_usingStream(settings.stream)
        # settings.stream IS ALREADY HANDLED ABOVE, SO ONLY log_type CAN SELECT THIS
        if settings.log_type == "elasticsearch":
            from .log_usingElasticSearch import Log_usingElasticSearch

            return Log_usingElasticSearch(settings)

    @classmethod
    def add_log(cls, log):
        """Add another logger to the fan-out created by start()."""
        cls.logging_multi.add_log(log)

    @classmethod
    def debug(cls, template=None, params=None):
        """
        USE THIS FOR DEBUGGING (AND EVENTUAL REMOVAL)
        """
        Log.note(nvl(template, ""), params, stack_depth=1)

    @classmethod
    def println(cls, template, params=None):
        """Same as note(), reporting the caller's location when tracing."""
        Log.note(template, params, stack_depth=1)

    @classmethod
    def note(cls, template, params=None, stack_depth=0):
        """
        Write one timestamped entry to the main log.

        template - moustache-style template ({{name}})
        params - values for the template
        stack_depth - extra frames to skip when reporting the call location
                      (only used when Log.trace is on)
        """
        # USE replace() AS POOR MAN'S CHILD TEMPLATE
        log_params = Struct(
            template=template,
            params=nvl(params, {}).copy(),
            timestamp=datetime.utcnow(),
        )

        if cls.trace:
            log_template = "{{timestamp|datetime}} - {{thread.name}} - {{location.file}}:{{location.line}} ({{location.method}}) - " + template.replace("{{", "{{params.")
            f = sys._getframe(stack_depth + 1)
            log_params.location = {
                "line": f.f_lineno,
                "file": f.f_code.co_filename,
                "method": f.f_code.co_name
            }
            thread = Thread.current()
            log_params.thread = {"name": thread.name, "id": thread.id}
        else:
            log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

        cls.main_log.write(log_template, log_params)

    @classmethod
    def warning(cls, template, params=None, cause=None):
        """
        Log a warning with stack trace; does not raise.

        An exception passed as params is treated as the cause.
        """
        if isinstance(params, BaseException):
            cause = params
            params = None

        if cause and not isinstance(cause, Except):
            cause = Except(WARNING, unicode(cause), trace=extract_tb(0))

        trace = extract_stack(1)
        e = Except(WARNING, template, params, cause, trace)
        Log.note(unicode(e), {
            "warning": {
                "template": template,
                "params": params,
                "cause": cause,
                "trace": trace
            }
        })

    @classmethod
    def error(
        cls,
        template,  # human readable template
        params=None,  # parameters for template
        cause=None,  # plausible cause
        offset=0  # stack trace offset (==1 if you do not want to report self)
    ):
        """
        raise an exception with a trace for the cause too
        """
        if params and isinstance(listwrap(params)[0], BaseException):
            cause = params
            params = None

        if cause == None:
            cause = []
        elif isinstance(cause, list):
            pass
        elif isinstance(cause, Except):
            cause = [cause]
        else:
            cause = [Except(ERROR, unicode(cause), trace=extract_tb(offset))]

        trace = extract_stack(1 + offset)
        e = Except(ERROR, template, params, cause, trace)
        raise e

    @classmethod
    def fatal(
        cls,
        template,  # human readable template
        params=None,  # parameters for template
        cause=None,  # plausible cause
        offset=0  # stack trace offset (==1 if you do not want to report self)
    ):
        """
        SEND TO STDERR

        The error is also written to the main log, unless this call is itself
        happening while another fatal() is being logged.
        """
        if params and isinstance(listwrap(params)[0], BaseException):
            cause = params
            params = None

        if cause == None:
            cause = []
        elif isinstance(cause, list):
            pass
        elif isinstance(cause, Except):
            cause = [cause]
        else:
            cause = [Except(ERROR, unicode(cause), trace=extract_tb(offset))]

        trace = extract_stack(1 + offset)
        e = Except(ERROR, template, params, cause, trace)
        str_e = unicode(e)

        error_mode = cls.error_mode
        try:
            if not error_mode:
                cls.error_mode = True
                Log.note(str_e, {
                    "error": {
                        "template": template,
                        "params": params,
                        "cause": cause,
                        "trace": trace
                    }
                })
        except Exception:
            pass
        cls.error_mode = error_mode

        sys.stderr.write(str_e)

    # RUN ME FIRST TO SETUP THE THREADED LOGGING
    @classmethod
    def start(cls, settings=None):
        """
        Configure logging from settings (trace, log, cprofile, profile).

        Does nothing when settings is empty; only records settings and trace
        when settings.log is empty.
        """
        ##http://victorlin.me/2012/08/good-logging-practice-in-python/
        if not settings:
            return

        cls.settings = settings
        cls.trace = cls.trace | nvl(settings.trace, False)

        if not settings.log:
            return

        cls.logging_multi = Log_usingMulti()
        cls.main_log = Log_usingThread(cls.logging_multi)

        for log in listwrap(settings.log):
            Log.add_log(Log.new_instance(log))

        if settings.cprofile:
            if isinstance(settings.cprofile, bool):
                settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}

            import cProfile

            cls.cprofiler = cProfile.Profile()
            cls.cprofiler.enable()

        if settings.profile:
            from ..env import profiles

            if isinstance(settings.profile, bool):
                settings.profile = {"enabled": True, "filename": "profile.tab"}

            if settings.profile.enabled:
                profiles.ON = True

    @classmethod
    def stop(cls):
        """Write any collected profiles, then stop the main log."""
        from ..env import profiles

        if cls.cprofiler and hasattr(cls, "settings"):
            write_profile(cls.settings.cprofile, cls.cprofiler)

        if profiles.ON and hasattr(cls, "settings"):
            profiles.write(cls.settings.profile)

        cls.main_log.stop()

    def write(self):
        """Instances are not loggers; always raises."""
        Log.error("not implemented")
def extract_stack(start=0):
    """
    SNAGGED FROM traceback.py

    Describe the current call stack, beginning at the caller (after skipping
    `start` further frames) and walking outward.

    Returns a list of dicts with keys "depth" (0 for the first reported
    frame), "line", "file" and "method".
    """
    try:
        raise ZeroDivisionError
    except ZeroDivisionError:
        # THE TRACEBACK'S FRAME IS THIS FUNCTION; ITS PARENT IS THE CALLER
        frame = sys.exc_info()[2].tb_frame.f_back

    for _ in range(start):
        frame = frame.f_back

    frames = []
    while frame is not None:
        code = frame.f_code
        frames.append({
            "depth": len(frames),
            "line": frame.f_lineno,
            "file": code.co_filename,
            "method": code.co_name
        })
        frame = frame.f_back
    return frames
def extract_tb(start):
    """
    SNAGGED FROM traceback.py

    Describe the traceback of the exception currently being handled, after
    skipping the first `start` entries.

    Returns a list of dicts with keys "depth", "file", "line" and "method".
    "depth" counts from the frame that caught the exception; the list is
    returned reversed, so the frame where the exception was raised is first.
    Returns an empty list when no exception is being handled.
    """
    tb = sys.exc_info()[2]
    for _ in range(start):
        tb = tb.tb_next

    entries = []
    while tb is not None:
        code = tb.tb_frame.f_code
        entries.append({
            "depth": len(entries),
            "file": code.co_filename,
            "line": tb.tb_lineno,
            "method": code.co_name
        })
        tb = tb.tb_next
    return entries[::-1]
def format_trace(tbs, start=0):
    """
    Render trace entries (dicts with "file", "line", "method") as text, one
    'File "...", line N, in method' line per entry, starting at index `start`.

    NOTE: each entry's "file" is rewritten in place with "/" replaced by "\\".
    """
    rendered = []
    for entry in tbs[start::]:
        entry["file"] = entry["file"].replace("/", "\\")
        rendered.append(expand_template('File "{{file}}", line {{line}}, in {{method}}\n', entry))
    return "".join(rendered)
class Except(Exception):
    """
    Exception carrying a template, its parameters, a stack trace and a cause
    (None, a single exception, or a list of exceptions).
    """

    def __init__(self, type=ERROR, template=None, params=None, cause=None, trace=None):
        # FIX: THE ORIGINAL CALLED super(Exception, self).__init__(self), WHICH
        # SKIPPED Exception.__init__ AND STORED self INSIDE ITS OWN args
        super(Except, self).__init__()
        self.type = type
        self.template = template
        self.params = params
        self.cause = cause
        self.trace = trace

    @property
    def message(self):
        return unicode(self)

    def _causes(self):
        """Return the causes as something iterable, whatever shape self.cause has."""
        cause = self.cause
        if cause is None:
            return []
        if isinstance(cause, BaseException):
            return [cause]
        return cause

    def contains(self, value):
        """True when this exception, or any exception in its cause chain, has type `value`."""
        if self.type == value:
            return True
        for c in self._causes():
            if c.contains(value):
                return True
        return False

    def __str__(self):
        output = self.type + ": " + self.template
        if self.params:
            output = expand_template(output, self.params)

        if self.trace:
            output += "\n" + indent(format_trace(self.trace))

        causes = self._causes()
        if causes:
            cause_strings = []
            for c in causes:
                try:
                    cause_strings.append(c.__str__())
                except Exception:
                    # A CAUSE THAT CAN NOT BE RENDERED MUST NOT HIDE THIS ERROR
                    pass

            output += "\ncaused by\n\t" + "\nand caused by\n\t".join(cause_strings)

        return output + "\n"

    def __json__(self):
        return json_encoder(Struct(
            type=self.type,
            template=self.template,
            params=self.params,
            cause=self.cause,
            trace=self.trace
        ))
class BaseLog(object):
    """Do-nothing logger; the concrete loggers override write() and stop()."""

    def write(self, template, params):
        """Accept a log entry and discard it."""
        return None

    def stop(self):
        """Nothing to shut down."""
        return None
class Log_usingFile(BaseLog):
    """
    Append expanded log entries to a file. An existing file is backed up and
    deleted when the logger is created.
    """

    def __init__(self, file):
        assert file

        from ..env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = threads.Lock()

    def write(self, template, params):
        """Expand the template and append it to the file, one writer at a time."""
        # FIX: THE ORIGINAL READ self.filename, WHICH IS NEVER SET; USE THE
        # File CREATED IN __init__
        with self.file_lock:
            self.file.append(expand_template(template, params))
class Log_usingThread(BaseLog):
    """
    Decouple callers from a slow logger: entries are queued and a worker
    thread forwards them to `logger` about once a second.
    """

    def __init__(self, logger):
        # DELAYED LOAD FOR THREADS MODULE
        from ..thread.threads import Queue

        self.queue = Queue(max=10000, silent=True)
        self.logger = logger

        def worker(please_stop):
            while not please_stop:
                Thread.sleep(1)
                logs = self.queue.pop_all()
                for log in logs:
                    if log is Thread.STOP:
                        if DEBUG_LOGGING:
                            sys.stdout.write("Log_usingThread.worker() sees stop, filling rest of queue\n")
                        please_stop.go()
                    else:
                        self.logger.write(**log)

        self.thread = Thread("log thread", worker)
        self.thread.start()

    def write(self, template, params):
        """Queue one log entry; returns self."""
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception:
            sys.stdout.write("IF YOU SEE THIS, IT IS LIKELY YOU FORGOT TO RUN Log.start() FIRST\n")
            # BARE raise KEEPS THE ORIGINAL TRACEBACK (raise e WOULD REPLACE IT)
            raise  # OH NO!

    def stop(self):
        """
        Flush the queue, stop the worker, stop the wrapped logger, then close
        the queue. Errors are swallowed unless DEBUG_LOGGING is on.
        """
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("injecting stop into queue\n")
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingThread telling logger to stop\n")
            self.logger.stop()
        except Exception:
            if DEBUG_LOGGING:
                raise

        try:
            self.queue.close()
        except Exception:
            if DEBUG_LOGGING:
                raise
class Log_usingMulti(BaseLog):
    """Fan out every entry to a list of loggers; one failing logger never blocks the rest."""

    def __init__(self):
        self.many = []

    def write(self, template, params):
        """Pass the entry to every registered logger, ignoring individual failures."""
        for target in self.many:
            try:
                target.write(template, params)
            except Exception:
                pass
        return self

    def add_log(self, logger):
        """Register a logger; returns self."""
        self.many.append(logger)
        return self

    def remove_log(self, logger):
        """Unregister a logger; returns self."""
        self.many.remove(logger)
        return self

    def clear_log(self):
        """Forget all registered loggers."""
        self.many = []

    def stop(self):
        """Stop every registered logger, ignoring individual failures."""
        for target in self.many:
            try:
                target.stop()
            except Exception:
                pass
def write_profile(profile_settings, cprofiler):
    """
    Dump cProfile results as a tab-delimited file.

    profile_settings - provides `filename`; a timestamp suffix is added
    cprofiler - the cProfile.Profile that collected the data
    """
    from ..cnv import CNV
    from .files import File
    import pstats

    collected = pstats.Stats(cprofiler)
    stats = []
    for f, d, in collected.stats.iteritems():
        num_calls, self_time, total_time = d[1], d[2], d[3]
        stats.append({
            "num_calls": num_calls,
            "self_time": self_time,
            "total_time": total_time,
            "self_time_per_call": self_time / num_calls,
            "total_time_per_call": total_time / num_calls,
            "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
            "line": f[1],
            "method": f[2].lstrip("<").rstrip(">")
        })

    suffix = CNV.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")
    stats_file = File(profile_settings.filename, suffix=suffix)
    stats_file.write(CNV.list2tab(stats))
# UNTIL Log.start() IS CALLED, SEND EVERYTHING TO stdout
if not Log.main_log:
    # EXPLICIT RELATIVE IMPORT, LIKE THE REST OF THIS MODULE
    from .log_usingStream import Log_usingStream

    Log.main_log = Log_usingStream("sys.stdout")

93
tests/util/env/profiles.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,93 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime
from time import clock
from ..collections import MAX
from ..struct import Struct
# PROFILING IS OFF UNTIL SOMETHING SETS ON = True
ON = False
# description -> Profiler, ONE SHARED INSTANCE PER DESCRIPTION
profiles = {}


class Profiler(object):
    """
    Context manager that times a block of code.

    While ON is True, all Profiler(description) calls with the same
    description share one instance, which accumulates durations. While ON is
    False the object does nothing.
    """

    def __new__(cls, *args):
        if ON:
            output = profiles.get(args[0], None)
            if output:
                return output
        # FIX: object.__new__ TAKES NO EXTRA ARGUMENTS
        output = object.__new__(cls)
        return output

    def __init__(self, description):
        # __init__ RUNS AGAIN WHEN __new__ RETURNS A SHARED INSTANCE; DO NOT RESET IT
        if ON and not hasattr(self, "description"):
            # ONLY NEEDED WHEN PROFILING, SO IMPORT LAZILY
            from ..queries.windows import Stats

            self.description = description
            self.samples = []
            self.stats = Stats()()
            profiles[description] = self

    def __enter__(self):
        if ON:
            self.start = clock()
        return self

    def __exit__(self, type, value, traceback):
        if ON:
            self.end = clock()
            duration = self.end - self.start

            self.stats.add(duration)
            if self.samples is not None:
                self.samples.append(duration)
                # STOP KEEPING INDIVIDUAL SAMPLES ONCE THERE ARE MORE THAN 100
                if len(self.samples) > 100:
                    self.samples = None
def write(profile_settings):
    """
    Write the collected profiles to two tab-delimited files: a summary per
    profile, and (suffix "_series_...") the individual samples side by side.

    profile_settings - provides `filename`; timestamp suffixes are added
    """
    from ..cnv import CNV
    from .files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [{
        "description": p.description,
        "num_calls": p.stats.count,
        "total_time": p.stats.count * p.stats.mean,
        "total_time_per_call": p.stats.mean
    }
        for p in profs if p.stats.count > 0
    ]
    stats_file = File(profile_settings.filename, suffix=CNV.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(CNV.list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(profile_settings.filename, suffix=CNV.datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S"))
    # FIX: A PROFILER SETS samples TO None ONCE IT HAS MORE THAN 100 OF THEM;
    # THOSE (AND AN EMPTY PROFILE SET) MUST NOT REACH len()/MAX()
    lengths = [len(p.samples) for p in profs if p.samples is not None]
    if not lengths:
        return

    r = range(MAX(lengths))
    profs.insert(0, Struct(description="index", samples=r))
    # FIX: SERIES SHORTER THAN THE LONGEST ONE SIMPLY HAVE NO VALUE IN LATER ROWS
    stats = [
        {p.description: p.samples[i] for p in profs if p.samples and i < len(p.samples)}
        for i in r
    ]
    if stats:
        stats_file2.write(CNV.list2tab(stats))

158
tests/util/env/startup.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,158 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import argparse
import os
import tempfile
import sys
from ..structs.wraps import listwrap, wrap, unwrap
from ..cnv import CNV
from ..env.logs import Log
from ..env.files import File
#PARAMETERS MATCH argparse.ArgumentParser.add_argument()
#http://docs.python.org/dev/library/argparse.html#the-add-argument-method
#name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
#action - The basic type of action to be taken when this argument is encountered at the command line.
#nargs - The number of command-line arguments that should be consumed.
#const - A constant value required by some action and nargs selections.
#default - The value produced if the argument is absent from the command line.
#type - The type to which the command-line argument should be converted.
#choices - A container of the allowable values for the argument.
#required - Whether or not the command-line option may be omitted (optionals only).
#help - A brief description of what the argument does.
#metavar - A name for the argument in usage messages.
#dest - The name of the attribute to be added to the object returned by parse_args().
def _argparse(defs):
    """
    Parse the command line according to `defs`.

    Each definition is a set of argparse add_argument() keywords plus `name`,
    which holds the flag or list of flags. Returns the parsed values wrapped.
    """
    parser = argparse.ArgumentParser()
    for definition in listwrap(defs):
        keywords = definition.copy()
        flags = keywords.name
        keywords.name = None
        parser.add_argument(*unwrap(listwrap(flags)), **unwrap(keywords))

    namespace = parser.parse_args()
    return wrap(dict(vars(namespace)))
def read_settings(filename=None, defs=None):
    """
    Load settings from a JSON file.

    filename - path of the settings file; when omitted it is taken from the
               --settings / --settings-file / --settings_file command-line
               option (default "./settings.json")
    defs - extra command-line argument definitions (see _argparse)

    The parsed command-line arguments, if any were requested, are attached as
    settings.args. Raises (through Log.error) when the file does not exist.
    """
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            # FIX: MESSAGE READ "Can not file settings file"
            Log.error("Can not find settings file {{filename}}", {
                "filename": settings_file.abspath
            })

        content = settings_file.read()
        settings = CNV.JSON2object(content, flexible=True)
        if defs:
            settings.args = _argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = _argparse(defs)
        settings_file = File(args.filename)
        if not settings_file.exists:
            Log.error("Can not read settings file {{filename}}", {
                "filename": settings_file.abspath
            })

        content = settings_file.read()
        settings = CNV.JSON2object(content, flexible=True)
        settings.args = args
        return settings
# snagged from https://github.com/pycontribs/tendo/blob/master/tendo/singleton.py (under licence PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2)
class SingleInstance:
    """
    ONLY ONE INSTANCE OF PROGRAM ALLOWED

    If you want to prevent your script from running in parallel, use
    SingleInstance as a context manager. If another instance is already
    running, the application exits with the message
    "Another instance is already running, quitting." and error code -1.

        with SingleInstance():
            ...

    This option is very useful if you have scripts executed by crontab at small
    intervals.

    Remember that this works by creating a lock file with a filename based on
    the full path to the script file.
    """

    def __init__(self, flavor_id=""):
        self.initialized = False
        appname = os.path.splitext(os.path.abspath(sys.argv[0]))[0]
        basename = ((appname + '-%s') % flavor_id).replace("/", "-").replace(":", "").replace("\\", "-") + '.lock'
        self.lockfile = os.path.normpath(tempfile.gettempdir() + '/' + basename)

    def __enter__(self):
        Log.note("SingleInstance.lockfile = " + self.lockfile)
        if sys.platform == 'win32':
            try:
                # file already exists, we try to remove (in case previous execution was interrupted)
                if os.path.exists(self.lockfile):
                    os.unlink(self.lockfile)
                self.fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
            except Exception:
                Log.note("\n"+
                    "**********************************************************************\n"+
                    "** Another instance is already running, quitting.\n"+
                    "**********************************************************************\n"
                )
                sys.exit(-1)
        else:  # non Windows
            import fcntl

            self.fp = open(self.lockfile, 'w')
            try:
                fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except IOError:
                Log.note("\n"+
                    "**********************************************************************\n"+
                    "** Another instance is already running, quitting.\n"+
                    "**********************************************************************\n"
                )
                sys.exit(-1)
        self.initialized = True
        # FIX: LET `with SingleInstance() as me:` BIND THE INSTANCE
        return self

    def __exit__(self, type, value, traceback):
        self.__del__()

    def __del__(self):
        # RE-IMPORT: MODULE GLOBALS MAY ALREADY BE GONE AT INTERPRETER SHUTDOWN
        import sys
        import os

        # RELEASE ONLY ONCE, AND ONLY IF THE LOCK WAS TAKEN
        temp, self.initialized = self.initialized, False
        if not temp:
            return
        try:
            if sys.platform == 'win32':
                if hasattr(self, 'fd'):
                    os.close(self.fd)
                    os.unlink(self.lockfile)
            else:
                import fcntl

                fcntl.lockf(self.fp, fcntl.LOCK_UN)
                # FIX: THE LOCK FILE HANDLE WAS NEVER CLOSED
                self.fp.close()
                if os.path.isfile(self.lockfile):
                    os.unlink(self.lockfile)
        except Exception as e:
            Log.warning("Problem with SingleInstance __del__()", e)
            sys.exit(-1)

Просмотреть файл

@ -1,143 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import codecs
from datetime import datetime
import io
import os
import shutil
from .struct import listwrap, nvl
from .cnv import CNV
class File(object):
    """
    Thin wrapper around a path. The path is kept with "/" separators
    internally; most writers create the parent directory when it is missing.
    """

    def __init__(self, filename, buffering=2 ** 14):
        if filename == None:
            from .logs import Log

            Log.error("File must be given a filename")

        # USE UNIX STANDARD
        self._filename = "/".join(filename.split(os.sep))
        self.buffering = buffering

    @property
    def filename(self):
        """The path using the operating system's separator."""
        return self._filename.replace("/", os.sep)

    @property
    def abspath(self):
        return os.path.abspath(self._filename)

    def backup_name(self, timestamp=None):
        """
        RETURN A FILENAME THAT CAN SERVE AS A BACKUP FOR THIS FILE
        """
        suffix = CNV.datetime2string(nvl(timestamp, datetime.now()), "%Y%m%d_%H%M%S")
        parts = self._filename.split(".")
        if len(parts) == 1:
            output = self._filename + "." + suffix
        elif len(parts) > 1 and parts[-2][-1] == "/":
            # DOT-FILE (".name"): APPEND RATHER THAN INSERT BEFORE AN EXTENSION
            output = self._filename + "." + suffix
        else:
            parts.insert(-1, suffix)
            output = ".".join(parts)
        return output

    def read(self, encoding="utf-8"):
        """Return the whole file decoded with `encoding`."""
        with codecs.open(self._filename, "r", encoding=encoding) as file:
            return file.read()

    def read_ascii(self):
        """Return the whole file without decoding."""
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "r") as file:
            return file.read()

    def write_ascii(self, content):
        """Replace the file's content with `content`."""
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "w") as file:
            file.write(content)

    def write(self, data):
        """Replace the file's content with `data` (one value or a list of values)."""
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "w") as file:
            for d in listwrap(data):
                file.write(d)

    def __iter__(self):
        """Yield the file's lines, decoded as utf-8."""
        # NOT SURE HOW TO MAXIMIZE FILE READ SPEED
        # http://stackoverflow.com/questions/8009882/how-to-read-large-file-line-by-line-in-python
        # http://effbot.org/zone/wide-finder.htm
        def output():
            with io.open(self._filename, "rb") as f:
                for line in f:
                    yield line.decode("utf-8")

        return output()

    def append(self, content):
        """Add `content` to the end of the file."""
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "a") as output_file:
            output_file.write(content)

    def add(self, content):
        """Same as append()."""
        return self.append(content)

    def extend(self, content):
        """Add every item of `content` to the end of the file."""
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "a") as output_file:
            for c in content:
                output_file.write(c)

    def delete(self):
        """Remove the file or directory tree; returns self when nothing went wrong."""
        try:
            if os.path.isdir(self._filename):
                shutil.rmtree(self._filename)
            elif os.path.isfile(self._filename):
                os.remove(self._filename)
            return self
        except Exception as e:
            # FIX: NOT EVERY EXCEPTION HAS A strerror ATTRIBUTE
            if getattr(e, "strerror", None) == "The system cannot find the path specified":
                return
            from .logs import Log

            Log.error("Could not remove file", e)

    def backup(self):
        # NOTE(review): this only builds a File for the backup name (and only
        # for names without a dot); nothing is copied - confirm the intent
        names = self._filename.split("/")[-1].split(".")
        if len(names) == 1:
            # FIX: %i AND %s ARE NOT strftime DIRECTIVES FOR MINUTES/SECONDS
            backup = File(self._filename + ".backup " + datetime.utcnow().strftime("%Y%m%d %H%M%S"))

    def create(self):
        """Create this path as a directory, including missing parents."""
        try:
            os.makedirs(self._filename)
        except Exception as e:
            from .logs import Log

            Log.error("Could not make directory {{dir_name}}", {"dir_name": self._filename}, e)

    @property
    def parent(self):
        """File for the containing directory."""
        return File("/".join(self._filename.split("/")[:-1]))

    @property
    def exists(self):
        if self._filename in ["", "."]:
            return True
        try:
            return os.path.exists(self._filename)
        except Exception:
            return False

446
tests/util/json_decoder.py Normal file
Просмотреть файл

@ -0,0 +1,446 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import json
from .jsons import json_encoder, use_pypy, UnicodeBuilder
from .struct import StructList, Null, EmptyList
from .structs.wraps import wrap, wrap_dot
# NOTE(review): DEBUG is not referenced in the visible part of this module
DEBUG = True
# PARSE MODES
ARRAY = 1 # PARSING INSIDE AN ARRAY
VALUE = 3 # PARSING PROPERTY VALUE
OBJECT = 4 # PARSING PROPERTY NAME
def decode(json):
    """
    THIS IS CURRENTLY 50% SLOWER THAN PyPy DEFAULT IMPLEMENTATION

    THE INTENT IS TO NEVER ACTUALLY PARSE ARRAYS OF PRIMITIVE VALUES, RATHER FIND
    THE START AND END OF THOSE ARRAYS AND SIMPLY STRING COPY THEM TO THE
    INEVITABLE JSON OUTPUT

    json - THE JSON TEXT
    RETURNS THE FIRST TOP-LEVEL VALUE FOUND
    """
    var = ""  # NAME OF THE PROPERTY WHOSE VALUE IS BEING PARSED
    curr = StructList()  # CONTAINER BEING FILLED; STARTS AS AN IMPLICIT ROOT ARRAY
    mode = ARRAY
    stack = StructList()  # ENCLOSING CONTAINERS

    # FIRST PASS SIMPLY GETS STRUCTURE
    i = 0
    while i < len(json):
        c = json[i]
        i += 1
        if mode == ARRAY:
            if c in [" ", "\t", "\n", "\r", ","]:
                pass
            elif c == "]":
                curr = stack.pop()
                if isinstance(curr, dict):
                    mode = OBJECT
                else:
                    mode = ARRAY
            elif c == "[":
                i, arr = jump_array(i, json)
                if arr is None:
                    # ARRAY HOLDS OBJECTS: DESCEND AND PARSE ITS ITEMS HERE
                    arr = []
                    stack.append(curr)
                    curr.append(arr)
                    curr = arr
                    mode = ARRAY
                else:
                    curr.append(arr)
            elif c == "{":
                obj = {}
                stack.append(curr)
                curr.append(obj)
                curr = obj
                mode = OBJECT
            elif c == "\"":
                i, val = fast_parse_string(i, json)
                # FIX: WAS curr.children.append(val), WHICH LOST THE VALUE
                curr.append(val)
            else:
                i, val = parse_const(i, json)
                # FIX: THE PARSED CONSTANT WAS NEVER ADDED TO THE ARRAY
                curr.append(val)
        elif mode == OBJECT:
            if c in [" ", "\t", "\n", "\r", ","]:
                pass
            elif c == ":":
                mode = VALUE
            elif c == "}":
                curr = stack.pop()
                if isinstance(curr, dict):
                    mode = OBJECT
                else:
                    mode = ARRAY
            elif c == "\"":
                i, var = fast_parse_string(i, json)
        elif mode == VALUE:
            if c in [" ", "\t", "\n", "\r"]:
                pass
            elif c == "}":
                curr = stack.pop()
                if isinstance(curr, dict):
                    mode = OBJECT
                else:
                    mode = ARRAY
            elif c == "[":
                i, arr = jump_array(i, json)
                if arr is None:
                    # ARRAY HOLDS OBJECTS: DESCEND AND PARSE ITS ITEMS HERE
                    arr = []
                    stack.append(curr)
                    curr[var] = arr
                    curr = arr
                    mode = ARRAY
                else:
                    curr[var] = arr
                    mode = OBJECT
            elif c == "{":
                obj = {}
                stack.append(curr)
                curr[var] = obj
                curr = obj
                mode = OBJECT
            elif c == "\"":
                i, val = fast_parse_string(i, json)
                curr[var] = val
                mode = OBJECT
            else:
                i, val = parse_const(i, json)
                curr[var] = val
                mode = OBJECT

    return curr[0]
def fast_parse_string(i, json):
simple = True
j = i
while True:
c = json[j]
j += 1
if c == "\"":
if simple:
return j, json[i:j-1]
else:
return parse_string(i, json)
elif c == "\\":
simple = False
c = json[j]
if c == "u":
j += 5
elif c in ["\"", "\\", "/", "b", "n", "f", "n", "t"]:
j += 1
else:
pass
# CHARACTER FOLLOWING A BACKSLASH -> THE CHARACTER IT STANDS FOR
ESC = {
"\"": "\"",
"\\": "\\",
"/": "/",
"b": "\b",
"r": "\r",
"f": "\f",
"n": "\n",
"t": "\t"
}
def parse_string(i, json):
    """
    Read the string whose first character is at json[i] (the opening quote
    has already been consumed), resolving backslash escapes.

    Returns (index just past the closing quote, string value). An unknown
    escape is kept as written (backslash plus character).
    """
    j = i
    output = UnicodeBuilder()
    while True:
        c = json[j]
        if c == "\"":
            return j + 1, output.build()
        elif c == "\\":
            j += 1
            c = json[j]
            if c == "u":
                # FIX: j IS ON THE "u"; THE FOUR HEX DIGITS FOLLOW IT AND NAME A
                # CODE POINT (THE ORIGINAL SLICED "uXXX" AND DECODED IT AS HEX BYTES)
                # NOTE(review): surrogate pairs are appended as two separate
                # characters - confirm that is acceptable for UnicodeBuilder
                output.append(unichr(int(json[j + 1:j + 5], 16)))
                j += 4
            elif c in ESC:
                output.append(ESC[c])
            else:
                output.append("\\")
                output.append(c)
        else:
            output.append(c)
        j += 1
def parse_array(i, json):
    """
    ARRAY OF PRIMITIVES ARE SKIPPED, THIS IS WHERE WE PARSE THEM

    i - index of the first character after the opening "["
    Returns (index just past the closing "]", list of values).
    """
    output = []
    val = None
    # FIX: None IS ALSO THE VALUE OF JSON null, SO IT CAN NOT MARK "NO VALUE
    # YET"; THE ORIGINAL DROPPED A LONE OR TRAILING null
    pending = False
    while True:
        c = json[i]
        i += 1
        if c in [" ", "\n", "\r", "\t"]:
            pass
        elif c == ",":
            output.append(val)
            val = Null
            pending = True
        elif c == "]":
            if pending:
                output.append(val)
            return i, output
        elif c == "[":
            i, val = parse_array(i, json)
            pending = True
        elif c == "\"":
            i, val = parse_string(i, json)
            pending = True
        else:
            i, val = parse_const(i, json)
            pending = True
def jump_string(i, json):
    """
    Skip over the string whose first character is at json[i] (the opening
    quote has already been consumed).

    Returns the index just past the closing quote.
    """
    pos = i
    while True:
        ch = json[pos]
        pos += 1
        if ch == "\"":
            return pos
        if ch != "\\":
            continue

        # STEP OVER THE ESCAPED CHARACTER SO AN ESCAPED QUOTE DOES NOT END THE STRING
        escaped = json[pos]
        if escaped == "u":
            pos += 5
        elif escaped in ("\"", "\\", "/", "b", "n", "f", "t"):
            pos += 1
def jump_array(i, json):
    """
    Scan the array whose content starts at json[i] (the opening "[" has
    already been consumed) without parsing it.

    Returns
      (i, None) when the array contains an object; the caller must parse it
      (end, []) when the array is empty
      (end, JSONList) otherwise; the JSONList parses itself on first use
    where `end` is the index just past the closing "]".
    """
    j = i
    empty = True
    depth = 0
    while True:
        c = json[j]
        j += 1
        if c == "{":
            return i, None
        elif c == "[":
            # FIX: A NESTED ARRAY IS CONTENT TOO; "[[]]" WAS REPORTED AS EMPTY
            empty = False
            depth += 1
        elif c == "]":
            if depth == 0:
                if empty:
                    return j, []
                else:
                    # i-1 IS THE POSITION OF THE OPENING "["
                    return j, JSONList(json, i - 1, j)
            else:
                depth -= 1
        elif c == "\"":
            empty = False
            j = jump_string(j, json)
        elif c not in [" ", "\t", "\r", "\n"]:
            empty = False
def parse_const(i, json):
    """
    Parse a number, true, false or null whose FIRST character is at
    json[i-1] (the caller has already consumed it).

    Returns (index of the terminating character, value); the terminator
    (whitespace, ",", "}" or "]") is not consumed. Any failure is reported
    through Log.error.
    """
    try:
        convert = int
        end = i
        while True:
            ch = json[end]
            if ch in (" ", "\t", "\n", "\r", ",", "}", "]"):
                break
            if ch in (".", "e", "E"):
                # A DECIMAL POINT OR EXPONENT MEANS THIS IS NOT AN INTEGER
                convert = float
            end += 1

        text = json[i - 1:end]
        known = {
            "0": 0,
            "-1": -1,
            "1": 1,
            "true": True,
            "false": False,
            "null": None
        }
        if text in known:
            return end, known[text]
        return end, convert(text)
    except Exception as e:
        from .env.logs import Log

        Log.error("Can not parse const", e)
class JSONList(object):
    """
    LAZY LIST OVER A SPAN OF JSON TEXT

    json[s:e] IS THE TEXT OF AN ARRAY (s IS THE INDEX OF THE "["); IT IS ONLY
    PARSED, BY parse_array(), THE FIRST TIME THE ELEMENTS ARE NEEDED.  ONCE
    THE LIST IS MUTATED THE TEXT IS DROPPED (self.json = None) BECAUSE IT NO
    LONGER DESCRIBES THE CONTENTS
    """

    def __init__(self, json, s, e):
        self.json = json
        self.start = s
        self.end = e
        self.list = None  # PARSED ELEMENTS, None UNTIL _convert() IS CALLED

    def _convert(self):
        """PARSE THE JSON TEXT INTO self.list, IF NOT ALREADY DONE"""
        if self.list is None:
            i, self.list = parse_array(self.start + 1, self.json)

    def __getitem__(self, index):
        """
        "FLAT" INDEXING: OUT-OF-RANGE (INCLUDING NEGATIVE) INDEXES GIVE Null,
        AND SLICE BOUNDS ARE CLAMPED TO [0, len]
        """
        self._convert()
        if isinstance(index, slice):
            # IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None)
            if index.step is not None:
                from .env.logs import Log

                Log.error("slice step must be None, do not know how to deal with values")
            length = len(self.list)

            i = index.start
            if i is None:
                i = 0
            else:
                i = min(max(i, 0), length)
            j = index.stop
            if j is None:
                j = length
            else:
                j = max(min(j, length), 0)
            return StructList(self.list[i:j])

        if index < 0 or len(self.list) <= index:
            return Null
        return wrap(self.list[index])

    def __setitem__(self, i, y):
        self._convert()
        self.json = None
        self.list[i] = unwrap(y)

    def __iter__(self):
        self._convert()
        return (wrap(v) for v in self.list)

    def __contains__(self, item):
        self._convert()
        return list.__contains__(self.list, item)

    def append(self, val):
        self._convert()
        self.json = None
        self.list.append(unwrap(val))
        return self

    def __str__(self):
        # SAME TEXT AS __json__(), WHICH ALSO COPES WITH A MUTATED LIST
        return self.__json__()

    def __len__(self):
        self._convert()
        return self.list.__len__()

    def __getslice__(self, i, j):
        from .env.logs import Log

        Log.error("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes.  Use [start:stop:step]")

    def copy(self):
        """RETURN A PLAIN list COPY IF ALREADY PARSED, OTHERWISE ANOTHER LAZY JSONList"""
        if self.list is not None:
            return list(self.list)
        return JSONList(self.json, self.start, self.end)

    def remove(self, x):
        self._convert()
        self.json = None
        self.list.remove(x)
        return self

    def extend(self, values):
        self._convert()
        self.json = None
        for v in values:
            self.list.append(unwrap(v))
        return self

    def pop(self):
        self._convert()
        self.json = None
        return wrap(self.list.pop())

    def __add__(self, value):
        self._convert()
        output = list(self.list)
        output.extend(value)
        return StructList(vals=output)

    def __or__(self, value):
        self._convert()
        output = list(self.list)
        output.append(value)
        return StructList(vals=output)

    def __radd__(self, other):
        self._convert()
        output = list(other)
        output.extend(self.list)
        return StructList(vals=output)

    def right(self, num=None):
        """
        WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT
        RETURN THE LAST num ELEMENTS (THE LAST ONE WHEN num IS None)
        """
        self._convert()
        if num == None:
            return StructList([self.list[-1]])
        if num <= 0:
            return EmptyList
        return StructList(self.list[-num:])

    def leftBut(self, num):
        """
        WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
        RETURN ALL BUT THE LAST num ELEMENTS (ALL BUT THE LAST ONE WHEN num IS None)
        """
        self._convert()
        if num == None:
            return StructList(self.list[:-1:])
        if num <= 0:
            # NOTE(review): leftBut(0) GIVES EmptyList, NOT THE WHOLE LIST - KEPT AS FOUND
            return EmptyList
        return StructList(self.list[:-num:])

    def last(self):
        """
        RETURN LAST ELEMENT IN StructList
        """
        self._convert()
        if self.list:
            return wrap(self.list[-1])
        return Null

    def map(self, oper, includeNone=True):
        self._convert()
        if includeNone:
            return StructList([oper(v) for v in self.list])
        else:
            return StructList([oper(v) for v in self.list if v != None])

    def __json__(self):
        if self.json is not None:
            # NEVER MUTATED: THE ORIGINAL TEXT IS STILL ACCURATE
            return self.json[self.start:self.end]
        else:
            # ENCODE THE PLAIN LIST; ENCODING self WOULD CALL __json__() AGAIN
            return json_encoder(self.list)
# PICK THE DECODER: THE PURE-PYTHON decode UNDER PYPY (OR WHEN DEBUGGING),
# OTHERWISE THE BUILTIN json MODULE
if not use_pypy:
    import json

    builtin_json_decoder = json.JSONDecoder().decode
json_decoder = decode if (use_pypy or DEBUG) else builtin_json_decoder

Просмотреть файл

@ -7,149 +7,199 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime
import time
from decimal import Decimal
import json
from math import floor
import re
import time
from datetime import datetime, date, timedelta
from decimal import Decimal
import sys
from .collections import AND, MAX
from .struct import Struct, StructList
json_decoder = json.JSONDecoder().decode
# THIS FILE EXISTS TO SERVE AS A FAST REPLACEMENT FOR JSON ENCODING
# THE DEFAULT JSON ENCODERS CAN NOT HANDLE A DIVERSITY OF TYPES *AND* BE FAST
#
# 1) WHEN USING cPython, WE HAVE NO COMPILER OPTIMIZATIONS: THE BEST STRATEGY IS TO
# CONVERT THE MEMORY STRUCTURE TO STANDARD TYPES AND SEND TO THE INSANELY FAST
# DEFAULT JSON ENCODER
# 2) WHEN USING PYPY, WE USE CLEAR-AND-SIMPLE PROGRAMMING SO THE OPTIMIZER CAN DO
# ITS JOB. ALONG WITH THE UnicodeBuilder WE GET NEAR C SPEEDS
use_pypy = False
try:
    # UnicodeBuilder IS ABOUT 2x FASTER THAN list()
    # use_pypy = True
    from __pypy__.builders import UnicodeBuilder

    use_pypy = True
except Exception:
    if use_pypy:
        # ONLY REACHED WHEN use_pypy IS FORCED ON ABOVE FOR TESTING
        sys.stdout.write("The PyPy JSON serializer is in use! Currently running CPython, not a good mix.")

    class UnicodeBuilder(list):
        """CPYTHON STAND-IN FOR THE PYPY BUILDER: A list OF PIECES JOINED ON build()"""

        def __init__(self, length=None):
            # length IS ACCEPTED FOR SIGNATURE COMPATIBILITY, AND IGNORED
            list.__init__(self)

        def build(self):
            return u"".join(self)

# UNBOUND append, CALLED AS append(_buffer, text) BY THE ENCODER FUNCTIONS
append = UnicodeBuilder.append
def encode(value, pretty=False):
    """
    pypy DOES NOT OPTIMIZE GENERATOR CODE WELL

    value - THE OBJECT TO SERIALIZE
    pretty - True TO GET INDENTED, HUMAN-READABLE OUTPUT FROM pretty_json()
    RETURN THE JSON TEXT
    ON FAILURE A WARNING IS LOGGED AND pretty_json() IS TRIED; IF THAT
    ALSO FAILS THE PROBLEM IS REPORTED WITH Log.error
    """
    if pretty:
        return pretty_json(value)

    try:
        _buffer = UnicodeBuilder(1024)
        _value2json(value, _buffer)
        output = _buffer.build()
        return output
    except Exception as e:
        #THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
        from .env.logs import Log

        Log.warning("Serialization of JSON problems", e)
        try:
            return pretty_json(value)
        except Exception as f:
            Log.error("problem serializing object", f)
class cPythonJSONEncoder(object):
    """
    ENCODER FOR cPython: SCRUB THE VALUE DOWN TO STANDARD TYPES, THEN HAND
    IT TO THE BUILTIN json ENCODER
    """

    def __init__(self):
        object.__init__(self)

        self.encoder = json.JSONEncoder(
            skipkeys=False,
            ensure_ascii=False,  # DIFF FROM DEFAULTS
            check_circular=True,
            allow_nan=True,
            indent=None,
            separators=None,
            encoding='utf-8',
            default=None,
            sort_keys=False
        )

    def encode(self, value, pretty=False):
        """
        value - THE OBJECT TO SERIALIZE
        pretty - True TO GET INDENTED OUTPUT FROM pretty_json()
        RETURN THE JSON TEXT AS unicode
        """
        if value == None:
            return "null"

        if pretty:
            return pretty_json(value)

        return unicode(self.encoder.encode(json_scrub(value)))
def _value2json(value, _buffer):
    """
    APPEND THE JSON FOR value TO _buffer

    DISPATCHES ON THE EXACT CLASS OF value (SUBCLASSES ARE NOT MATCHED),
    THEN FALLS BACK TO __json__() AND FINALLY TO ANYTHING ITERABLE
    RAISES Exception WHEN value CAN NOT BE SERIALIZED
    """
    if value == None:
        append(_buffer, u"null")
        return
    elif value is True:
        append(_buffer, u"true")
        return
    elif value is False:
        append(_buffer, u"false")
        return

    type_ = value.__class__
    if type_ in (dict, Struct):
        if value:
            _dict2json(value, _buffer)
        else:
            append(_buffer, u"{}")
    elif type_ is str:
        append(_buffer, u"\"")
        v = value.decode("utf8")
        for c in v:
            append(_buffer, ESCAPE_DCT.get(c, c))
        append(_buffer, u"\"")
    elif type_ is unicode:
        append(_buffer, u"\"")
        for c in value:
            append(_buffer, ESCAPE_DCT.get(c, c))
        append(_buffer, u"\"")
    elif type_ in (int, long, Decimal):
        append(_buffer, unicode(value))
    elif type_ is float:
        append(_buffer, unicode(repr(value)))
    elif type_ in (set, list, tuple, StructList):
        _list2json(value, _buffer)
    elif type_ is date:
        # NOTE(review): mktime() INTERPRETS THE VALUE AS LOCAL TIME, WHILE _scrub()
        # USES datetime2milli() - CONFIRM WHICH IS INTENDED
        append(_buffer, unicode(long(time.mktime(value.timetuple()) * 1000)))
    elif type_ is datetime:
        append(_buffer, unicode(long(time.mktime(value.timetuple()) * 1000)))
    elif type_ is timedelta:
        append(_buffer, "\"")
        append(_buffer, unicode(value.total_seconds()))
        append(_buffer, "second\"")
    elif hasattr(value, '__json__'):
        j = value.__json__()
        append(_buffer, j)
    elif hasattr(value, '__iter__'):
        _iter2json(value, _buffer)
    else:
        raise Exception(repr(value) + " is not JSON serializable")
def _list2json(value, _buffer):
    """
    APPEND THE JSON ARRAY FOR A SIZED COLLECTION (list, tuple, set, StructList)
    """
    if not value:
        append(_buffer, u"[]")
    else:
        # THE FIRST "SEPARATOR" IS THE OPENING BRACKET
        sep = u"["
        for v in value:
            append(_buffer, sep)
            sep = u", "
            _value2json(v, _buffer)
        append(_buffer, u"]")
def _iter2json(value, _buffer):
    """
    APPEND THE JSON ARRAY FOR ANY ITERABLE (WHICH MAY HAVE NO len(), SO
    EMPTINESS CAN NOT BE TESTED UP FRONT)
    """
    append(_buffer, u"[")
    sep = u""
    for v in value:
        append(_buffer, sep)
        sep = u", "
        _value2json(v, _buffer)
    append(_buffer, u"]")
def _dict2json(value, _buffer):
    """
    APPEND THE JSON OBJECT FOR A MAPPING; KEYS MUST BE STRINGS
    (str KEYS ARE DECODED AS utf8)
    """
    first_prefix = u"{\""
    prefix = first_prefix
    for k, v in value.iteritems():
        append(_buffer, prefix)
        prefix = u", \""
        if isinstance(k, str):
            k = k.decode("utf8")
        for c in k:
            append(_buffer, ESCAPE_DCT.get(c, c))
        append(_buffer, u"\": ")
        _value2json(v, _buffer)
    if prefix is first_prefix:
        # NOTHING WAS WRITTEN: EMIT A COMPLETE EMPTY OBJECT RATHER THAN A LONE "}"
        append(_buffer, u"{}")
    else:
        append(_buffer, u"}")
special_find = u"\\\"\t\n\r".find
replacement = [u"\\\\", u"\\\"", u"\\t", u"\\n", u"\\r"]

# MATCHES EVERY CHARACTER THAT MUST BE ESCAPED INSIDE A JSON STRING
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')

# MAP FROM A CHARACTER TO ITS JSON ESCAPE SEQUENCE
ESCAPE_DCT = {
    u"\\": u"\\\\",
    u"\"": u"\\\"",
    u"\b": u"\\b",
    u"\f": u"\\f",
    u"\n": u"\\n",
    u"\r": u"\\r",
    u"\t": u"\\t",
}
# THE REMAINING CONTROL CHARACTERS GET THE GENERIC \u00XX FORM
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), u'\\u{0:04x}'.format(i))


def _string2json(value, _buffer):
    """APPEND value TO _buffer AS A QUOTED, ESCAPED JSON STRING"""
    def replace(match):
        return ESCAPE_DCT[match.group(0)]

    append(_buffer, "\"")
    append(_buffer, ESCAPE.sub(replace, value))
    append(_buffer, "\"")
#REMOVE VALUES THAT CAN NOT BE JSON-IZED
@ -160,25 +210,240 @@ def json_scrub(value):
def _scrub(value):
    """
    RETURN A COPY OF value BUILT ONLY FROM TYPES THE BUILTIN json ENCODER
    ACCEPTS (dict, list, unicode, NUMBERS, bool, None)
    """
    if value == None:
        return None

    type_ = value.__class__

    if type_ in (date, datetime):
        return datetime2milli(value)
    elif type_ is timedelta:
        return unicode(value.total_seconds()) + "second"
    elif type_ is str:
        return unicode(value.decode("utf8"))
    elif type_ is Decimal:
        return float(value)
    elif isinstance(value, dict):
        output = {}
        for k, v in value.iteritems():
            v = _scrub(v)
            output[k] = v
        return output
    elif type_ in (list, StructList):
        output = []
        for v in value:
            v = _scrub(v)
            output.append(v)
        return output
    elif type_.__name__ == "bool_":  # DEAR ME!  Numpy has it's own booleans (value==False could be used, but 0==False in Python.  DOH!)
        if value == False:
            return False
        else:
            return True
    elif hasattr(value, '__json__'):
        try:
            # ROUND-TRIP THROUGH THE OBJECT'S OWN JSON TO GET STANDARD TYPES
            return json._default_decoder.decode(value.__json__())
        except Exception as e:
            from .env.logs import Log

            Log.error("problem with calling __json__()", e)
    elif hasattr(value, '__iter__'):
        output = []
        for v in value:
            v = _scrub(v)
            output.append(v)
        return output
    else:
        return value
# LAYOUT LIMITS USED BY pretty_json() WHEN PACKING SHORT ARRAY ITEMS INTO ROWS
ARRAY_ROW_LENGTH = 80  # TARGET WIDTH OF ONE ROW OF ITEMS
ARRAY_ITEM_MAX_LENGTH = 30  # ITEMS LONGER THAN THIS ARE NEVER PACKED INTO COLUMNS
ARRAY_MAX_COLUMNS = 10  # UPPER BOUND ON ITEMS PER ROW
# PREFIX ADDED PER NESTING LEVEL
# NOTE(review): THE LITERAL MAY HAVE LOST SPACES IN EXTRACTION - CONFIRM THE INTENDED WIDTH
INDENT = " "
def pretty_json(value):
    """
    RETURN value AS INDENTED, HUMAN-READABLE JSON TEXT

    DICT KEYS ARE SORTED AND ITEMS WITH None VALUES ARE OMITTED (WHEN THERE
    IS MORE THAN ONE ITEM); SHORT ARRAY ITEMS ARE PACKED INTO COLUMNS
    ANY FAILURE IS REPORTED WITH Log.error
    """
    try:
        if value == None:
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                try:
                    value = value.decode("utf8")
                except Exception as e:
                    from .env.logs import Log

                    value = unicode(value.decode("latin1"))
                    Log.warning("Should not have latin1 encoded strings: {{value}}", {"value": value}, e)

            try:
                return quote(value)
            except Exception as e:
                from .env.logs import Log

                try:
                    # LAST RESORT: ESCAPE CHARACTER BY CHARACTER, DROPPING WHAT CAN NOT BE CONVERTED
                    Log.note("try explicit convert of string with length {{length}}", {"length": len(value)})
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  {"ord": ord(c)}, g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", {"length": len(output)})
                    return output
                except BaseException as f:
                    Log.warning("can not even explicit convert", f)
                    return "null"
        elif isinstance(value, dict):
            try:
                if not value:
                    return "{}"
                items = list(value.items())
                if len(items) == 1:
                    # quote() ESCAPES THE KEY, SO KEYS HOLDING QUOTES OR BACKSLASHES STAY VALID JSON
                    return "{" + quote(items[0][0]) + ": " + pretty_json(items[0][1]).strip() + "}"

                items = sorted(items, lambda a, b: value_compare(a[0], b[0]))
                values = [quote(k)+": " + indent(pretty_json(v)).strip() for k, v in items if v != None]
                return "{\n" + INDENT + (",\n"+INDENT).join(values) + "\n}"
            except Exception as e:
                from .env.logs import Log
                from .collections import OR

                if OR(not isinstance(k, basestring) for k in value.keys()):
                    Log.error("JSON must have string keys: {{keys}}:", {
                        "keys": [k for k in value.keys()]
                    }, e)

                Log.error("problem making dict pretty: keys={{keys}}:", {
                    "keys": [k for k in value.keys()]
                }, e)
        elif isinstance(value, list):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS==1:
                return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = MAX(len(j) for j in js)
            if max_len <= ARRAY_ITEM_MAX_LENGTH and AND(j.find("\n") == -1 for j in js):
                #ALL TINY VALUES
                num_columns = max(1, min(ARRAY_MAX_COLUMNS, int(floor((ARRAY_ROW_LENGTH + 2.0)/float(max_len+2)))))  # +2 TO COMPENSATE FOR COMMAS
                if len(js)<=num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + ", ".join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join([indent(j) for j in js]) + "\n]"

                content = ",\n".join(
                    ", ".join(j.rjust(max_len) for j in js[r:r+num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"

            # js ALREADY HOLDS THE JSON OF EVERY ITEM; NO NEED TO COMPUTE IT AGAIN
            output = "[\n"
            for i, p in enumerate(js):
                try:
                    if i > 0:
                        output += ",\n"
                    output += indent(p)
                except Exception as e:
                    from .env.logs import Log

                    Log.warning("problem concatenating string of length {{len1}} and {{len2}}", {
                        "len1": len(output),
                        "len2": len(p)
                    })
            return output + "\n]"
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return "   null   "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        else:
            return encode(value)

    except Exception as e:
        from .env.logs import Log

        Log.error("Problem turning value ({{value}}) to json", {"value": repr(value)}, e)
# MATCHES EVERY CHARACTER THAT MUST BE ESCAPED INSIDE A JSON STRING:
# THE CONTROL CHARACTERS, BACKSLASH AND DOUBLE-QUOTE
ESCAPE = re.compile(r'[\x00-\x1f\\"]')


def replace(match):
    """re.sub CALLBACK: RETURN THE JSON ESCAPE SEQUENCE FOR THE MATCHED CHARACTER"""
    return ESCAPE_DCT[match.group(0)]


def quote(value):
    """RETURN value AS A DOUBLE-QUOTED, ESCAPED JSON STRING"""
    return "\""+ESCAPE.sub(replace, value)+"\""
def indent(value, prefix=INDENT):
    """
    RETURN value WITH prefix ADDED TO THE START OF EVERY LINE
    TRAILING WHITESPACE IS SET ASIDE FIRST AND RE-ATTACHED, UNPREFIXED, AT THE END
    """
    try:
        body = value.rstrip()
        trailing = value[len(body):]
        separator = u"\n" + prefix
        return prefix + separator.join(body.splitlines()) + trailing
    except Exception as e:
        raise Exception(u"Problem with indent of value (" + e.message + u")\n" + value)
def value_compare(a, b):
    """
    cmp-STYLE COMPARISON THAT ORDERS None BEFORE EVERYTHING ELSE
    RETURN -1, 0 OR 1
    """
    if a == None:
        return 0 if b == None else -1
    if b == None:
        return 1

    if a > b:
        return 1
    if a < b:
        return -1
    return 0
def datetime2milli(d):
    """
    RETURN THE NUMBER OF WHOLE MILLISECONDS FROM 1970-01-01 TO d

    d - A (NAIVE) datetime OR date, OR None
    RETURN None FOR None; THE RESULT IS ROUNDED DOWN, SO INSTANTS BEFORE
    THE EPOCH GIVE NEGATIVE VALUES
    RAISES Exception FOR ANY OTHER TYPE, OR WHEN THE SUBTRACTION FAILS
    """
    try:
        if d == None:
            return None
        elif isinstance(d, datetime):
            epoch = datetime(1970, 1, 1)
        elif isinstance(d, date):
            epoch = date(1970, 1, 1)
        else:
            raise Exception("Can not convert "+repr(d)+" to json")

        diff = d - epoch
        # INTEGER ARITHMETIC ON THE NORMALIZED FIELDS: A NEGATIVE timedelta HAS
        # NEGATIVE days BUT NON-NEGATIVE seconds AND microseconds, SO TRUNCATING
        # total_seconds() AND THEN ADDING microseconds WOULD BE WRONG
        return (diff.days * 86400 + diff.seconds) * 1000 + diff.microseconds // 1000
    except Exception as e:
        raise Exception("Can not convert "+repr(d)+" to json", e)
# OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON?
# http://liangnuren.wordpress.com/2012/08/13/python-json-performance/
# http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html
# UNDER PYPY USE THE PURE-PYTHON encode(); OTHERWISE SCRUB AND USE THE BUILTIN ENCODER
json_encoder = encode if use_pypy else cPythonJSONEncoder().encode

Просмотреть файл

@ -1,480 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from datetime import datetime, timedelta
import traceback
import logging
import sys
from .struct import listwrap, nvl
import struct, threads
from .strings import indent, expand_template
from .threads import Thread
# WHEN True THE LOGGING THREADS REPORT THEIR OWN PROGRESS AND PROBLEMS ON stdout
DEBUG_LOGGING = False
# MESSAGE TYPES CARRIED BY Except
ERROR="ERROR"
WARNING="WARNING"
NOTE="NOTE"
# THE SINK Log.note() WRITES TO; REPLACED BY Log.start()
main_log = None
# THE Log_usingMulti THAT Log.add_log() ADDS TO; SET BY Log.start()
logging_multi = None
class Log(object):
    """
    FOR STRUCTURED LOGGING AND EXCEPTION CHAINING
    """

    @classmethod
    def new_instance(cls, settings):
        """
        BUILD ONE LOG SINK FROM ONE ENTRY OF THE "log" SETTINGS
        THE FIRST OF class, file, filename, stream THAT IS SET DECIDES THE
        KIND OF SINK; RETURN None IF NONE OF THEM IS SET
        """
        settings=struct.wrap(settings)
        if settings["class"]:
            if not settings["class"].startswith("logging.handlers."):
                return make_log_from_settings(settings)
            # elif settings["class"]=="sys.stdout":
                #CAN BE SUPER SLOW
            else:
                return Log_usingLogger(settings)
        if settings.file: return Log_usingFile(settings.file)
        if settings.filename: return Log_usingFile(settings.filename)
        if settings.stream: return Log_usingStream(settings.stream)

    @classmethod
    def add_log(cls, log):
        logging_multi.add_log(log)

    @staticmethod
    def debug(template=None, params=None):
        """
        USE THIS FOR DEBUGGING (AND EVENTUAL REMOVAL)
        """
        Log.note(nvl(template, ""), params)

    @staticmethod
    def println(template, params=None):
        Log.note(template, params)

    @staticmethod
    def note(template, params=None):
        """WRITE template (EXPANDED WITH params) TO THE MAIN LOG, PREFIXED WITH THE UTC TIME"""
        template="{{log_timestamp}} - "+template
        params = nvl(params, {}).copy()

        #NICE TO GATHER MANY MORE ITEMS FOR LOGGING (LIKE STACK TRACES AND LINE NUMBERS)
        params["log_timestamp"]=datetime.utcnow().strftime("%H:%M:%S")

        main_log.write(template, params)

    @staticmethod
    def warning(template, params=None, cause=None):
        """
        LOG A WARNING, WITH STACK TRACE AND (OPTIONAL) CAUSE; DOES NOT RAISE
        params MAY BE THE CAUSE WHEN THERE ARE NO TEMPLATE PARAMETERS
        """
        if isinstance(params, BaseException):
            cause=params
            params = None

        if cause and not isinstance(cause, Except):
            cause=Except(WARNING, unicode(cause), trace=format_trace(traceback.extract_tb(sys.exc_info()[2]), 0))

        e = Except(WARNING, template, params, cause, format_trace(traceback.extract_stack(), 1))
        Log.note(unicode(e))

    #raise an exception with a trace for the cause too
    @staticmethod
    def error(
        template,       #human readable template
        params=None,    #parameters for template
        cause=None,     #plausible cause
        offset=0        #stack trace offset (==1 if you do not want to report self)
    ):
        """
        ALWAYS RAISES AN Except THAT CHAINS cause AND CARRIES THE CURRENT STACK TRACE
        params MAY BE THE CAUSE WHEN THERE ARE NO TEMPLATE PARAMETERS
        """
        if params and isinstance(struct.listwrap(params)[0], BaseException):
            cause=params
            params = None

        if cause == None:
            cause = []
        elif isinstance(cause, list):
            pass
        elif isinstance(cause, Except):
            cause = [cause]
        else:
            cause = [Except(ERROR, unicode(cause), trace=format_trace(traceback.extract_tb(sys.exc_info()[2]), offset))]

        trace=format_trace(traceback.extract_stack(), 1+offset)
        e=Except(ERROR, template, params, cause, trace)
        raise e

    #RUN ME FIRST TO SETUP THE THREADED LOGGING
    @staticmethod
    def start(settings=None):
        ##http://victorlin.me/2012/08/good-logging-practice-in-python/
        if not settings: return
        if not settings.log: return

        globals()["logging_multi"]=Log_usingMulti()
        globals()["main_log"] = Log_usingThread(logging_multi)

        for log in listwrap(settings.log):
            Log.add_log(Log.new_instance(log))

    @staticmethod
    def stop():
        main_log.stop()

    def write(self):
        Log.error("not implemented")
def format_trace(tbs, trim=0):
    """
    FORMAT STACK ENTRIES AS TEXT, MOST RECENT CALL FIRST

    tbs - LIST OF (filename, lineno, name, line) TUPLES, OLDEST FIRST
          (AS GIVEN BY traceback.extract_stack / extract_tb); NOT MODIFIED
    trim - NUMBER OF MOST-RECENT ENTRIES TO LEAVE OUT
    RETURN ONE 'at File ...' LINE PER REMAINING ENTRY
    """
    lines = []
    for filename, lineno, name, line in list(reversed(tbs))[trim:]:
        item = 'at File "%s", line %d, in %s\n' % (filename.replace("\\", "/"), lineno, name)
        lines.append(item)
    return "".join(lines)
#def format_trace(tb, trim=0):
# list = []
# for filename, lineno, name, line in traceback.extract_tb(tb)[0:-trim]:
# item = 'File "%s", line %d, in %s\n' % (filename,lineno,name)
# if line:
# item = item + '\t%s\n' % line.strip()
# list.append(item)
# return "".join(list)
class Except(Exception):
    """
    EXCEPTION THAT CARRIES A MESSAGE TEMPLATE, ITS PARAMETERS, A FORMATTED
    STACK TRACE AND THE CAUSE(S) THAT LED TO IT
    """

    def __init__(self, type=ERROR, template=None, params=None, cause=None, trace=None):
        # DO NOT PASS self AS AN ARGUMENT: THAT STORES A SELF-REFERENCE IN self.args
        Exception.__init__(self)
        self.type=type
        self.template=template
        self.params=params
        self.cause=cause    # None, ONE EXCEPTION, OR A LIST OF THEM
        self.trace=trace

    @property
    def message(self):
        return unicode(self)

    def _causes(self):
        """RETURN THE CAUSE(S) AS A LIST, WHATEVER FORM self.cause WAS GIVEN IN"""
        if not self.cause:
            return []
        if isinstance(self.cause, (list, tuple)):
            return self.cause
        return [self.cause]

    def contains(self, value):
        """RETURN True IF THIS EXCEPTION, OR ANY CAUSE IN THE CHAIN, IS OF TYPE value"""
        if self.type==value:
            return True
        for c in self._causes():
            if c.contains(value):
                return True
        return False

    def __str__(self):
        output=self.type+": "+self.template
        if self.params: output=expand_template(output, self.params)

        if self.trace:
            output+="\n"+indent(self.trace)

        causes = self._causes()
        if causes:
            output+="\ncaused by\n\t"+"\nand caused by\n\t".join([c.__str__() for c in causes])

        return output+"\n"
class BaseLog(object):
    """INTERFACE OF A LOG SINK; BOTH METHODS DO NOTHING UNLESS OVERRIDDEN"""

    def write(self, template, params):
        """ACCEPT ONE MESSAGE (A TEMPLATE AND ITS PARAMETERS)"""

    def stop(self):
        """FLUSH AND RELEASE WHATEVER THE SINK HOLDS"""
class Log_usingFile(BaseLog):
    """
    LOG SINK THAT APPENDS EACH MESSAGE TO A FILE
    AN EXISTING FILE IS BACKED UP AND DELETED WHEN THE SINK IS CREATED
    """

    def __init__(self, file):
        assert file

        from files import File
        self.file=File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock=threads.Lock()

    def write(self, template, params):
        # THE LOCK KEEPS MESSAGES FROM DIFFERENT THREADS FROM INTERLEAVING
        with self.file_lock:
            self.file.append(expand_template(template, params))
#WRAP PYTHON CLASSIC logger OBJECTS
class Log_usingLogger(BaseLog):
    """
    LOG SINK THAT FORWARDS TO A CLASSIC PYTHON logging HANDLER
    MESSAGES ARE QUEUED AND PUSHED TO THE LOGGER BY A BACKGROUND THREAD
    """

    def __init__(self, settings):
        self.logger=logging.Logger("unique name", level=logging.INFO)
        self.logger.addHandler(make_log_from_settings(settings))

        # TURNS OUT LOGGERS ARE REALLY SLOW TOO
        self.queue = threads.Queue()
        self.thread = Thread(
            "log to logger",
            time_delta_pusher,
            appender=self.logger.info,
            queue=self.queue,
            interval=timedelta(seconds=0.3)
        )
        self.thread.start()

    def write(self, template, params):
        # http://docs.python.org/2/library/logging.html#logging.LogRecord
        message = {"template": template, "params": params}
        self.queue.add(message)

    def stop(self):
        # BEST EFFORT: PROBLEMS DURING SHUTDOWN ARE DELIBERATELY IGNORED
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger sees stop, adding stop to queue\n")
            self.queue.add(Thread.STOP)  #BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger done\n")
        except Exception:
            pass

        try:
            self.queue.close()
        except Exception:
            pass
def make_log_from_settings(settings):
    """
    INSTANTIATE THE HANDLER CLASS NAMED BY settings["class"] (A DOTTED PATH),
    PASSING THE REMAINING SETTINGS AS KEYWORD ARGUMENTS
    """
    assert settings["class"]

    # IMPORT MODULE FOR HANDLER
    module_path, _, class_name = settings["class"].rpartition(".")
    module = __import__(module_path, globals(), locals(), [class_name], -1)
    constructor=object.__getattribute__(module, class_name)

    #IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
    if settings.filename:
        from files import File

        log_file = File(settings.filename)
        if not log_file.parent.exists:
            log_file.parent.create()

    kwargs = settings.dict
    del kwargs['class']
    return constructor(**kwargs)
def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LINES TO WRITE
    interval - timedelta
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL

    RUNS UNTIL please_stop IS SIGNALLED, OR Thread.STOP IS FOUND IN THE QUEUE
    PROBLEMS ARE ONLY REPORTED AND RE-RAISED WHEN DEBUG_LOGGING IS SET
    """

    if not isinstance(interval, timedelta):
        Log.error("Expecting interval to be a timedelta")

    next_run = datetime.utcnow() + interval

    while not please_stop:
        Thread.sleep(till=next_run)
        next_run = datetime.utcnow() + interval
        logs = queue.pop_all()
        if logs:
            lines = []
            for log in logs:
                try:
                    if log == Thread.STOP:
                        please_stop.go()
                        next_run = datetime.utcnow()
                    else:
                        lines.append(expand_template(log.get("template", None), log.get("params", None)))
                except Exception as e:
                    if DEBUG_LOGGING:
                        sys.stdout.write("Trouble formatting logs: "+e.message)
                        raise  # BARE raise KEEPS THE ORIGINAL TRACEBACK
            try:
                if DEBUG_LOGGING and please_stop:
                    sys.stdout.write("Last call to appender with "+str(len(lines))+" lines\n")
                appender(u"\n".join(lines)+u"\n")
                if DEBUG_LOGGING and please_stop:
                    sys.stdout.write("Done call to appender with "+str(len(lines))+" lines\n")
            except Exception as e:
                if DEBUG_LOGGING:
                    sys.stdout.write("Trouble with appender: "+e.message)
                    raise  # BARE raise KEEPS THE ORIGINAL TRACEBACK
class Log_usingStream(BaseLog):
    """
    LOG SINK THAT WRITES TO A STREAM FROM A BACKGROUND THREAD
    """
    #stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
    #WHICH WILL eval() TO ONE
    def __init__(self, stream):
        assert stream

        use_UTF8 = False

        if isinstance(stream, basestring):
            if stream.startswith("sys."):
                use_UTF8 = True  #sys.* ARE OLD AND CAN NOT HANDLE unicode
            # SECURITY NOTE(review): eval() EXECUTES WHATEVER THE SETTINGS HOLD;
            # ONLY SAFE WHILE THE SETTINGS FILE IS TRUSTED
            self.stream = eval(stream)
            name = stream
        else:
            self.stream = stream
            name = "stream"

        #WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from threads import Queue

        if use_UTF8:
            def utf8_appender(value):
                if isinstance(value, unicode):
                    value = value.encode('utf-8')
                self.stream.write(value)

            appender = utf8_appender
        else:
            appender = self.stream.write

        self.queue = Queue()
        self.thread = Thread("log to " + name, time_delta_pusher, appender=appender, queue=self.queue, interval=timedelta(seconds=0.3))
        self.thread.start()

    def write(self, template, params):
        # ANY PROBLEM PROPAGATES TO THE CALLER WITH ITS TRACEBACK INTACT
        self.queue.add({"template": template, "params": params})
        return self

    def stop(self):
        # PROBLEMS DURING SHUTDOWN ARE ONLY RAISED WHEN DEBUG_LOGGING IS SET
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingStream sees stop, adding stop to queue\n")
            self.queue.add(Thread.STOP)  #BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingStream done\n")
        except Exception:
            if DEBUG_LOGGING:
                raise

        try:
            self.queue.close()
        except Exception:
            if DEBUG_LOGGING:
                raise
class Log_usingThread(BaseLog):
    """
    LOG SINK THAT QUEUES MESSAGES AND HAS A BACKGROUND THREAD PASS THEM ON
    TO ANOTHER SINK (logger)
    """

    def __init__(self, logger):
        #DELAYED LOAD FOR THREADS MODULE
        from threads import Queue

        self.queue=Queue()
        self.logger=logger

        def worker(please_stop):
            while not please_stop:
                Thread.sleep(1)
                logs = self.queue.pop_all()
                for log in logs:
                    if log==Thread.STOP:
                        if DEBUG_LOGGING:
                            sys.stdout.write("Log_usingThread.worker() sees stop, filling rest of queue\n")
                        please_stop.go()
                    else:
                        self.logger.write(**log)
        self.thread=Thread("log thread", worker)
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template":template, "params":params})
            return self
        except Exception:
            sys.stdout.write("IF YOU SEE THIS, IT IS LIKELY YOU FORGOT TO RUN Log.start() FIRST\n")
            raise  #OH NO!  (BARE raise KEEPS THE ORIGINAL TRACEBACK)

    def stop(self):
        # PROBLEMS DURING SHUTDOWN ARE ONLY RAISED WHEN DEBUG_LOGGING IS SET
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("injecting stop into queue\n")
            self.queue.add(Thread.STOP)  #BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingThread telling logger to stop\n")
            self.logger.stop()
        except Exception:
            if DEBUG_LOGGING:
                raise

        try:
            self.queue.close()
        except Exception:
            if DEBUG_LOGGING:
                raise
class Log_usingMulti(BaseLog):
    """
    LOG SINK THAT FANS EVERY MESSAGE OUT TO A LIST OF OTHER SINKS
    A FAILURE IN ONE SINK IS IGNORED SO THE OTHERS STILL GET THE MESSAGE
    """

    def __init__(self):
        self.many=[]

    def write(self, template, params):
        for sink in self.many:
            try:
                sink.write(template, params)
            except Exception:
                pass
        return self

    def add_log(self, logger):
        self.many.append(logger)
        return self

    def remove_log(self, logger):
        self.many.remove(logger)
        return self

    def clear_log(self):
        self.many=[]

    def stop(self):
        for sink in self.many:
            try:
                sink.stop()
            except Exception:
                pass
# UNTIL Log.start() IS CALLED, LOG TO stdout
main_log = main_log or Log_usingStream("sys.stdout")

Просмотреть файл

@ -1,115 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import math
from . import struct
from .struct import Null, nvl
from .logs import Log
from .strings import find_first
class Math(object):
@staticmethod
def bayesian_add(a, b):
if a>=1 or b>=1 or a<=0 or b<=0: Log.error("Only allowed values *between* zero and one")
return a*b/(a*b+(1-a)*(1-b))
# FOR GOODNESS SAKE - IF YOU PROVIDE A METHOD abs(), PLEASE PROVIDE IT'S COMPLEMENT
# x = abs(x)*sign(x)
# FOUND IN numpy, BUT WE USUALLY DO NOT NEED TO BRING IN A BIG LIB FOR A SIMPLE DECISION
@staticmethod
def sign(v):
if v<0: return -1
if v>0: return +1
return 0
@staticmethod
def is_number(s):
try:
float(s)
return True
except Exception:
return False
@staticmethod
def is_integer(s):
try:
if float(s)==round(float(s), 0):
return True
return False
except Exception:
return False
@staticmethod
def round_sci(value, decimal=None, digits=None):
if digits != None:
m=pow(10, math.floor(math.log10(digits)))
return round(value/m, digits)*m
return round(value, decimal)
@staticmethod
def floor(value, mod=None):
"""
x == floor(x, a) + mod(x, a) FOR ALL a
"""
mod = nvl(mod, 1)
v = int(math.floor(value))
return v - (v % mod)
#RETURN A VALUE CLOSE TO value, BUT WITH SHORTER len(unicode(value))<len(unicode(value)):
@staticmethod
def approx_str(value):
v=unicode(value)
d=v.find(".")
if d==-1: return value
i=find_first(v, ["9999", "0000"], d)
if i==-1: return value
return Math.round_sci(value, decimal=i-d-1)
@staticmethod
def min(values):
output = Null
for v in values:
if v == None:
continue
if math.isnan(v):
continue
if output == None:
output = v
continue
output = min(output, v)
return output
@staticmethod
def max(values):
output = Null
for v in values:
if v == None:
continue
if math.isnan(v):
continue
if output == None:
output = v
continue
output = max(output, v)
return output

Просмотреть файл

@ -0,0 +1,163 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import math
import __builtin__
from ..struct import Null, nvl
from ..env.logs import Log
from ..strings import find_first
from . import stats
class Math(object):
    """
    MATH FUNCTIONS THAT ASSUME None IMPLY *NOT APPLICABLE* RATHER THAN *MISSING*
    LET "." BE SOME OPERATOR (+, -, *, etc)
    a.None == None
    None.a == None
    .None == None
    func(None, *kwargs)) == None
    """

    @staticmethod
    def bayesian_add(*args):
        """
        COMBINE ANY NUMBER OF PROBABILITIES, EACH STRICTLY BETWEEN ZERO AND ONE
        None VALUES AFTER THE FIRST ARE SKIPPED
        """
        a = args[0]
        if a >= 1 or a <= 0:
            Log.error("Only allowed values *between* zero and one")

        for b in args[1:]:
            if b == None:
                continue
            if b >= 1 or b <= 0:
                Log.error("Only allowed values *between* zero and one")
            a = a * b / (a * b + (1 - a) * (1 - b))

        return a

    @staticmethod
    def bayesian_subtract(a, b):
        return Math.bayesian_add(a, 1 - b)

    @staticmethod
    def abs(v):
        if v == None:
            return Null
        return abs(v)

    @staticmethod
    def log(v, base=None):
        """RETURN THE LOGARITHM OF v IN THE GIVEN base (NATURAL LOG WHEN base IS None)"""
        if v == None:
            return Null
        if base == None:
            # math.log() DOES NOT ACCEPT None AS A BASE
            return math.log(v)
        return math.log(v, base)

    @staticmethod
    def log10(v):
        """RETURN THE BASE-10 LOGARITHM OF v, OR Null WHEN IT CAN NOT BE COMPUTED"""
        try:
            return math.log(v, 10)
        except Exception:
            return Null

    # FOR GOODNESS SAKE - IF YOU PROVIDE A METHOD abs(), PLEASE PROVIDE ITS COMPLEMENT
    # x = abs(x)*sign(x)
    # FOUND IN numpy, BUT WE USUALLY DO NOT NEED TO BRING IN A BIG LIB FOR A SIMPLE DECISION
    @staticmethod
    def sign(v):
        if v == None:
            return Null
        if v < 0:
            return -1
        if v > 0:
            return +1
        return 0

    @staticmethod
    def is_number(s):
        """RETURN True IF float(s) SUCCEEDS"""
        try:
            float(s)
            return True
        except Exception:
            return False

    @staticmethod
    def is_nan(s):
        return math.isnan(s)

    @staticmethod
    def is_integer(s):
        """RETURN True IF s CONVERTS TO A float WITH NO FRACTIONAL PART"""
        try:
            if float(s) == round(float(s), 0):
                return True
            return False
        except Exception:
            return False

    @staticmethod
    def round(value, decimal=0, digits=None):
        """
        ROUND TO GIVEN NUMBER OF DIGITS, OR GIVEN NUMBER OF DECIMAL PLACES
        decimal - NUMBER OF DIGITS AFTER DECIMAL POINT (NEGATIVE IS VALID)
        digits - NUMBER OF SIGNIFICANT DIGITS (LESS THAN 1 IS INVALID);
                 TAKES PRECEDENCE WHEN GIVEN
        """
        if value == None:
            return None

        if digits != None:
            if value == 0:
                return value  # ZERO HAS NO MAGNITUDE TO SCALE BY
            # abs() SO NEGATIVE VALUES CAN BE ROUNDED TOO
            m = pow(10, math.ceil(math.log10(abs(value))))
            return __builtin__.round(value / m, digits) * m

        return __builtin__.round(value, decimal)

    @staticmethod
    def floor(value, mod=None):
        """
        x == floor(x, a) + mod(x, a)  FOR ALL a
        """
        mod = nvl(mod, 1)
        v = int(math.floor(value))
        return v - (v % mod)

    #RETURN A VALUE CLOSE TO value, BUT WITH A SHORTER TEXT FORM:
    #len(unicode(result)) < len(unicode(value))
    @staticmethod
    def approx_str(value):
        v = unicode(value)
        d = v.find(".")
        if d == -1:
            return value

        if Math.round(value) == value:
            return int(value)

        # A RUN OF 9s OR 0s AFTER THE DECIMAL POINT MARKS FLOATING POINT NOISE
        i = find_first(v, ["9999", "0000"], d)
        if i != -1:
            return Math.round(value, decimal=i - d - 1)

        return value

    @staticmethod
    def ceiling(value):
        return int(math.ceil(value))

    @staticmethod
    def max(*values):
        """RETURN THE LARGEST ARGUMENT, IGNORING None; None IF THERE IS NONE"""
        output = None
        for v in values:
            if v == None:
                continue
            elif output == None or v > output:
                output = v
            else:
                pass
        return output

Просмотреть файл

@ -0,0 +1,90 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..cnv import CNV
from ..env.logs import Log
from ..queries import Q
from ..struct import Struct
from ..maths.randoms import Random
from ..vendor.aespython import key_expander, aes_cipher, cbc_mode
DEBUG = False
def encrypt(text, _key, salt=None):
    """
    RETURN JSON OF ENCRYPTED DATA   {"salt":s, "length":l, "data":d}

    text - unicode TO ENCRYPT (ENCODED AS utf8 FIRST)
    _key - THE KEY, EXPANDED FOR AES-256
    salt - USED AS THE CBC INITIALIZATION VECTOR; 16 RANDOM BYTES WHEN None
    """
    if not isinstance(text, unicode):
        Log.error("only unicode is encrypted")
    if _key is None:
        Log.error("Expecting a key")

    if salt is None:
        salt = Random.bytes(16)

    plain = bytearray(text.encode("utf8"))

    #Initialize encryption using key and iv
    expander = key_expander.KeyExpander(256)
    cipher = aes_cipher.AESCipher(expander.expand(_key))
    cbc = cbc_mode.CBCMode(cipher, 16)
    cbc.set_iv(salt)

    output = Struct()
    output.type = "AES256"
    output.salt = CNV.bytearray2base64(salt)
    output.length = len(plain)

    # ENCRYPT IN 16 BYTE BLOCKS
    cipher_text = bytearray()
    for _, block in Q.groupby(plain, size=16):
        cipher_text.extend(cbc.encrypt_block(block))
    output.data = CNV.bytearray2base64(cipher_text)
    json = CNV.object2JSON(output)

    if DEBUG:
        # ROUND TRIP TO CONFIRM THE RESULT CAN BE DECRYPTED
        round_trip = decrypt(json, _key)
        if round_trip != text:
            Log.error("problem with encryption")

    return json
def decrypt(data, _key):
    """
    ACCEPT JSON OF ENCRYPTED DATA {"salt":s, "length":l, "data":d}
    _key - SAME KEY THAT WAS GIVEN TO encrypt() (REQUIRED)
    RETURN THE DECODED (utf8) TEXT
    """
    # WITHOUT A KEY THERE IS NOTHING TO DO
    if _key is None:
        Log.error("Expecting a key")

    envelope = CNV.JSON2object(data)

    # PREPARE THE CIPHER USING THE KEY, AND THE SALT STORED WITH THE DATA
    expander = key_expander.KeyExpander(256)
    cipher = aes_cipher.AESCipher(expander.expand(_key))
    cbc = cbc_mode.CBCMode(cipher, 16)
    cbc.set_iv(CNV.base642bytearray(envelope.salt))

    cipher_text = CNV.base642bytearray(envelope.data)
    plain = bytearray()
    for _, chunk in Q.groupby(cipher_text, size=16):
        plain.extend(cbc.decrypt_block(chunk))

    # KEEP ONLY THE RECORDED NUMBER OF BYTES BEFORE DECODING
    trimmed = plain[:envelope.length:]
    return str(trimmed).decode("utf8")

Просмотреть файл

@ -0,0 +1,56 @@
# encoding: utf-8
#
from __future__ import unicode_literals
import random
import string
SIMPLE_ALPHABET = string.ascii_letters + string.digits
SEED = random.Random()


class Random(object):
    """
    CONVENIENCE WRAPPERS AROUND THE STANDARD random MODULE
    """

    @staticmethod
    def string(length, alphabet=SIMPLE_ALPHABET):
        """
        RETURN length CHARACTERS, EACH CHOSEN FROM alphabet USING SEED
        """
        return ''.join(SEED.choice(alphabet) for _ in range(0, length))

    @staticmethod
    def hex(length):
        """
        RETURN length UPPERCASE HEXADECIMAL CHARACTERS
        """
        hex_alphabet = string.digits + 'ABCDEF'
        return Random.string(length, hex_alphabet)

    @staticmethod
    def int(*args):
        """
        SAME ARGUMENTS AND RESULT AS random.randrange()
        """
        return random.randrange(*args)

    @staticmethod
    def float(*args):
        """
        RETURN random.random(), MULTIPLIED BY THE FIRST ARGUMENT WHEN ONE IS GIVEN
        """
        unit = random.random()
        if not args:
            return unit
        return unit * args[0]

    @staticmethod
    def sample(data, count):
        """
        RETURN count ITEMS PICKED FROM data (THE SAME ITEM MAY BE PICKED MORE THAN ONCE)
        """
        num = len(data)
        picked = []
        for _ in range(count):
            picked.append(data[Random.int(num)])
        return picked

    @staticmethod
    def combination(data):
        """
        RETURN ALL ITEMS OF data IN RANDOM ORDER (data ITSELF IS NOT CHANGED)
        """
        remaining = list(data)
        shuffled = []
        # THE RANGE TO DRAW FROM SHRINKS BY ONE AS EACH ITEM IS TAKEN
        for size in range(len(remaining), 0, -1):
            shuffled.append(remaining.pop(Random.int(size)))
        return shuffled

    @staticmethod
    def bytes(count):
        """
        RETURN A bytearray OF count VALUES, EACH FROM random.randrange(256)
        """
        return bytearray([random.randrange(256) for _ in range(count)])

336
tests/util/maths/stats.py Normal file
Просмотреть файл

@ -0,0 +1,336 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import sys
from ..vendor import strangman
from math import sqrt
from ..cnv import CNV
from ..collections import OR
from ..struct import nvl, Struct, Null
from ..env.logs import Log
DEBUG = True
DEBUG_STRANGMAN = True
EPSILON = 0.000000001
ABS_EPSILON = sys.float_info.min*2 # *2 FOR SAFETY
if DEBUG_STRANGMAN:
try:
import numpy
from scipy import stats
import scipy
except Exception, e:
DEBUG_STRANGMAN = False
def chisquare(f_obs, f_exp):
    """
    RETURN THE RESULT OF strangman.stats.chisquare(f_obs, f_exp)
    WHEN DEBUG_STRANGMAN IS ON, THE FIRST TWO ELEMENTS OF THE RESULT ARE
    CROSS-CHECKED AGAINST scipy.stats.chisquare()
    """
    py_result = strangman.stats.chisquare(
        f_obs,
        f_exp
    )

    if DEBUG_STRANGMAN:
        sp_result = scipy.stats.chisquare(
            numpy.array(f_obs),
            f_exp=numpy.array(f_exp)
        )

        # BOTH ELEMENTS MUST AGREE; "not a and b" ONLY COMPLAINED WHEN THE
        # FIRST DIFFERED WHILE THE SECOND MATCHED
        same = closeEnough(sp_result[0], py_result[0]) and closeEnough(sp_result[1], py_result[1])
        if not same:
            Log.error("problem with stats lib")

    return py_result
def stats2z_moment(stats):
    """
    CONVERT stats (count, mean, variance, skew, kurtosis) TO THE EQUIVALENT Z_moment
    MOMENTS THAT ARE MISSING ARE REPLACED BY ZERO (VIA nvl)
    WHEN DEBUG IS ON, THE RESULT IS CONVERTED BACK AND COMPARED TO stats
    """
    # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
    # ADDED count
    mc0, mc1, mc2, skew, kurt = stats.count, nvl(stats.mean, 0), nvl(stats.variance, 0), nvl(stats.skew, 0), nvl(stats.kurtosis, 0)

    mz0 = mc0
    mz1 = mc1 * mc0
    mz2 = (mc2 + mc1 * mc1) * mc0
    mc3 = nvl(skew, 0) * (mc2 ** 1.5)  # 3rd central moment
    mz3 = (mc3 + 3 * mc1 * mc2 + mc1 ** 3) * mc0  # 3rd non-central moment
    mc4 = (nvl(kurt, 0) + 3.0) * (mc2 ** 2.0)  # 4th central moment
    mz4 = (mc4 + 4 * mc1 * mc3 + 6 * mc1 * mc1 * mc2 + mc1 ** 4) * mc0

    m = Z_moment(mz0, mz1, mz2, mz3, mz4)
    if DEBUG:
        # DEBUG IS SWITCHED OFF WHILE CHECKING, BECAUSE z_moment2stats() CALLS BACK INTO THIS FUNCTION
        globals()["DEBUG"] = False
        try:
            v = z_moment2stats(m, unbiased=False)
            assert closeEnough(v.count, stats.count)
            assert closeEnough(v.mean, stats.mean)
            assert closeEnough(v.variance, stats.variance)
            assert closeEnough(v.skew, stats.skew)
            assert closeEnough(v.kurtosis, stats.kurtosis)  # WAS stats.vkurtosis, WHICH IS NOT A FIELD OF Stats
        except Exception as e:
            Log.error("programmer error", e)
        finally:
            # RESTORE EVEN WHEN THE CHECK FAILS, OTHERWISE ALL LATER CHECKS ARE SILENTLY DISABLED
            globals()["DEBUG"] = True
    return m
def closeEnough(a, b):
    """
    RETURN True IF a AND b ARE EQUAL WITHIN TOLERANCE
    - BOTH None: True; ONLY ONE None: False
    - ABSOLUTE DIFFERENCE BELOW ABS_EPSILON: True
    - OTHERWISE THE DIFFERENCE RELATIVE TO THE LARGER MAGNITUDE MUST BE BELOW EPSILON
    """
    if a == None and b == None:
        return True
    if a == None or b == None:
        return False

    diff = abs(a - b)
    if diff < ABS_EPSILON:
        return True

    # THE LARGER MAGNITUDE CAN NOT BE ZERO HERE (BOTH ZERO WAS CAUGHT ABOVE).
    # float() STOPS PYTHON 2 INTEGER DIVISION FROM TRUNCATING THE ERROR TO ZERO,
    # WHICH MADE ANY TWO DIFFERENT INTEGERS "CLOSE ENOUGH"
    if abs(b) > abs(a):
        err = float(diff) / abs(b)
    else:
        err = float(diff) / abs(a)

    if err < EPSILON:
        return True
    return False
def z_moment2stats(z_moment, unbiased=True):
    """
    CONVERT THE SUMS HELD IN z_moment.S TO Stats (count, mean, variance, skew, kurtosis)
    unbiased - ONLY RECORDED ON THE RETURNED Stats; THE ARITHMETIC HERE DOES NOT USE IT
    RETURN AN EMPTY Stats WHEN THE COUNT IS ZERO
    WHEN DEBUG IS ON, THE RESULT IS CONVERTED BACK AND COMPARED TO THE INPUT
    """
    Z = z_moment.S
    N = Z[0]
    if N == 0:
        return Stats()

    # DIVIDE BY A float SO INTEGER SUMS ARE NOT TRUNCATED BY PYTHON 2 INTEGER DIVISION
    n = float(N)
    mean = Z[1] / n
    Z2 = Z[2] / n
    Z3 = Z[3] / n
    Z4 = Z[4] / n

    if N == 1:
        variance = None
        skew = None
        kurtosis = None
    else:
        variance = (Z2 - mean * mean)
        error = -EPSILON * (abs(Z2) + 1)  # EXPECTED FLOAT ERROR

        if error < variance <= 0:  # TODO: MAKE THIS A TEST ON SIGNIFICANT DIGITS
            # SLIGHTLY NEGATIVE DUE TO FLOAT ERROR: TREAT AS ZERO
            variance = 0
            skew = None
            kurtosis = None
        elif variance < error:
            Log.error("variance can not be negative ({{var}})", {"var": variance})
        else:
            mc3 = (Z3 - (3 * mean * variance + mean ** 3))  # 3rd central moment
            mc4 = (Z4 - (4 * mean * mc3 + 6 * mean * mean * variance + mean ** 4))
            skew = mc3 / (variance ** 1.5)
            kurtosis = (mc4 / (variance ** 2.0)) - 3.0

    stats = Stats(
        count=N,
        mean=mean,
        variance=variance,
        skew=skew,
        kurtosis=kurtosis,
        unbiased=unbiased
    )

    if DEBUG:
        # DEBUG IS SWITCHED OFF WHILE CHECKING, BECAUSE stats2z_moment() CALLS BACK INTO THIS FUNCTION
        globals()["DEBUG"] = False
        v = Null
        try:
            v = stats2z_moment(stats)
            for i in range(5):
                assert closeEnough(v.S[i], Z[i])
        except Exception as e:
            Log.error("Convertion failed.  Programmer error:\nfrom={{from|indent}},\nresult stats={{stats|indent}},\nexpected parem={{expected|indent}}", {
                "from": Z,
                "stats": stats,
                "expected": v.S
            }, e)
        finally:
            # RESTORE EVEN WHEN THE CHECK FAILS, OTHERWISE ALL LATER CHECKS ARE SILENTLY DISABLED
            globals()["DEBUG"] = True

    return stats
class Stats(Struct):
    """
    SUMMARY STATISTICS: count, mean, variance, skew, kurtosis (AND THE unbiased FLAG)
    BUILD FROM samples=<values>, OR FROM THE INDIVIDUAL MOMENTS AS KEYWORD ARGUMENTS
    """

    def __init__(self, **kwargs):
        Struct.__init__(self)

        if "samples" in kwargs:
            # DERIVE EVERYTHING FROM THE RAW SAMPLES (unbiased IS NOT SET ON THIS PATH)
            s = z_moment2stats(Z_moment.new_instance(kwargs["samples"]))
            self.count = s.count
            self.mean = s.mean
            self.variance = s.variance
            self.skew = s.skew
            self.kurtosis = s.kurtosis
            return

        # A MOMENT IS ONLY ACCEPTED WHEN ALL THE LOWER MOMENTS WERE ALSO GIVEN
        count, mean, variance, skew, kurtosis = 0, None, None, None, None
        if "count" in kwargs:
            count = kwargs["count"]
            if "mean" in kwargs:
                mean = kwargs["mean"]
                variance = 0  # VALUE USED WHEN NEITHER variance NOR std IS GIVEN
                if "variance" in kwargs or "std" in kwargs:
                    variance = kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2
                    if "skew" in kwargs:
                        skew = kwargs["skew"]
                        if "kurtosis" in kwargs:
                            kurtosis = kwargs["kurtosis"]

        self.count = count
        self.mean = mean
        self.variance = variance
        self.skew = skew
        self.kurtosis = kurtosis

        # unbiased WINS OVER biased; False WHEN NEITHER IS GIVEN
        if "unbiased" in kwargs:
            self.unbiased = kwargs["unbiased"]
        elif "biased" in kwargs:
            self.unbiased = not kwargs["biased"]
        else:
            self.unbiased = False

    @property
    def std(self):
        """
        SQUARE ROOT OF THE variance
        """
        return sqrt(self.variance)
class Z_moment(object):
    """
    ZERO-CENTERED MOMENTS
    S IS THE TUPLE OF SUMS; new_instance() FILLS IT WITH
    (count, sum(x), sum(x^2), sum(x^3), sum(x^4))
    """

    def __init__(self, *args):
        self.S = tuple(args)

    def __add__(self, other):
        # map() IS KEPT, RATHER THAN zip(), SO THE TWO TUPLES ARE PAIRED EXACTLY AS BEFORE
        summed = map(add, self.S, other.S)
        return Z_moment(*summed)

    def __sub__(self, other):
        difference = map(sub, self.S, other.S)
        return Z_moment(*difference)

    @property
    def tuple(self):
        """
        RETURN AS ORDERED TUPLE
        """
        return self.S

    @property
    def dict(self):
        """
        RETURN HASH OF SUMS, KEYED "s0", "s1", ...
        """
        sums = {}
        for i, m in enumerate(self.S):
            sums[u"s" + unicode(i)] = m
        return sums

    @staticmethod
    def new_instance(values=None):
        """
        RETURN THE Z_moment OF values; AN EMPTY Z_moment WHEN values IS None
        """
        if values == None:
            return Z_moment()

        count = len(values)
        s1 = sum(values)
        s2 = sum(n ** 2 for n in values)
        s3 = sum(n ** 3 for n in values)
        s4 = sum(n ** 4 for n in values)
        return Z_moment(count, s1, s2, s3, s4)
def add(a, b):
    """
    RETURN a + b, WITH nvl() SUPPLYING 0 FOR A MISSING OPERAND
    """
    left = nvl(a, 0)
    right = nvl(b, 0)
    return left + right
def sub(a, b):
    """
    RETURN a - b, WITH nvl() SUPPLYING 0 FOR A MISSING OPERAND
    """
    left = nvl(a, 0)
    right = nvl(b, 0)
    return left - right
def z_moment2dict(z):
    """
    RETURN HASH OF SUMS: {"s0": z.S[0], "s1": z.S[1], ...}
    """
    sums = {}
    for i, m in enumerate(z.S):
        sums[u"s" + unicode(i)] = m
    return sums


# MAKE THE CONVERTER AVAILABLE AS CNV.z_moment2dict()
setattr(CNV, "z_moment2dict", staticmethod(z_moment2dict))
def median(values, simple=True, mean_weight=0.0):
    """
    RETURN MEDIAN VALUE (Null WHEN values IS EMPTY)

    IF simple=False THEN IN THE EVENT MULTIPLE INSTANCES OF THE
    MEDIAN VALUE, THE MEDIAN IS INTERPOLATED BASED ON ITS POSITION
    IN THE MEDIAN RANGE

    mean_weight IS TO PICK A MEDIAN VALUE IN THE ODD CASE THAT IS
    CLOSER TO THE MEAN (PICK A MEDIAN BETWEEN TWO MODES IN BIMODAL CASE)

    None IN values IS REPORTED AS AN ERROR
    """
    if OR(v == None for v in values):
        Log.error("median is not ready to handle None")

    try:
        if not values:
            return Null

        l = len(values)
        _sorted = sorted(values)

        middle = l // 2
        _median = float(_sorted[middle])

        if len(_sorted) == 1:
            return _median

        if simple:
            if l % 2 == 0:
                return float(_sorted[middle - 1] + _median) / 2
            return _median

        #FIND RANGE OF THE median
        # ">= 0" (NOT "> 0") SO A RUN THAT REACHES THE FIRST ELEMENT IS COUNTED IN FULL
        start_index = middle - 1
        while start_index >= 0 and _sorted[start_index] == _median:
            start_index -= 1
        start_index += 1
        stop_index = middle + 1
        while stop_index < l and _sorted[stop_index] == _median:
            stop_index += 1

        num_middle = stop_index - start_index

        if l % 2 == 0:
            if num_middle == 1:
                return float(_sorted[middle - 1] + _median) / 2
            else:
                return (_median - 0.5) + float(middle - start_index) / float(num_middle)
        else:
            if num_middle == 1:
                # 2.0 KEEPS THE NEIGHBOUR AVERAGE FROM BEING TRUNCATED BY INTEGER DIVISION
                return (1 - mean_weight) * _median + mean_weight * (_sorted[middle - 1] + _sorted[middle + 1]) / 2.0
            else:
                return (_median - 0.5) + float(middle + 0.5 - start_index) / float(num_middle)
    except Exception as e:
        Log.error("problem with median of {{values}}", {"values": values}, e)
# MODULE-LEVEL Stats BUILT WITH NO ARGUMENTS (count IS 0, THE OTHER MOMENTS ARE None)
zero = Stats()

Просмотреть файл

@ -1,160 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import threading
from .struct import nvl
from .logs import Log
from .threads import Queue, Thread
DEBUG = True
# THREAD THAT REPEATEDLY POPS A PARAMETER HASH FROM in_queue, CALLS function(**request),
# AND ADDS {"response": result} OR {"exception": e} TO out_queue (WHEN ONE IS GIVEN)
class worker_thread(threading.Thread):
#in_queue MUST CONTAIN HASH OF PARAMETERS FOR load()
def __init__(self, name, in_queue, out_queue, function):
threading.Thread.__init__(self)
self.name=name
self.in_queue=in_queue
self.out_queue=out_queue
self.function=function
self.keep_running=True
self.num_runs=0
# THE THREAD IS STARTED BY ITS OWN CONSTRUCTOR
self.start()
#REQUIRED TO DETECT KEYBOARD, AND OTHER, INTERRUPTS
def join(self, timeout=None):
# WAIT IN SHORT STEPS (0.5 WHEN NO timeout IS GIVEN) UNTIL THE THREAD IS NO LONGER ALIVE
while self.isAlive():
Log.note("Waiting on thread {{thread}}", {"thread":self.name})
threading.Thread.join(self, nvl(timeout, 0.5))
# MAIN LOOP: SERVICE REQUESTS UNTIL Thread.STOP IS POPPED OR keep_running IS CLEARED
def run(self):
got_stop=False
while self.keep_running:
request = self.in_queue.pop()
# Thread.STOP IS THE SHUTDOWN MARKER; ANYTHING STILL QUEUED AT THAT POINT IS REPORTED
if request == Thread.STOP:
got_stop=True
if self.in_queue.queue:
Log.warning("programmer error")
break
# stop() MAY HAVE BEEN CALLED WHILE WAITING ON pop()
if not self.keep_running:
break
try:
if DEBUG and hasattr(self.function, "func_name"):
Log.note("run {{function}}", {"function": self.function.func_name})
# THE REQUEST HASH IS EXPANDED INTO KEYWORD ARGUMENTS
result = self.function(**request)
if self.out_queue != None:
self.out_queue.add({"response": result})
except Exception, e:
# A FAILED REQUEST IS LOGGED AND REPORTED ON out_queue; IT IS NOT RE-RAISED HERE
Log.warning("Can not execute with params={{params}}", {"params": request}, e)
if self.out_queue != None:
self.out_queue.add({"exception": e})
finally:
# COUNTS EVERY ATTEMPT, SUCCESSFUL OR NOT
self.num_runs += 1
self.keep_running = False
if self.num_runs==0:
Log.warning("{{name}} thread did no work", {"name":self.name})
if DEBUG and self.num_runs!=1:
Log.note("{{name}} thread did {{num}} units of work", {
"name":self.name,
"num":self.num_runs
})
if got_stop and self.in_queue.queue:
Log.warning("multithread programmer error")
if DEBUG:
Log.note("{{thread}} DONE", {"thread":self.name})
# ONLY CLEARS THE FLAG; THE FLAG IS TESTED BY run() AT THE TOP OF ITS LOOP AND AFTER EACH pop()
def stop(self):
self.keep_running=False
#PASS A SET OF FUNCTIONS TO BE EXECUTED (ONE PER THREAD)
#PASS AN (ITERATOR/LIST) OF PARAMETERS TO BE ISSUED TO NEXT AVAILABLE THREAD
# POOL OF worker_thread, ONE PER FUNCTION, ALL SHARING ONE inbound AND ONE outbound QUEUE
class Multithread(object):
def __init__(self, functions):
# outbound IS HANDED TO THE WORKERS AS THEIR out_queue, inbound AS THEIR in_queue
self.outbound=Queue()
self.inbound=Queue()
#MAKE THREADS
self.threads=[]
for t, f in enumerate(functions):
# WORKERS ARE NAMED "worker 0", "worker 1", ... (worker_thread STARTS ITSELF)
thread=worker_thread("worker "+unicode(t), self.inbound, self.outbound, f)
self.threads.append(thread)
# SUPPORT FOR THE with STATEMENT
def __enter__(self):
return self
#WAIT FOR ALL QUEUED WORK TO BE DONE BEFORE RETURNING
def __exit__(self, type, value, traceback):
try:
# value IS THE EXCEPTION (IF ANY) RAISED INSIDE THE with BLOCK
if isinstance(value, Exception):
self.inbound.close()
self.inbound.add(Thread.STOP)
self.join()
except Exception, e:
Log.warning("Problem sending stops", e)
#IF YOU SENT A stop(), OR Thread.STOP, YOU MAY WAIT FOR SHUTDOWN
def join(self):
try:
#WAIT FOR FINISH
for t in self.threads:
t.join()
except (KeyboardInterrupt, SystemExit):
Log.note("Shutdow Started, please be patient")
except Exception, e:
Log.error("Unusual shutdown!", e)
finally:
# WHATEVER HAPPENED ABOVE: FLAG EVERY WORKER TO STOP, CLOSE BOTH QUEUES, AND WAIT AGAIN
for t in self.threads:
t.keep_running=False
self.inbound.close()
self.outbound.close()
for t in self.threads:
t.join()
#RETURN A GENERATOR THAT HAS len(parameters) RESULTS (ANY ORDER)
def execute(self, request):
#FILL QUEUE WITH WORK
self.inbound.extend(request)
num=len(request)
# LAZY: RESULTS ARE POPPED FROM outbound ONLY AS THE CALLER ITERATES
def output():
for i in xrange(num):
result=self.outbound.pop()
# AN EXCEPTION REPORTED BY A WORKER IS RE-RAISED IN THE CALLER'S THREAD
if "exception" in result:
raise result["exception"]
else:
yield result["response"]
return output()
#EXTERNAL COMMAND THAT RETURNS IMMEDIATELY
def stop(self):
self.inbound.close() #SEND STOPS TO WAKE UP THE WORKERS WAITING ON inbound.pop()
for t in self.threads:
t.keep_running=False

16
tests/util/parsers.py Normal file
Просмотреть файл

@ -0,0 +1,16 @@
from urlparse import urlparse
from .struct import Struct
def URL(value):
    """
    PARSE value WITH urlparse() AND RETURN ITS PARTS AS A Struct:
    protocol, host, port, path, query, fragment
    host IS THE netloc AS PARSED, SO IT STILL INCLUDES ANY ":port" SUFFIX
    """
    output = urlparse(value)

    return Struct(
        protocol=output.scheme,
        host=output.netloc,
        port=output.port,
        path=output.path,
        query=output.query,
        fragment=output.fragment,
        fragmen=output.fragment  # ORIGINAL (MISSPELLED) KEY, KEPT FOR EXISTING CALLERS
    )

739
tests/util/queries/MVEL.py Normal file
Просмотреть файл

@ -0,0 +1,739 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime
import re
from .. import struct
from ..cnv import CNV
from ..collections import reverse
from ..env.logs import Log
from ..maths import Math
from ..queries.filters import TRUE_FILTER
from ..struct import Struct, nvl, split_field, join_field, Null
from ..times.durations import Duration
class _MVEL(object):
def __init__(self, fromData, isLean=False):
self.fromData = fromData
self.isLean = isLean
self.prefixMap = []
self.functions = {}
def code(self, query):
    """
    RETURN THE MVEL THAT WILL FILTER USING query.where AND TERM-PACK THE query.select CLAUSE
    THE SCRIPT IS RETURNED WRAPPED IN Compiled
    """
    # NOTE(review): listwrap IS NOT AMONG THE IMPORTS VISIBLE AT THE TOP OF THIS FILE - CONFIRM IT IS IN SCOPE
    selectList = listwrap(query.select)
    fromPath = query.frum.name  # FIRST NAME IS THE INDEX
    sourceVar = "__sourcedoc__"
    whereClause = query.where

    # PARSE THE fromPath
    loops = self.frum(fromPath, sourceVar, "__loop")
    select = self.select(selectList, fromPath, "output", sourceVar)

    # THE FILTERED SELECT BECOMES THE INNERMOST BODY OF THE LOOPS
    condition = _where(whereClause, lambda v: self._translate(v))
    filtered = "if (" + condition + "){\n" + select.body + "}\n"
    body = "var output = \"\";\n" + loops.replace("<CODE>", filtered) + "output\n"

    # ADD REFERENCED CONTEXT VARIABLES
    context = self.getFrameVariables(body)

    func = UID()
    predef = addFunctions(select.head+context+body).head

    # NOTE(review): find() RETURNS -1 (TRUTHY) WHEN sourceVar IS ABSENT, AND body NEVER
    # STARTS WITH sourceVar, SO "_source" IS ALWAYS CHOSEN HERE - CONFIRM THE INTENT
    if body.find(sourceVar):
        param = "_source"
    else:
        param = ""

    declaration = 'var ' + func + ' = function('+sourceVar+'){\n' + body + '};\n'
    invocation = func + '('+param+')\n'
    output = predef + select.head + context + declaration + invocation

    return Compiled(output)
def frum(self, fromPath, sourceVar, loopVariablePrefix):
    """
    BUILD THE NESTED LOOP TEMPLATE FOR fromPath, AND REBUILD self.prefixMap
    fromPath - DOTTED PATH; THE FIRST NAME IS THE INDEX, EACH FURTHER NAME ADDS ONE LOOP
    sourceVar - NAME OF THE MVEL VARIABLE HOLDING THE SOURCE DOCUMENT
    loopVariablePrefix - PREFIX FOR LOOP VARIABLES
    RETURN MVEL TEXT CONTAINING ONE <CODE> PLACEHOLDER FOR THE INNERMOST BODY
    """
    loopCode = "if (<PATH> != null){ for(<VAR> : <PATH>){\n<CODE>\n}}\n"
    self.prefixMap = []
    code = "<CODE>"
    path = split_field(fromPath)

    # ADD LOCAL VARIABLES
    from ..queries.es_query_util import INDEX_CACHE

    for column in INDEX_CACHE[path[0]].columns:
        name = column.name
        if name=="attachments":
            Log.debug("")

        if name.find("\\.") >= 0:
            # NAMES WITH AN ESCAPED DOT ARE READ THROUGH get()
            variable = "get(" + sourceVar + ", \"" + name.replace("\\.", ".") + "\")"
        else:
            variable = sourceVar + ".?" + name
        self.prefixMap.insert(0, {"path": name, "variable": variable})

    # ADD LOOP VARIABLES
    steps = []
    # self.prefixMap.insert(0, {"path": path[0], "variable": path[0]})
    for depth, step in enumerate(path[1::]):
        loopVariable = loopVariablePrefix + str(depth)
        steps.append(step)
        pathi = ".".join(steps)

        # TRANSLATE BEFORE REGISTERING THE LOOP VARIABLE FOR THIS SAME PATH
        shortPath = self._translate(pathi)
        self.prefixMap.insert(0, {"path": pathi, "variable": loopVariable})

        loop = loopCode.replace("<VAR>", loopVariable).replace("<PATH>", shortPath)
        code = code.replace("<CODE>", loop)

    return code
def _translate(self, variableName):
    """
    REWRITE variableName USING EVERY ENTRY OF self.prefixMap, IN ORDER
    AN EXACT MATCH OF A PATH IS REPLACED BY ITS VARIABLE, OTHERWISE A
    LEADING "path." OR "path[" IS REPLACED BY THE VARIABLE FORM
    """
    result = variableName
    for entry in self.prefixMap:
        path = entry["path"]
        if result == path:
            result = entry["variable"]
            continue
        result = replacePrefix(result, path + ".", entry["variable"] + ".?")  # ADD NULL CHECK
        result = replacePrefix(result, path + "[", entry["variable"] + "[")
    return result
# CREATE A PIPE DELIMITED RESULT SET
def select(self, selectList, fromPath, varName, sourceVar):
    """
    BUILD THE MVEL THAT PACKS EVERY SELECTED VALUE INTO ONE PIPE DELIMITED STRING
    selectList - THE SELECT CLAUSES (EACH WITH A value OR A domain)
    fromPath - DOTTED PATH; MORE THAN ONE NAME MEANS THE QUERY IS DEEP (NESTED)
    varName - MVEL VARIABLE THAT RECEIVES THE PACKED STRING
    RETURN Struct(head=<SUPPORTING DEFINITIONS>, body=<ASSIGNMENT TO varName>)
    """
    is_deep = len(split_field(fromPath)) > 1
    heads = []
    terms = []
    for s in selectList:
        if is_deep:
            if s.value and isKeyword(s.value):
                terms.append("Value2Pipe(" + self._translate(s.value) + ")\n")
            else:
                Log.error("do not know how to handle yet")
            continue

        if s.value and isKeyword(s.value):
            terms.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n")
        elif s.value:
            terms.append("Value2Pipe(" + self._translate(s.value) + ")\n")
        else:
            code, decode = self.Parts2Term(s.domain)
            heads.append(code.head)
            terms.append("Value2Pipe(" + code.body + ")\n")

    packed = '+"|"+'.join(["Value2Pipe("+v+")\n" for v in terms])
    if len(split_field(fromPath)) > 1:
        # DEEP: APPEND TO WHAT PREVIOUS LOOP ITERATIONS ALREADY WROTE
        output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + packed + ';\n'
    else:
        output = varName + ' = ' + packed + ';\n'

    return Struct(
        head="".join(heads),
        body=output
    )
def Parts2Term(self, domain):
"""
TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)
CONVERT AN ARRAY OF PARTS{name, esfilter} TO AN MVEL EXPRESSION
RETURN expression, function PAIR, WHERE
expression - MVEL EXPRESSION
function - TAKES RESULT OF expression AND RETURNS PART
"""
fields = domain.dimension.fields
term = []
# SHALLOW QUERY (ONE NAME IN THE from PATH) OVER A DIMENSION THAT DECLARES ITS FIELDS
if len(split_field(self.fromData.name)) == 1 and fields:
if isinstance(fields, dict):
# CONVERT UNORDERED FIELD DEFS
qb_fields, es_fields = zip(*[(k, fields[k]) for k in sorted(fields.keys())])
else:
# A LIST OF FIELDS IS KEYED BY POSITION
qb_fields, es_fields = zip(*[(i, e) for i, e in enumerate(fields)])
#NO LOOPS BECAUSE QUERY IS SHALLOW
#DOMAIN IS FROM A DIMENSION, USE IT'S FIELD DEFS TO PULL
if len(es_fields) == 1:
# SINGLE FIELD: THE TERM IS THE PART KEY
def fromTerm(term):
return domain.getPartByKey(term)
return Struct(
head="",
body='getDocValue('+CNV.string2quote(domain.dimension.fields[0])+')'
), fromTerm
else:
# MANY FIELDS: THE TERM IS A PIPE DELIMITED LIST OF ENCODED VALUES, ONE PER FIELD
def fromTerm(term):
terms = [CNV.pipe2value(t) for t in CNV.pipe2value(term).split("|")]
candidate = dict(zip(qb_fields, terms))
# LOOK FOR A PARTITION WHOSE value MATCHES THE CANDIDATE ON EVERY FIELD
for p in domain.partitions:
for k, t in candidate.items():
if p.value[k] != t:
break
else:
return p
# NOTHING MATCHED: "uid" AND "default" DOMAINS GET A NEW PARTITION, OTHERS GET Null
if domain.type in ["uid", "default"]:
part = {"value": candidate}
domain.partitions.append(part)
return part
else:
return Null
for f in es_fields:
term.append('Value2Pipe(getDocValue('+CNV.string2quote(f)+'))')
return Struct(
head="",
body='Value2Pipe('+('+"|"+'.join(term))+')'
), fromTerm
else:
# ONE "if (<partition filter>) <partition key>; else " PER PARTITION, ENDING WITH THE KEY OF domain.NULL
for v in domain.partitions:
term.append("if (" + _where(v.esfilter, lambda x: self._translate(x)) + ") " + value2MVEL(domain.getKey(v)) + "; else ")
term.append(value2MVEL(domain.getKey(domain.NULL)))
# NOTE(review): func_name IS ASSIGNED BUT NOT USED BELOW
func_name = "_temp"+UID()
# NOTE(review): THIS BRANCH RETURNS ONLY THE Struct FROM register_function(), NOT THE
# (expression, function) PAIR THE DOCSTRING DESCRIBES - CONFIRM AGAINST CALLERS THAT UNPACK TWO VALUES
return self.register_function("+\"|\"+".join(term))
def Parts2TermScript(self, domain):
    """
    SAME AS Parts2Term(), BUT THE EXPRESSION IS RETURNED AS ONE COMPLETE SCRIPT:
    THE REQUIRED MVEL FUNCTIONS, THEN THE HEAD, THEN THE BODY
    RETURN script, function PAIR
    """
    term, decoder = self.Parts2Term(domain)
    required = addFunctions(term.head + term.body)
    script = required.head + term.head + term.body
    return script, decoder
# RETURN MVEL STATEMENTS THAT DEFINE A VARIABLE FOR EACH COLUMN OF self.fromData
# THAT body REFERS TO (EMPTY STRING WHEN NONE ARE REFERENCED)
def getFrameVariables(self, body):
contextVariables = []
columns = self.fromData.columns
parentVarNames = set() # ALL PARENTS OF VARIABLES WITH "." IN NAME
# DROP THE NULL-SAFE MARKER SO NAMES CAN BE FOUND BY PLAIN TEXT SEARCH
body = body.replace(".?", ".")
for i, c in enumerate(columns):
j = body.find(c.name, 0)
while j >= 0:
s = j
j = body.find(c.name, s + 1)
# test0 IS THE OCCURRENCE PLUS ONE CHARACTER EITHER SIDE; test3 ADDS THE 8 CHARACTERS BEFORE IT
test0 = body[s - 1: s + len(c.name) + 1:]
test3 = body[s - 8: s + len(c.name):]
# SKIP AN OCCURRENCE THAT FOLLOWS A DOUBLE QUOTE
if test0[:-1] == "\"" + c.name:
continue
# SKIP AN OCCURRENCE ALREADY WRITTEN AS _source.<name>
if test3 == "_source." + c.name:
continue
# DEFINE THE ENCLOSING MAP(S) OF A DOTTED NAME, OUTERMOST FIRST
def defParent(name):
# DO NOT MAKE THE SAME PARENT TWICE
if name in parentVarNames:
return
parentVarNames.add(name)
if len(split_field(name)) == 1:
contextVariables.append("Map " + name + " = new HashMap();\n")
else:
defParent(join_field(split_field(name)[0:-1]))
contextVariables.append(name + " = new HashMap();\n")
# BLANK OUT THE NAME IN body SO IT IS NOT FOUND AGAIN
body = body.replace(c.name, "-"*len(c.name))
# LEAN INDEXES AND useSource COLUMNS READ FROM _source, ALL OTHERS USE getDocValue()
if self.isLean or c.useSource:
if len(split_field(c.name)) > 1:
defParent(join_field(split_field(c.name)[0:-1]))
contextVariables.append(c.name + " = getSourceValue(\"" + c.name + "\");\n")
else:
contextVariables.append(c.name + " = _source[\"" + c.name + "\"];\n")
else:
if len(split_field(c.name)) > 1:
defParent(join_field(split_field(c.name)[0:-1]))
contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
else:
contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
# ONE DEFINITION PER COLUMN IS ENOUGH
break
return "".join(contextVariables)
def compile_expression(self, expression, constants=None):
    """
    RETURN THE MVEL SCRIPT FOR expression, WITH constants EXPANDED AND THE
    REQUIRED FUNCTIONS PREPENDED
    WHEN NO COLUMN IS REFERENCED THE RESULT IS A PLAIN STRING, OTHERWISE THE
    EXPRESSION IS WRAPPED IN A FUNCTION CALLED WITH _source AND RETURNED AS Compiled
    """
    # EXPAND EXPRESSION WITH ANY CONSTANTS
    expression = setValues(expression, constants)

    fromPath = self.fromData.name  # FIRST NAME IS THE INDEX
    indexName = split_field(fromPath)[0]

    context = self.getFrameVariables(expression)
    if context == "":
        required = addFunctions(expression)
        return required.head+expression

    func = UID()
    code = addFunctions(context+expression)
    declaration = 'var ' + func + ' = function(' + indexName + '){\n' + context + expression + ";\n" + '};\n'
    output = code.head + declaration + func + '(_source)\n'
    return Compiled(output)
def register_function(self, code):
    """
    GIVE code A FUNCTION NAME, REUSING THE NAME ALREADY RECORDED FOR IDENTICAL code
    RETURN Struct(head=<FUNCTION DEFINITION>, body=<CALL TO THE FUNCTION>)
    """
    name = None
    for known_name, known_code in self.functions.items():
        if known_code == code:
            name = known_name
            break

    if name is None:
        # FIRST TIME THIS CODE IS SEEN
        name = "_temp" + UID()
        self.functions[name] = code

    definition = 'var ' + name + ' = function(){\n' + code + '\n};\n'
    return Struct(
        head=definition,
        body=name + '()\n'
    )
class Compiled(object):
    """
    HOLDS A FINISHED MVEL SCRIPT IN .code
    """

    def __init__(self, code):
        self.code = code

    def __str__(self):
        # THE SCRIPT ITSELF
        return self.code

    def __json__(self):
        # THE SCRIPT AS A QUOTED STRING
        script = self.code
        return CNV.string2quote(script)
__UID__ = 1000


def UID():
    """
    RETURN A NEW IDENTIFIER ON EVERY CALL: "_1000", "_1001", ...
    """
    global __UID__
    current = __UID__
    __UID__ = current + 1
    return "_" + str(current)
def setValues(expression, constants):
    """
    REPLACE EVERY STAND-ALONE REFERENCE TO A CONSTANT IN expression WITH ITS MVEL LITERAL
    expression - THE MVEL EXPRESSION (A STRING)
    constants - LIST OF {"name", "value"}; dict VALUES ARE EXPANDED TO "name.key" ENTRIES
    RETURN THE EXPANDED EXPRESSION (expression ITSELF WHEN THERE ARE NO constants)
    """
    if not constants:
        return expression

    constants = constants.copy()

    # EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES)
    # ENTRIES ARE APPENDED WHILE ITERATING, PRESUMABLY SO NESTED dicts ARE VISITED TOO
    for c in constants:
        value = c.value
        n = c.name
        if len(split_field(n)) >= 3:
            continue  # DO NOT GO TOO DEEP
        if isinstance(value, list):
            continue  # DO NOT MESS WITH ARRAYS

        if isinstance(value, dict):
            for k, v in value.items():
                constants.append({"name": n + "." + k, "value": v})

    for c in reverse(constants):  # REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST
        name = c.name
        if not name:
            continue  # AN EMPTY NAME WOULD MATCH AT EVERY POSITION
        s = 0
        while True:
            s = expression.find(name, s)
            if s == -1:
                break
            e = s + len(name)

            # A MATCH INSIDE A LONGER IDENTIFIER IS SKIPPED, AND THE SEARCH CONTINUES.
            # THE BOUNDS ARE CHECKED SO A MATCH AT EITHER END OF THE STRING IS SAFE
            if s > 0 and re.match(r"\w", expression[s - 1]):
                s += 1
                continue
            if e < len(expression) and re.match(r"\w", expression[e]):
                s += 1
                continue

            v = value2MVEL(c.value)
            # KEEP THE TEXT BEFORE AND AFTER THE NAME (THE TAIL WAS PREVIOUSLY expression[:e], A COPY OF THE HEAD)
            expression = expression[:s] + v + expression[e:]
            s += len(v)  # DO NOT SEARCH INSIDE THE TEXT JUST INSERTED

    return expression
def unpack_terms(facet, selects):
    """
    INTERPRET THE TERM-PACKED ES RESULTS AND RETURN DATA CUBE
    EACH facet.terms[].term IS A PIPE DELIMITED STRING OF ENCODED VALUES; EVERY
    len(selects) VALUES FORM ONE ROW, AND THE TERM'S count IS APPENDED TO EACH ROW
    RETURN LIST OF ROWS
    """
    width = len(selects)
    rows = []
    for t in facet.terms:
        if t.term == "":
            continue  # NO DATA

        row = []
        for position, packed in enumerate(t.term.split("|"), 1):
            row.append(CNV.pipe2value(packed))
            if position % width == 0:
                # ROW IS COMPLETE
                row.append(t.count)
                rows.append(row)
                row = []

    return rows
# PASS esFilter SIMPLIFIED ElasticSearch FILTER OBJECT
# RETURN MVEL EXPRESSION
def _where(esFilter, _translate):
    """
    PASS esFilter SIMPLIFIED ElasticSearch FILTER OBJECT
    _translate - FUNCTION THAT CONVERTS A FIELD NAME (OR SCRIPT) TO ITS MVEL FORM
    RETURN MVEL EXPRESSION

    HANDLES: and, or, not, term, terms, exists, missing, range, script, prefix, match_all
    """
    if not esFilter or esFilter is TRUE_FILTER:
        return "true"

    keys = esFilter.keys()
    if len(keys) != 1:
        Log.error("Expecting only one filter aggregate")
    op = keys[0]
    if op == "and":
        conditions = esFilter[op]
        if not (conditions):
            return "true"
        if len(conditions) == 1:
            return _where(conditions[0], _translate)
        output = "(" + " && ".join(_where(l, _translate) for l in conditions) + ")"
        return output
    elif op == "or":
        conditions = esFilter[op]
        if not conditions:
            return "false"
        if len(conditions) == 1:
            return _where(conditions[0], _translate)
        output = "(" + " || ".join(_where(l, _translate) for l in conditions) + ")"
        return output
    elif op == "not":
        # THE BRACKET WAS MISPLACED: esFilter[op, _translate] INDEXED WITH A TUPLE AND PASSED ONE ARGUMENT
        return "!(" + _where(esFilter[op], _translate) + ")"
    elif op == "term":
        pair = esFilter[op]
        if len(pair.keys()) == 1:
            return [_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()][0]
        else:
            return "(" + " && ".join(_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()) + ")"
    elif op == "terms":
        output = []
        for variableName, valueList in esFilter[op].items():
            if not valueList:
                Log.error("Expecting something in 'terms' array")
            if len(valueList) == 1:
                output.append(_translate(variableName) + "==" + value2MVEL(valueList[0]))
            else:
                output.append("(" + " || ".join(_translate(variableName) + "==" + value2MVEL(v) for v in valueList) + ")")
        return " && ".join(output)
    elif op == "exists":
        # "exists":{"field":"myField"}
        pair = esFilter[op]
        variableName = pair.field
        return "(" + _translate(variableName) + "!=null)"
    elif op == "missing":
        fieldName = _translate(esFilter[op].field)
        testExistence = nvl(esFilter[op].existence, True)
        testNull = nvl(esFilter[op].null_value, True)

        output = []
        if testExistence and not testNull:
            output.append("(" + fieldName.replace(".?", ".") + " == empty)")  # REMOVE THE .? SO WE REFER TO THE FIELD, NOT GET THE VALUE
        if testNull:
            output.append("(" + fieldName + "==null)")
        return " || ".join(output)
    elif op == "range":
        pair = esFilter[op]
        ranges = []

        # BOUNDS ARE COMPARED TO None (NOT TESTED FOR TRUTH) SO A BOUND OF ZERO IS NOT IGNORED
        for variableName, r in pair.items():
            # LOWER BOUND: gte, gt, OR from (+include_lower)
            if r.gte != None:
                ranges.append(value2MVEL(r.gte) + "<=" + _translate(variableName))
            elif r.gt != None:
                ranges.append(value2MVEL(r.gt) + "<" + _translate(variableName))
            elif r["from"] != None:
                if r.include_lower == None or r.include_lower:
                    ranges.append(value2MVEL(r["from"]) + "<=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["from"]) + "<" + _translate(variableName))

            # UPPER BOUND: lte, lt, OR to (+include_upper)
            # THE LAST CASE PREVIOUSLY RE-READ "from" AND include_lower
            if r.lte != None:
                ranges.append(value2MVEL(r.lte) + ">=" + _translate(variableName))
            elif r.lt != None:
                ranges.append(value2MVEL(r.lt) + ">" + _translate(variableName))
            elif r["to"] != None:
                if r.include_upper == None or r.include_upper:
                    ranges.append(value2MVEL(r["to"]) + ">=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["to"]) + ">" + _translate(variableName))

        return "("+" && ".join(ranges)+")"

    elif op == "script":
        script = esFilter[op].script
        return _translate(script)
    elif op == "prefix":
        pair = esFilter[op]
        variableName, value = pair.items()[0]
        return _translate(variableName) + ".startsWith(" + CNV.string2quote(value) + ")"
    elif op == "match_all":
        return "true"
    else:
        Log.error("'" + op + "' is an unknown aggregate")

    return ""
VAR_CHAR = "abcdefghijklmnopqurstvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.\""
keyword_pattern = re.compile(r"\w+(?:\.\w+)*")


def isKeyword(value):
    """
    RETURN TRUE IF THE WHOLE value IS JUST A NAME OF A FIELD (DOTTED PATHS INCLUDED)
    AN EXPRESSION THAT MERELY STARTS WITH A NAME, LIKE "a + b" OR "floor(x)", IS NOT A KEYWORD
    ANYTHING THAT IS NOT A NON-EMPTY STRING IS REPORTED AS AN ERROR
    """
    if not value or not isinstance(value, basestring):
        Log.error("Expecting a string")

    # match() ONLY ANCHORS THE START, SO ALSO REQUIRE THE MATCH TO REACH THE END OF value
    found = keyword_pattern.match(value)
    if found and found.end() == len(value):
        return True
    return False
def value2MVEL(value):
    """
    FROM PYTHON VALUE TO MVEL EQUIVALENT
    - datetime AND Duration BECOME MILLISECONDS, FOLLOWED BY A READABLE MVEL COMMENT
    - NUMBERS ARE WRITTEN AS-IS
    - EVERYTHING ELSE IS QUOTED BY CNV.string2quote()
    """
    if isinstance(value, datetime):
        # datetime HAS NO format() METHOD; strftime() BUILDS THE READABLE COMMENT
        # (2-DIGIT YEAR, MONTH ABBREVIATION, DAY, THEN HHMMSS)
        return str(CNV.datetime2milli(value)) + " /*" + value.strftime("%y%b%d %H%M%S") + "*/"  # TIME
    if isinstance(value, Duration):
        return str(CNV.timedelta2milli(value)) + " /*" + str(value) + "*/"  # DURATION

    if Math.is_number(value):
        return str(value)
    return CNV.string2quote(value)
# FROM PYTHON VALUE TO ES QUERY EQUIVALENT
def value2query(value):
    """
    FROM PYTHON VALUE TO ES QUERY EQUIVALENT
    datetime AND Duration BECOME MILLISECONDS, NUMBERS ARE UNCHANGED,
    EVERYTHING ELSE IS QUOTED BY CNV.string2quote()
    """
    if isinstance(value, datetime):
        converted = CNV.datetime2milli(value)
    elif isinstance(value, Duration):
        converted = value.milli
    elif Math.is_number(value):
        converted = value
    else:
        converted = CNV.string2quote(value)
    return converted
def value2value(value):
    """
    CONVERT FROM PYTHON VALUE TO ES EQUIVALENT
    datetime AND Duration BECOME MILLISECONDS, EVERYTHING ELSE IS UNCHANGED
    """
    if isinstance(value, datetime):
        converted = CNV.datetime2milli(value)
    elif isinstance(value, Duration):
        converted = value.milli  # DURATION
    else:
        converted = value
    return converted
def addFunctions(mvel):
    """
    FIND THE MVEL FUNCTIONS (FROM FUNCTIONS) THAT mvel REFERS TO BY NAME
    RETURN Struct(head=<SOURCE OF THE REQUIRED FUNCTIONS>, body=<mvel, UNCHANGED>)
    """
    isAdded = Struct()  # SOME FUNCTIONS DEPEND ON OTHERS
    head = []
    body = mvel

    # REPEAT UNTIL A FULL PASS ADDS NOTHING: AN ADDED FUNCTION MAY REFER TO ANOTHER
    keepAdding = True
    while keepAdding:
        keepAdding = False
        for func_name, func_code in FUNCTIONS.items():
            if isAdded[func_name] or mvel.find(func_name) == -1:
                continue
            keepAdding = True
            isAdded[func_name] = func_code
            head.append(func_code)
            # THE ADDED SOURCE IS SEARCHED TOO ON THE NEXT PASS
            mvel = func_code + mvel

    return Struct(
        head="".join(head),
        body=body
    )
# MVEL SOURCE OF THE HELPER FUNCTIONS, KEYED BY FUNCTION NAME
# addFunctions() PREPENDS THE ONES THAT A SCRIPT MENTIONS BY NAME
FUNCTIONS = {
    "String2Quote":
        "var String2Quote = function(str){\n" +
        "if (!(str is String)){ str; }else{\n" +  # LAST VALUE IS RETURNED. "return" STOPS EXECUTION COMPLETELY!
        "" + value2MVEL("\"") + "+" +
        "str.replace(" + value2MVEL("\\") + "," + value2MVEL("\\\\") +
        ").replace(" + value2MVEL("\"") + "," + value2MVEL("\\\"") +
        ").replace(" + value2MVEL("\'") + "," + value2MVEL("\\\'") + ")+" +
        value2MVEL("\"") + ";\n" +
        "}};\n",

    "Value2Pipe":
        'var Value2Pipe = function(value){\n' +  # SPACES ARE IMPORTANT BETWEEN "="
        "if (value==null){ \"0\" }else " +
        "if (value is ArrayList || value is org.elasticsearch.common.mvel2.util.FastList){" +
        "var out = \"\";\n" +
        "foreach (v : value) out = (out==\"\") ? v : out + \"|\" + Value2Pipe(v);\n" +
        "'a'+Value2Pipe(out);\n" +
        "}else \n" +
        "if (value is Long || value is Integer || value is Double){ 'n'+value; }else \n" +
        "if (!(value is String)){ 's'+value.getClass().getName(); }else \n" +
        '"s"+value.replace("\\\\", "\\\\\\\\").replace("|", "\\\\p");' +  # CAN NOT ""+value TO MAKE NUMBER A STRING (OR EVEN TO PREPEND A STRING!)
        "};\n",

    # "replaceAll":
    # "var replaceAll = function(output, find, replace){\n" +
    # "if (output.length()==0) return output;\n"+
    # "s = output.indexOf(find, 0);\n" +
    # "while(s>=0){\n" +
    # "output=output.replace(find, replace);\n" +
    # "s=s-find.length()+replace.length();\n" +
    # "s = output.indexOf(find, s);\n" +
    # "}\n"+
    # "output;\n"+
    # '};\n',

    "floorDay":
        # THE MVEL SOURCE HAD AN EXTRA ")" AFTER THE DIVISION, LEAVING THE PARENTHESES UNBALANCED
        "var floorDay = function(value){ Math.floor(value/86400000)*86400000;};\n",

    "floorInterval":
        "var floorInterval = function(value, interval){ Math.floor((double)value/(double)interval)*interval;};\n",

    "maximum":  # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
        "var maximum = function(a, b){if (a==null) b; else if (b==null) a; else if (a>b) a; else b;\n};\n",

    "minimum":  # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
        "var minimum = function(a, b){if (a==null) b; else if (b==null) a; else if (a<b) a; else b;\n};\n",

    "coalesce":  # PICK FIRST NOT-NULL VALUE
        "var coalesce = function(a, b){if (a==null) b; else a; \n};\n",

    "zero2null":  # ES MAKES IT DIFFICULT TO DETECT NULL/MISSING VALUES, BUT WHEN DEALING WITH NUMBERS, ES DEFAULTS TO RETURNING ZERO FOR missing VALUES!!
        "var zero2null = function(a){if (a==0) null; else a; \n};\n",

    "get":  # MY OWN PERSONAL *FU* TO THE TWISTED MVEL PROPERTY ACCESS
        "var get = function(hash, key){\n" +
        "if (hash==null) null; else hash[key];\n" +
        "};\n",

    "isNumeric":
        "var isNumeric = function(value){\n" +
        "value = value + \"\";\n" +
        # "try{ value-0; }catch(e){ 0; }"+
        "var isNum = value.length()>0;\n" +
        "for (v : value.toCharArray()){\n" +
        "if (\"0123456789\".indexOf(v)==-1) isNum = false;\n" +
        "};\n" +
        "isNum;\n" +
        "};\n",

    "alpha2zero":
        "var alpha2zero = function(value){\n" +
        "var output = 0;\n" +
        "if (isNumeric(value)) output = value-0;\n" +
        "return output;" +
        "};\n",

    # KANBAN SOFTWARE
    # CAN SEE QUEUE BLOCKAGES AND SEE SINGLE BLOCKERS

    "concat":
        "var concat = function(array){\n" +
        "if (array==null) \"\"; else {\n" +
        "var output = \"\";\n" +
        "for (v : array){ output = output+\"|\"+v+\"|\"; };\n" +
        "output;\n" +
        "}};\n",

    # "contains":
    # "var contains = function(array, value){\n"+
    # "if (array==null) false; else {\n"+
    # "var good = false;\n"+
    # "for (v : array){ if (v==value) good=true; };\n"+
    # "good;\n"+
    # "}};\n",

    "getFlagValue":  # SPECIFICALLY FOR cf_* FLAGS: CONCATENATE THE ATTRIBUTE NAME WITH ATTRIBUTE VALUE, IF EXISTS
        "var getFlagValue = function(name){\n" +
        "if (_source[name]!=null)" +
        "\" \"+name+_source[name];\n" +
        "else \n" +
        "\"\";\n" +
        "};\n",

    "getDocValue":
        "var getDocValue = function(name){\n" +
        "var out = [];\n" +
        "var v = doc[name];\n" +
        # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();=n" +
        "if (v==null || v.value==null) { null; } else\n" +
        "if (v.values.size()<=1){ v.value; } else\n" +  # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
        "{for(k : v.values) out.add(k); out;}" +
        "};\n",

    "getSourceValue":
        "var getSourceValue = function(name){\n" +
        "var out = [];\n" +
        "var v = _source[name];\n" +
        # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();=n" +
        "if (v==null) { null; } else\n" +
        "if (v[\"values\"]==null || v.values.size()<=1){ v.value; } else {\n" +  # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
        "for(k : v) out.add(k); out;\n" +  # .size() MUST BE USED INSTEAD OF .length, THE LATTER WILL CRASH IF JITTED (https://github.com/elasticsearch/elasticsearch/issues/3094)
        "}};\n",

    "getDocArray":
        "var getDocArray = function(name){\n" +
        "var out = [];\n" +
        "var v = doc[name];\n" +
        "if (v!=null && v.value!=null) for(k : v.values) out.add(k);" +
        "out;" +
        "};\n",

    "milli2Month":
        "var milli2Month = function(value, milliOffset){\n" +
        "g=new java.util.GregorianCalendar(new java.util.SimpleTimeZone(0, \"GMT\"));\n" +
        "g.setTimeInMillis(value);\n" +
        "g.add(java.util.GregorianCalendar.MILLISECOND, -milliOffset);\n" +
        "m = g.get(java.util.GregorianCalendar.MONTH);\n" +
        "output = \"\"+g.get(java.util.GregorianCalendar.YEAR)+(m>9?\"\":\"0\")+m;\n" +
        "output;\n" +
        "};\n",

    "between":
        "var between = function(value, prefix, suffix){\n" +
        "if (value==null){ null; }else{\n" +
        "var start = value.indexOf(prefix, 0);\n" +
        "if (start==-1){ null; }else{\n" +
        "var end = value.indexOf(suffix, start+prefix.length());\n" +
        "if (end==-1){ null; }else{\n" +
        "value.substring(start+prefix.length(), end);\n" +
        "}}}\n" +
        "};\n"
}
def replacePrefix(value, prefix, new_prefix):
    """
    SWAP A LEADING prefix ON value FOR new_prefix
    value IS RETURNED UNCHANGED WHEN IT DOES NOT START WITH prefix
    ANY FAILURE (eg value IS NOT A STRING) IS REPORTED THROUGH Log.error
    """
    try:
        if not value.startswith(prefix):
            return value
        remainder = value[len(prefix):]
        return new_prefix + remainder
    except Exception as e:
        Log.error("can not replace prefix", e)

Разница между файлами не показана из-за своего большого размера Загрузить разницу

240
tests/util/queries/cube.py Normal file
Просмотреть файл

@ -0,0 +1,240 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from ..collections.matrix import Matrix
from ..collections import MAX, OR
from ..queries.query import _normalize_edge
from ..struct import StructList
from ..structs.wraps import wrap, wrap_dot, listwrap
from ..env.logs import Log
class Cube(object):
    """
    A CUBE IS LIKE A NUMPY ARRAY, ONLY WITH THE DIMENSIONS TYPED AND NAMED.
    CUBES ARE BETTER THAN PANDAS BECAUSE THEY DEAL WITH NULLS GRACEFULLY
    """

    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data

        select - ONE SELECT CLAUSE (A VALUE CUBE) OR A list OF THEM
        edges - THE NAMED DIMENSIONS; WHEN EMPTY THEY ARE INFERRED FROM data
        frum - ACCEPTED BUT NOT USED BY THIS CONSTRUCTOR
        """
        self.is_value = False if isinstance(select, list) else True
        self.select = select

        # ENSURE data IS PROPER FORM
        if isinstance(select, list):
            if OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")
                data = {select.name: Matrix.ZERO}
                self.edges = StructList.EMPTY
            elif isinstance(data, dict):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = [{"name": "rownum", "domain": {"type": "index"}}]
                else:
                    self.edges = StructList.EMPTY
            elif isinstance(data, list):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")
                data = {select.name: Matrix.wrap(data)}
                self.edges = [{"name": "rownum", "domain": {"type": "index"}}]
            elif isinstance(data, Matrix):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")
                # NOTE(review): THIS BRANCH NEVER ASSIGNS self.edges, SO LATER
                # READS FALL THROUGH TO __getattr__ - CONFIRM WHAT EDGES A BARE
                # Matrix SHOULD GET
                data = {select.name: data}
            else:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")
                data = {select.name: Matrix(value=data)}
                self.edges = StructList.EMPTY
        else:
            self.edges = edges

        self.data = data

    def __len__(self):
        """
        RETURN DATA VOLUME
        """
        if not self.edges:
            return 1

        return len(self.data.values()[0])

    def __iter__(self):
        """
        VALUE CUBES ITERATE THEIR SINGLE MATRIX; ONE-DIMENSIONAL index CUBES
        ITERATE AS RECORDS; ANYTHING WITH MORE DIMENSIONS IS AN ERROR
        """
        if self.is_value:
            return self.data[self.select.name].__iter__()

        if not self.edges:
            return list.__iter__([])

        if len(self.edges) == 1 and wrap(self.edges[0]).domain.type == "index":
            # ITERATE AS LIST OF RECORDS
            keys = list(self.data.keys())
            output = (struct.zip(keys, r) for r in zip(*self.data.values()))
            return output

        Log.error("This is a multicube")

    @property
    def value(self):
        """
        THE SINGLE VALUE HELD BY A ZERO-DIMENSIONAL, SINGLE-SELECT CUBE
        """
        if self.edges:
            Log.error("can not get value of with dimension")
        if isinstance(self.select, list):
            Log.error("can not get value of multi-valued cubes")
        return self.data[self.select.name].cube

    # COMPARISON AND ARITHMETIC ARE DELEGATED TO THE SCALAR value

    def __lt__(self, other):
        return self.value < other

    def __gt__(self, other):
        return self.value > other

    def __eq__(self, other):
        # == None (NOT is None) IS DELIBERATE: IT LETS THE OPERAND'S OWN __eq__ DECIDE
        if other == None:
            if self.edges:
                return False
            if self.is_value and self.value == None:
                return True
            return False
        return self.value == other

    def __ne__(self, other):
        return not Cube.__eq__(self, other)

    def __add__(self, other):
        return self.value + other

    def __radd__(self, other):
        return other + self.value

    def __sub__(self, other):
        return self.value - other

    def __rsub__(self, other):
        return other - self.value

    def __mul__(self, other):
        return self.value * other

    def __rmul__(self, other):
        return other * self.value

    def __div__(self, other):
        return self.value / other

    def __rdiv__(self, other):
        return other / self.value

    def __getitem__(self, item):
        return self.data[item]

    def __getattr__(self, item):
        # ONLY CALLED WHEN NORMAL ATTRIBUTE LOOKUP FAILS: EXPOSE data ENTRIES AS ATTRIBUTES
        return self.data[item]

    def get_columns(self):
        return self.edges + listwrap(self.select)

    def _select(self, select):
        """
        RETURN A NEW Cube WITH ONLY THE REQUESTED COLUMNS; WHEN ANY OF THEM
        IS AN AGGREGATE THE RESULT HAS NO EDGES
        """
        selects = listwrap(select)
        is_aggregate = OR(s.aggregate != None and s.aggregate != "none" for s in selects)
        if is_aggregate:
            values = {s.name: Matrix(value=self.data[s.value].aggregate(s.aggregate)) for s in selects}
            return Cube(select, [], values)
        else:
            values = {s.name: self.data[s.value] for s in selects}
            return Cube(select, self.edges, values)

    def groupby(self, edges):
        """
        SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY
        simple==True WILL HAVE GROUPS BASED ON PARTITION VALUE, NOT PARTITION OBJECTS

        RETURNS AN ITERABLE OF (term, sub-cube-or-value) PAIRS
        """
        edges = StructList([_normalize_edge(e) for e in edges])

        stacked = [e for e in self.edges if e.name in edges.name]
        remainder = [e for e in self.edges if e.name not in edges.name]
        selector = [1 if e.name in edges.name else 0 for e in self.edges]

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")

        # CACHE SOME RESULTS
        keys = [e.name for e in self.edges]
        getKey = [e.domain.getKey for e in self.edges]
        lookup = [[getKey[i](p) for p in e.domain.partitions] for i, e in enumerate(self.edges)]

        def coord2term(coord):
            output = wrap_dot({keys[i]: lookup[i][c] for i, c in enumerate(coord)})
            return output

        if isinstance(self.select, list):
            selects = listwrap(self.select)
            index, v = zip(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [v]
            for s in selects[1::]:
                # FIXED: WAS .group_by(); EVERY OTHER CALL ON THESE MATRICES IS .groupby()
                _, v = zip(*self.data[s.name].groupby(selector))
                values.append(v)

            output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
        elif not remainder:
            # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
            output = (
                (
                    coord2term(coord),
                    v
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )
        else:
            output = (
                (
                    coord2term(coord),
                    Cube(self.select, remainder, v)
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )

        return output

    def __str__(self):
        if self.is_value:
            return str(self.data)
        else:
            return str(self.data)

    def __int__(self):
        if self.is_value:
            return int(self.value)
        else:
            return int(self.data)

    def __float__(self):
        if self.is_value:
            return float(self.value)
        else:
            return float(self.data)

Просмотреть файл

@ -0,0 +1,438 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from ..cnv import CNV
from ..collections.matrix import Matrix
from .query import Query
from ..sql.db import int_list_packer, SQL, DB
from ..env.logs import Log
from ..strings import indent, expand_template
from ..struct import nvl, StructList
from ..structs.wraps import wrap, listwrap
class DBQuery(object):
    """
    Qb to MySQL DATABASE QUERIES
    """

    def __init__(self, db):
        """
        db - A DB INSTANCE, OR THE SETTINGS USED TO CONSTRUCT ONE
        """
        object.__init__(self)
        if isinstance(db, DB):
            self.db = db
        else:
            self.db = DB(db)

    def query(self, query, stacked=False):
        """
        TRANSLATE Qb QUERY ON SINGLE TABLE TO SQL QUERY
        RUNS THE SQL AND RETURNS THE POST-PROCESSED RESULT
        """
        query = Query(query)

        sql, post = self._subquery(query, isolate=False, stacked=stacked)
        query.data = post(sql)
        return query.data

    def update(self, query):
        """
        ISSUE AN UPDATE: query["from"] IS THE TABLE, query.set MAPS COLUMN
        NAMES TO NEW VALUES, query.where IS AN esfilter
        """
        self.db.execute("""
            UPDATE {{table_name}}
            SET {{assignment}}
            {{where}}
        """, {
            "table_name": query["from"],
            # FIXED: ITERATING THE MAPPING ITSELF YIELDS KEYS ONLY; .items() GIVES (column, value) PAIRS
            "assignment": ",".join(self.db.quote_column(k) + "=" + self.db.quote_value(v) for k, v in query.set.items()),
            "where": self._where2sql(query.where)
        })

    def _subquery(self, query, isolate=True, stacked=False):
        """
        RETURN (sql, post) WHERE post(sql) EXECUTES AND SHAPES THE RESULT
        A STRING OR NAMED query IS TREATED AS A TABLE REFERENCE (post IS None)
        isolate - WRAP THE SQL IN PARENTHESES WITH AN ALIAS SO IT CAN BE NESTED
        """
        if isinstance(query, basestring):
            return self.db.quote_column(query), None
        if query.name:  # IT WOULD BE SAFER TO WRAP TABLE REFERENCES IN A TYPED OBJECT (Cube, MAYBE?)
            return self.db.quote_column(query.name), None

        if query.edges:
            # RETURN A CUBE
            sql, post = self._grouped(query, stacked)
        else:
            select = listwrap(query.select)
            if select[0].aggregate != "none":
                sql, post = self._aggop(query)
            else:
                sql, post = self._setop(query)

        if isolate:
            return "(\n"+sql+"\n) a\n", post
        else:
            return sql, post

    def _grouped(self, query, stacked=False):
        """
        GROUP BY QUERY: ONE GROUP PER COMBINATION OF EDGE VALUES
        stacked - RETURN THE PLAIN RESULT SET INSTEAD OF FILLING MATRICES
        """
        select = listwrap(query.select)

        # RETURN SINGLE OBJECT WITH AGGREGATES
        for s in select:
            if s.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", {"select": s})

        selects = StructList()
        groups = StructList()
        edges = query.edges
        for e in edges:
            if e.domain.type != "default":
                Log.error("domain of type {{type}} not supported, yet", {"type": e.domain.type})
            groups.append(e.value)
            selects.append(e.value + " AS " + self.db.quote_column(e.name))

        for s in select:
            selects.append(aggregates[s.aggregate].replace("{{code}}", s.value) + " AS " + self.db.quote_column(s.name))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            GROUP BY
                {{groups}}
        """, {
            "selects": SQL(",\n".join(selects)),
            "groups": SQL(",\n".join(groups)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where)
        })

        def post_stacked(sql):
            # RETURN IN THE USUAL DATABASE RESULT SET FORMAT
            return self.db.query(sql)

        def post(sql):
            # FIND OUT THE default DOMAIN SIZES
            result = self.db.column_query(sql)
            num_edges = len(edges)
            for e, edge in enumerate(edges):
                domain = edge.domain
                if domain.type == "default":
                    domain.type = "set"
                    parts = set(result[e])
                    domain.partitions = [{"index": i, "value": p} for i, p in enumerate(parts)]
                    domain.map = {p: i for i, p in enumerate(parts)}
                else:
                    Log.error("Do not know what to do here, yet")

            # FILL THE DATA CUBE
            maps = [(struct.unwrap(e.domain.map), result[i]) for i, e in enumerate(edges)]
            cubes = StructList()
            for c, s in enumerate(select):
                data = Matrix(*[len(e.domain.partitions) + (1 if e.allow_nulls else 0) for e in edges])
                for rownum, value in enumerate(result[c + num_edges]):
                    coord = [m[r[rownum]] for m, r in maps]
                    data[coord] = value
                cubes.append(data)

            if isinstance(query.select, list):
                return cubes
            else:
                return cubes[0]

        return sql, post if not stacked else post_stacked

    def _aggop(self, query):
        """
        SINGLE ROW RETURNED WITH AGGREGATES
        """
        if isinstance(query.select, list):
            # RETURN SINGLE OBJECT WITH AGGREGATES
            for s in query.select:
                if s.aggregate not in aggregates:
                    Log.error("Expecting all columns to have an aggregate: {{select}}", {"select": s})

            selects = StructList()
            for s in query.select:
                selects.append(aggregates[s.aggregate].replace("{{code}}", s.value) + " AS " + self.db.quote_column(s.name))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """, {
                "selects": SQL(",\n".join(selects)),
                "table": self._subquery(query["from"])[0],
                # FIXED: WAS query.filter; EVERY OTHER BUILDER IN THIS CLASS READS query.where
                "where": self._where2sql(query.where)
            })

            # NOTE(review): db.column() IS NOT USED ANYWHERE ELSE HERE (OTHERS USE column_query/query) - CONFIRM IT EXISTS
            return sql, lambda sql: self.db.column(sql)[0]  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
        else:
            # RETURN SINGLE VALUE
            s0 = query.select
            if s0.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", {"select": s0})

            select = aggregates[s0.aggregate].replace("{{code}}", s0.value) + " AS " + self.db.quote_column(s0.name)

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """, {
                "selects": SQL(select),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where)
            })

            def post(sql):
                result = self.db.column_query(sql)
                return result[0][0]

            return sql, post  # RETURN SINGLE VALUE

    def _setop(self, query):
        """
        NO AGGREGATION, SIMPLE LIST COMPREHENSION
        """
        if isinstance(query.select, list):
            # RETURN BORING RESULT SET
            selects = StructList()
            for s in listwrap(query.select):
                if isinstance(s.value, dict):
                    # ONE SQL COLUMN PER PROPERTY, NAMED "<select>.<property>"
                    # FIXED: .items WAS NOT CALLED
                    for k, v in s.value.items():
                        selects.append(v + " AS " + self.db.quote_column(s.name+"."+k))
                elif isinstance(s.value, list):
                    # ONE SQL COLUMN PER ELEMENT, NAMED "<select>,<position>"
                    # FIXED: WAS if (SO THE else BELOW ALSO RAN FOR dict VALUES)
                    # FIXED: USE THE ELEMENT ss, NOT THE WHOLE LIST s.value
                    for i, ss in enumerate(s.value):
                        selects.append(ss + " AS " + self.db.quote_column(s.name+","+str(i)))
                else:
                    selects.append(s.value + " AS " + self.db.quote_column(s.name))

            # FIXED: MySQL REQUIRES ORDER BY TO COME BEFORE LIMIT
            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{sort}}
                {{limit}}
            """, {
                "selects": SQL(",\n".join(selects)),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            def post_process(sql):
                result = self.db.query(sql)
                for s in listwrap(query.select):
                    if isinstance(s.value, dict):
                        # FOLD THE "<select>.<property>" COLUMNS BACK INTO ONE OBJECT
                        for r in result:
                            r[s.name] = {}
                            for k, v in s.value.items():  # FIXED: WAS MISSING .items()
                                r[s.name][k] = r[s.name+"."+k]
                                r[s.name+"."+k] = None

                    if isinstance(s.value, list):
                        # REWRITE AS TUPLE
                        for r in result:
                            r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
                            for i, ss in enumerate(s.value):
                                r[s.name + "," + str(i)] = None

                expand_json(result)
                return result

            return sql, post_process  # RETURN BORING RESULT SET
        else:
            # RETURN LIST OF VALUES
            if query.select.value == "*":
                select = "*"
            else:
                name = query.select.name
                select = query.select.value + " AS " + self.db.quote_column(name)

            # FIXED: MySQL REQUIRES ORDER BY TO COME BEFORE LIMIT
            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{sort}}
                {{limit}}
            """, {
                "selects": SQL(select),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            if query.select.value == "*":
                def post(sql):
                    result = self.db.query(sql)
                    expand_json(result)
                    return result

                return sql, post
            else:
                return sql, lambda sql: [r[name] for r in self.db.query(sql)]  # RETURNING LIST OF VALUES

    def _sort2sql(self, sort):
        """
        RETURN ORDER BY CLAUSE
        """
        if not sort:
            return ""
        return SQL("ORDER BY "+",\n".join([self.db.quote_column(o.field)+(" DESC" if o.sort==-1 else "") for o in sort]))

    def _limit2sql(self, limit):
        """
        RETURN LIMIT CLAUSE (EMPTY WHEN NO limit)
        """
        return SQL("" if not limit else "LIMIT "+str(limit))

    def _where2sql(self, where):
        """
        RETURN WHERE CLAUSE FOR THE GIVEN esfilter (EMPTY WHEN NO FILTER)
        """
        if where == None:
            return ""
        return SQL("WHERE "+_esfilter2sqlwhere(self.db, where))
def _isolate(separator, list):
try:
if len(list) > 1:
return "(\n" + indent((" " + separator + "\n").join(list)) + "\n)"
else:
return list[0]
except Exception, e:
Log.error("Programming problem: separator={{separator}}, list={{list}", {
"list": list,
"separator": separator
}, e)
def esfilter2sqlwhere(db, esfilter):
    """
    CONVERT esfilter TO A SQL CONDITION, MARKED WITH SQL() SO IT IS TREATED AS CODE
    """
    condition = _esfilter2sqlwhere(db, esfilter)
    return SQL(condition)
def _esfilter2sqlwhere(db, esfilter):
    """
    CONVERT ElasticSearch FILTER TO SQL FILTER
    db - REQUIRED TO PROPERLY QUOTE VALUES AND COLUMN NAMES

    RETURNS THE SQL CONDITION AS A STRING
    UNRECOGNIZED FILTERS ARE REPORTED THROUGH Log.error
    """
    esfilter = wrap(esfilter)

    if esfilter["and"]:
        return _isolate("AND", [esfilter2sqlwhere(db, a) for a in esfilter["and"]])
    elif esfilter["or"]:
        return _isolate("OR", [esfilter2sqlwhere(db, a) for a in esfilter["or"]])
    elif esfilter["not"]:
        return "NOT (" + esfilter2sqlwhere(db, esfilter["not"]) + ")"
    elif esfilter.term:
        return _isolate("AND", [db.quote_column(col) + "=" + db.quote_value(val) for col, val in esfilter.term.items()])
    elif esfilter.terms:
        # ONLY THE FIRST (col, values) PAIR IS USED: EVERY PATH BELOW RETURNS
        for col, v in esfilter.terms.items():
            if len(v) == 0:
                return "FALSE"

            try:
                int_list = CNV.value2intlist(v)
                has_null = False
                for vv in v:
                    if vv == None:
                        has_null = True
                        break
                if int_list:
                    packed = int_list_packer(col, int_list)
                    if has_null:
                        return esfilter2sqlwhere(db, {"or": [{"missing": col}, packed]})
                    else:
                        return esfilter2sqlwhere(db, packed)
                else:
                    if has_null:
                        return esfilter2sqlwhere(db, {"missing": col})
                    else:
                        return "false"
            except Exception as e:
                # BEST EFFORT: WHEN THE INTEGER PACKING FAILS, FALL BACK TO A PLAIN in() LIST
                pass
            return db.quote_column(col) + " in (" + ", ".join([db.quote_value(val) for val in v]) + ")"
    elif esfilter.script:
        return "(" + esfilter.script + ")"
    elif esfilter.range:
        # FIXED: single() READS THE SYMBOLIC FORMS (r[">="], r["<="]) SO THEY
        # MUST BE TRANSLATABLE TOO; PREVIOUSLY A ONE-SIDED SYMBOLIC RANGE RAISED KeyError
        name2sign = {
            "gt": ">",
            "gte": ">=",
            "lte": "<=",
            "lt": "<",
            ">": ">",
            ">=": ">=",
            "<=": "<=",
            "<": "<"
        }

        def single(col, r):
            lower = nvl(r["gte"], r[">="])
            upper = nvl(r["lte"], r["<="])
            if lower and upper:
                # SPECIAL CASE (BETWEEN)
                return db.quote_column(col) + " BETWEEN " + db.quote_value(lower) + " AND " + db.quote_value(upper)
            else:
                return " AND ".join(
                    db.quote_column(col) + name2sign[sign] + db.quote_value(value)
                    for sign, value in r.items()
                )

        output = _isolate("AND", [single(col, ranges) for col, ranges in esfilter.range.items()])
        return output
    elif esfilter.missing:
        if isinstance(esfilter.missing, basestring):
            return "(" + db.quote_column(esfilter.missing) + " IS Null)"
        else:
            return "(" + db.quote_column(esfilter.missing.field) + " IS Null)"
    elif esfilter.exists:
        if isinstance(esfilter.exists, basestring):
            return "(" + db.quote_column(esfilter.exists) + " IS NOT Null)"
        else:
            return "(" + db.quote_column(esfilter.exists.field) + " IS NOT Null)"
    elif esfilter.match_all:
        return "1=1"
    elif esfilter.instr:
        return _isolate("AND", ["instr(" + db.quote_column(col) + ", " + db.quote_value(val) + ")>0" for col, val in esfilter.instr.items()])
    else:
        Log.error("Can not convert esfilter to SQL: {{esfilter}}", {"esfilter": esfilter})
def expand_json(rows):
    """
    CONVERT JSON TO VALUES, IN PLACE
    ANY STRING FIELD THAT STARTS WITH "[" OR "{" IS PARSED WITH CNV.JSON2object;
    FIELDS THAT FAIL TO PARSE ARE LEFT AS THEY WERE
    """
    for row in rows:
        # SNAPSHOT THE ITEMS: THE ROW IS ASSIGNED TO WHILE WE WALK IT
        for key, text in list(row.items()):
            if not isinstance(text, basestring):
                continue
            if text[0:1] not in ("[", "{"):
                continue
            try:
                row[key] = CNV.JSON2object(text)
            except Exception as e:
                pass
# MAP Qb AGGREGATE NAME TO THE SQL EXPRESSION THAT COMPUTES IT
# "{{code}}" IS A PLACEHOLDER: THE QUERY BUILDERS IN THIS FILE SUBSTITUTE THE
# SELECTED COLUMN/EXPRESSION FOR IT (VIA str.replace) WHEN ASSEMBLING THE SQL.
# SEVERAL NAMES ARE ALIASES FOR THE SAME SQL FUNCTION.
aggregates = {
"one": "COUNT({{code}})",
"sum": "SUM({{code}})",
"add": "SUM({{code}})",
"count": "COUNT({{code}})",
"maximum": "MAX({{code}})",
"minimum": "MIN({{code}})",
"max": "MAX({{code}})",
"min": "MIN({{code}})",
"mean": "AVG({{code}})",
"average": "AVG({{code}})",
"avg": "AVG({{code}})",
"N": "COUNT({{code}})",
# X0, X1, X2: COUNT, SUM, AND SUM OF SQUARES
"X0": "COUNT({{code}})",
"X1": "SUM({{code}})",
"X2": "SUM(POWER({{code}}, 2))",
"std": "STDDEV({{code}})",
"stddev": "STDDEV({{code}})",
# VARIANCE IS EXPRESSED AS THE SQUARE OF THE STANDARD DEVIATION
"var": "POWER(STDDEV({{code}}), 2)",
"variance": "POWER(STDDEV({{code}}), 2)"
}

Просмотреть файл

@ -0,0 +1,336 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from ..collections import SUM
from ..queries.domains import Domain, ALGEBRAIC, KNOWN
from ..struct import Struct, nvl, Null, StructList, join_field, split_field
from ..times.timer import Timer
from ..env.logs import Log
from ..structs.wraps import wrap, listwrap
DEFAULT_QUERY_LIMIT = 20
class Dimension(object):
    """
    A NAMED DIMENSION: A TREE OF CHILD DIMENSIONS (edges) AND/OR A LIST OF
    PARTITIONS, EACH PARTITION CARRYING THE esfilter THAT SELECTS IT
    """

    def __init__(self, dim, parent, qb):
        """
        dim - DESCRIPTION OF THIS DIMENSION (name, type, field(s), partitions, edges, ...)
        parent - THE ENCLOSING DIMENSION; ITS full_name PREFIXES THIS ONE'S
        qb - QUERY SERVICE, USED TO FETCH THE PARTS WHEN THEY ARE NOT GIVEN;
             qb.es.settings.name IS THE FALLBACK INDEX NAME
        """
        self.name = dim.name
        self.parent = parent
        self.full_name = join_field(split_field(self.parent.full_name)+[self.name])
        self.min = dim.min
        self.max = dim.max
        self.interval = dim.interval
        self.value = dim.value
        self.label = dim.label
        self.end = dim.end
        self.esfilter = dim.esfilter
        self.weight = dim.weight
        self.style = dim.style
        self.isFacet = dim.isFacet

        self.type = nvl(dim.type, "set")
        self.limit = nvl(dim.limit, DEFAULT_QUERY_LIMIT)
        self.index = nvl(dim.index, nvl(parent, Null).index, qb.es.settings.name)

        if not self.index:
            Log.error("Expecting an index name")

        # ALLOW ACCESS TO SUB-PART BY NAME (IF ONLY THERE IS NO NAME COLLISION)
        self.edges = {}
        for e in listwrap(dim.edges):
            new_e = Dimension(e, self, qb)
            self.edges[new_e.full_name] = new_e

        self.partitions = wrap(nvl(dim.partitions, []))
        parse_partition(self)

        fields = nvl(dim.field, dim.fields)
        if not fields:
            return  # NO FIELDS TO SEARCH
        elif isinstance(fields, dict):
            self.fields = wrap(fields)
            edges = wrap([{"name": k, "value": v, "allowNulls": False} for k, v in self.fields.items()])
        else:
            self.fields = listwrap(fields)
            edges = wrap([{"name": f, "value": f, "index": i, "allowNulls": False} for i, f in enumerate(self.fields)])

        if dim.partitions:
            return  # ALREADY HAVE PARTS
        if dim.type not in KNOWN - ALGEBRAIC:
            return  # PARTS OR TOO FUZZY (OR TOO NUMEROUS) TO FETCH

        with Timer("Get parts of {{name}}", {"name": self.name}):
            parts = qb.query({
                "from": self.index,
                "select": {"name": "count", "aggregate": "count"},
                "edges": edges,
                "esfilter": self.esfilter,
                "limit": self.limit
            })

        d = parts.edges[0].domain

        if dim.path:
            if len(edges) > 1:
                Log.error("Not supported yet")
            # EACH TERM RETURNED IS A PATH INTO A PARTITION TREE
            temp = Struct(partitions=[])
            for i, count in enumerate(parts):
                a = dim.path(d.getEnd(d.partitions[i]))
                if not isinstance(a, list):
                    Log.error("The path function on " + dim.name + " must return an ARRAY of parts")
                # REUSE THE PATH JUST COMPUTED (IT WAS BEING COMPUTED A SECOND TIME)
                addParts(
                    temp,
                    a,
                    count,
                    0
                )
            self.value = nvl(dim.value, "name")
            self.partitions = temp.partitions
        elif isinstance(fields, dict):
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            partitions = StructList()
            for g, p in parts.groupby(edges):
                if p:
                    partitions.append({
                        "value": g,
                        "esfilter": {"and": [
                            {"term": {e.value: g[e.name]}}
                            for e in edges
                        ]},
                        "count": int(p)
                    })
            self.partitions = partitions
        elif len(edges) == 1:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "esfilter": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": count
                }
                for i, count in enumerate(parts)
            ])
        elif len(edges) == 2:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            d2 = parts.edges[1].domain

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            array = parts.data.values()[0].cube  # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END)

            def edges2value(*values):
                if isinstance(fields, dict):
                    output = Struct()
                    for e, v in zip(edges, values):
                        output[e.name] = v
                    return output
                else:
                    return tuple(values)

            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "esfilter": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": SUM(subcube),
                    "partitions": [
                        {
                            "name": str(d2.partitions[j].name),  # CONVERT TO STRING
                            "value": edges2value(d.getEnd(d.partitions[i]), d2.getEnd(d2.partitions[j])),
                            "esfilter": {"and": [
                                {"term": {edges[0].value: d.partitions[i].value}},
                                {"term": {edges[1].value: d2.partitions[j].value}}
                            ]},
                            "count": count2
                        }
                        for j, count2 in enumerate(subcube)
                        if count2 > 0  # ONLY INCLUDE PROPERTIES THAT EXIST
                    ]
                }
                for i, subcube in enumerate(array)
            ])
        else:
            Log.error("Not supported")

        parse_partition(self)  # RELATE THE PARTS TO THE PARENTS

    def __getattr__(self, key):
        """
        RETURN CHILD EDGE OR PARTITION BY NAME
        ONLY CALLED WHEN NORMAL ATTRIBUTE LOOKUP FAILS; RETURNS Null WHEN
        NOTHING MATCHES
        """
        # FIXED: self.edges IS A PLAIN dict, SO self.edges[key] RAISED KeyError
        # AND THE PARTITION LOOKUP / Null FALLBACK BELOW COULD NEVER BE REACHED
        e = self.edges.get(key)
        if e:
            return e
        for p in self.partitions:
            if p.name == key:
                return p
        return Null

    def getDomain(self, **kwargs):
        """
        BUILD THE Domain FOR THIS DIMENSION
        kwargs.depth - HOW FAR TO REACH INTO SUB-PARTITIONS (0 OR 1); DEFAULTS
                       TO len(self.fields)-1 WHEN self.fields IS A list
        AT MOST self.limit (DEFAULT DEFAULT_QUERY_LIMIT) TOP-LEVEL PARTS ARE USED
        """
        # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
        kwargs = wrap(kwargs)
        kwargs.depth = nvl(kwargs.depth, len(self.fields)-1 if isinstance(self.fields, list) else None)

        if not self.partitions and self.edges:
            # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
            # FIXED: ENUMERATING THE dict ITSELF YIELDED ITS (STRING) KEYS, NOT THE CHILD DIMENSIONS
            partitions = [
                {
                    "name":v.name,
                    "value":v.name,
                    "esfilter":v.esfilter,
                    "style":v.style,
                    "weight":v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.edges.values())
                if i < nvl(self.limit, DEFAULT_QUERY_LIMIT) and v.esfilter
            ]
            self.isFacet = True
        elif kwargs.depth == None:  # ASSUME self.fields IS A dict
            partitions = StructList()
            for i, part in enumerate(self.partitions):
                if i >= nvl(self.limit, DEFAULT_QUERY_LIMIT):
                    break
                partitions.append({
                    "name":part.name,
                    "value":part.value,
                    "esfilter":part.esfilter,
                    "style":nvl(part.style, part.parent.style),
                    "weight":part.weight  # YO! WHAT DO WE *NOT* COPY?
                })
        elif kwargs.depth == 0:
            partitions = [
                {
                    "name":v.name,
                    "value":v.value,
                    "esfilter":v.esfilter,
                    "style":v.style,
                    "weight":v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.partitions)
                if i < nvl(self.limit, DEFAULT_QUERY_LIMIT)]
        elif kwargs.depth == 1:
            partitions = StructList()
            for i, part in enumerate(self.partitions):
                if i >= nvl(self.limit, DEFAULT_QUERY_LIMIT):
                    continue
                try:
                    for j, subpart in enumerate(part.partitions):
                        partitions.append({
                            "name":join_field(split_field(subpart.parent.name) + [subpart.name]),
                            "value":subpart.value,
                            "esfilter":subpart.esfilter,
                            "style":nvl(subpart.style, subpart.parent.style),
                            "weight":subpart.weight  # YO! WHAT DO WE *NOT* COPY?
                        })
                except Exception as e:
                    Log.error("", e)
        else:
            Log.error("deeper than 2 is not supported yet")

        return Domain(
            type=self.type,
            name=self.name,
            partitions=wrap(partitions),
            min=self.min,
            max=self.max,
            interval=self.interval,
            # THE COMPLICATION IS THAT SOMETIMES WE WANT SIMPLE PARTITIONS, LIKE
            # STRINGS, DATES, OR NUMBERS.  OTHER TIMES WE WANT PARTITION OBJECTS
            # WITH NAME, VALUE, AND OTHER MARKUP.
            # USUALLY A "set" IS MEANT TO BE SIMPLE, BUT THE end() FUNCTION
            # OVERRIDES EVERYTHING AND IS EXPLICIT.  - NOT A GOOD SOLUTION BECAUSE
            # end() IS USED BOTH TO INDICATE THE QUERY PARTITIONS *AND* DISPLAY
            # COORDINATES ON CHARTS

            # PLEASE SPLIT end() INTO value() (replacing the string value) AND
            # label() (for presentation)
            value="name" if not self.value and self.partitions else self.value,
            key="value",
            label=nvl(self.label, (self.type == "set" and self.name)),
            end=nvl(self.end, (self.type == "set" and self.name)),
            isFacet=self.isFacet,
            dimension=self
        )

    def getSelect(self, **kwargs):
        """
        RETURN THE SELECT CLAUSE FOR THIS DIMENSION: THE FIELD(S) WHEN THERE
        ARE ANY, OTHERWISE A DOMAIN-BASED SELECT
        """
        if self.fields:
            if len(self.fields) == 1:
                return Struct(
                    name=self.full_name,
                    value=self.fields[0],
                    aggregate="none"
                )
            else:
                return Struct(
                    name=self.full_name,
                    value=self.fields,
                    aggregate="none"
                )

        domain = self.getDomain(**kwargs)
        if not domain.getKey:
            Log.error("Should not happen")
        if not domain.NULL:
            Log.error("Should not happen")

        return Struct(
            name=self.full_name,
            domain=domain,
            aggregate="none"
        )
def addParts(parentPart, childPath, count, index):
    """
    BUILD A HIERARCHY BY REPEATEDLY CALLING THIS FUNCTION WITH VARIOUS childPaths
    count IS THE NUMBER FOUND FOR THIS PATH
    index IS THE POSITION IN childPath TO ATTACH UNDER parentPart (None MEANS 0)
    """
    depth = 0 if index == None else index
    if depth == len(childPath):
        return  # WALKED OFF THE END OF THE PATH

    child = childPath[depth]
    parentPart.count = nvl(parentPart.count, 0) + count

    if parentPart.partitions == None:
        parentPart.partitions = StructList()

    # DESCEND INTO THE EXISTING PART OF THE SAME NAME, OR ADD child AS A NEW PART
    for existing in parentPart.partitions:
        if existing.name == child.name:
            branch = existing
            break
    else:
        parentPart.partitions.append(child)
        branch = child

    addParts(branch, childPath, count, depth + 1)
def parse_partition(part):
    """
    RECURSIVELY RELATE THE PARTITIONS UNDER part TO THEIR PARENTS:
    COPY index DOWN, DEFAULT value TO name, AND SET parent
    A part WITHOUT AN esfilter GETS THE UNION ("or") OF ITS CHILDREN'S FILTERS
    """
    for child in part.partitions:
        if part.index:
            child.index = part.index  # COPY INDEX DOWN
        parse_partition(child)
        child.value = nvl(child.value, child.name)
        child.parent = part

    if part.esfilter:
        return

    num_parts = len(part.partitions)
    if num_parts > 100:
        Log.error("Must define an esfilter on {{name}} there are too many partitions ({{num_parts}})", {
            "name": part.name,
            "num_parts": num_parts
        })

    # DEFAULT esfilter IS THE UNION OF ALL CHILD FILTERS
    part.esfilter = {"or": part.partitions.esfilter}

Просмотреть файл

@ -0,0 +1,237 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import re
from ..cnv import CNV
from ..collections import UNION
from .index import UniqueIndex
from ..env.logs import Log
from ..struct import Struct, nvl, StructList
from ..structs.wraps import wrap, unwrap
ALGEBRAIC = {"time", "duration", "numeric", "count", "datetime"} # DOMAINS THAT HAVE ALGEBRAIC OPERATIONS DEFINED
KNOWN = {"set", "boolean", "duration", "time", "numeric"} # DOMAINS THAT HAVE A KNOWN NUMBER FOR PARTS AT QUERY TIME
PARTITION = {"uid", "set", "boolean"} # DIMENSIONS WITH CLEAR PARTS
class Domain(object):
    """
    BASE CLASS, AND FACTORY, FOR THE DOMAIN OF AN EDGE
    Domain(type=...) RETURNS AN INSTANCE OF THE SUBCLASS MATCHING THE type
    """

    def __new__(cls, **desc):
        """
        DISPATCH ON desc.type; UNKNOWN TYPES ARE REPORTED THROUGH Log.error
        """
        desc = wrap(desc)
        if desc.type == "value":
            return ValueDomain(**unwrap(desc))
        elif desc.type == "default":
            return DefaultDomain(**unwrap(desc))
        elif desc.type == "set":
            if isinstance(desc.key, (list, tuple)):
                Log.error("multi key not supported yet")
            return SetDomain(**unwrap(desc))
        elif desc.type == "uid":
            return DefaultDomain(**unwrap(desc))
        else:
            Log.error("Do not know domain of type {{type}}", {"type": desc.type})

    def __init__(self, **desc):
        """
        COPY THE COMMON DESCRIPTIVE PROPERTIES FROM desc
        """
        desc = wrap(desc)
        self.name = nvl(desc.name, desc.type)
        self.type = desc.type
        self.min = desc.min
        self.max = desc.max
        self.interval = desc.interval
        # FIXED: THESE FOUR ASSIGNMENTS ENDED WITH A TRAILING COMMA, WHICH
        # STORED 1-TUPLES LIKE (desc.value,) INSTEAD OF THE VALUES THEMSELVES
        self.value = desc.value
        self.key = desc.key
        self.label = desc.label
        self.end = desc.end
        self.isFacet = nvl(desc.isFacet, False)
        self.dimension = desc.dimension

    @property
    def dict(self):
        """
        THE DESCRIPTIVE PROPERTIES OF THIS DOMAIN, AS A Struct
        """
        return Struct(
            type=self.type,
            name=self.name,
            partitions=self.partitions,
            min=self.min,
            max=self.max,
            interval=self.interval,
            value=self.value,
            key=self.key,
            label=self.label,
            end=self.end,
            isFacet=self.isFacet
        )

    def __json__(self):
        return CNV.object2JSON(self.dict)
class ValueDomain(Domain):
    """
    DOMAIN WHERE EVERY PART IS ITS OWN KEY: PARTS, KEYS AND ENDS ARE ALL THE
    SAME PLAIN VALUE, AND THE NULL PART IS None
    """

    def __new__(cls, **desc):
        # SKIP THE Domain FACTORY, WHICH WOULD DISPATCH ON desc.type AGAIN
        return object.__new__(ValueDomain)

    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.NULL = None

    def getKey(self, part):
        return part

    def getPartByKey(self, key):
        return key

    def getCanonicalPart(self, part):
        return part

    def getEnd(self, value):
        return value

    def compare(self, a, b):
        return value_compare(a, b)
class DefaultDomain(Domain):
    """
    DOMAIN IS A LIST OF OBJECTS, EACH WITH A value PROPERTY
    PARTS ARE NOT KNOWN UP FRONT: THEY ARE ADDED AS NEW KEYS ARE SEEN
    """

    def __new__(cls, **desc):
        # SKIP THE Domain FACTORY, WHICH WOULD DISPATCH ON desc.type AGAIN
        return object.__new__(DefaultDomain)

    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.NULL = Struct(value=None)
        self.partitions = StructList()
        # KEY -> CANONICAL PART; THE None KEY MAPS TO THE NULL PART
        self.map = {None: self.NULL}

    def compare(self, a, b):
        return value_compare(a.value, b.value)

    def getCanonicalPart(self, part):
        return self.getPartByKey(part.value)

    def getPartByKey(self, key):
        """
        RETURN THE PART REGISTERED FOR key, REGISTERING A NEW ONE WHEN NEEDED
        """
        known = self.map.get(key, None)
        if known:
            return known

        fresh = Struct(name=key, value=key)
        self.partitions.append(fresh)
        self.map[key] = fresh
        return fresh

    def getKey(self, part):
        return part.value

    def getEnd(self, part):
        return part.value

    def getLabel(self, part):
        return part.value
class SetDomain(Domain):
    """
    DOMAIN IS A LIST OF OBJECTS, EACH WITH A value PROPERTY
    THE PARTS ARE GIVEN UP FRONT (desc.partitions) AND INDEXED BY THEIR KEY
    """

    def __new__(cls, **desc):
        # SKIP THE Domain FACTORY, WHICH WOULD DISPATCH ON desc.type AGAIN
        return object.__new__(SetDomain)

    def __init__(self, **desc):
        """
        desc.partitions - list OF PARTS; EITHER PLAIN STRINGS OR OBJECTS
        desc.key - NAME OF THE PROPERTY THAT IDENTIFIES AN OBJECT PART
                   (REQUIRED FOR OBJECT PARTS, MUST BE A KEYWORD)
        """
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.NULL = Struct(value=None)
        self.partitions = StructList()

        if desc.partitions and desc.dimension.fields and len(desc.dimension.fields)>1:
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and isinstance(desc.partitions[0][desc.key], dict):
            keys = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            self.map = UniqueIndex(keys=keys)
        else:
            self.map = dict()
            self.map[None] = self.NULL

        # DEFAULT THE LABEL FROM THE DESCRIPTION ITSELF, SO THE DEFAULT APPLIES
        # WHENEVER NO label WAS GIVEN
        self.label = nvl(desc.label, "name")

        if not isinstance(desc.partitions, list):
            Log.error("expecting a list of partitions")

        if isinstance(desc.partitions[0], basestring):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            for p in desc.partitions:
                part = {"name": p, "value": p}
                self.partitions.append(part)
                self.map[p] = part
            # FIXED: WAS THE TUPLE ("value", ), BUT getKey() DOES part[self.key]
            # AND THE PARTS BUILT ABOVE ARE KEYED BY THE STRING "value"
            self.key = "value"
        else:
            if desc.key == None:
                Log.error("Domains must have keys")
            if not is_keyword(desc.key):
                Log.error("scripts not supported yet")
            self.key = desc.key

            self.partitions = desc.partitions.copy()
            for p in desc.partitions:
                self.map[p[self.key]] = p

    def compare(self, a, b):
        return value_compare(self.getKey(a), self.getKey(b))

    def getCanonicalPart(self, part):
        return self.getPartByKey(part.value)

    def getPartByKey(self, key):
        """
        RETURN THE PART REGISTERED FOR key, OR THE NULL PART WHEN THERE IS NONE
        """
        try:
            canonical = self.map.get(key, None)
            if not canonical:
                return self.NULL
            return canonical
        except Exception as e:
            Log.error("problem", e)

    def getKey(self, part):
        return part[self.key]

    def getEnd(self, part):
        if self.value:
            return part[self.value]
        else:
            return part

    def getLabel(self, part):
        return part[self.label]
def value_compare(a, b):
    """
    THREE-WAY COMPARE RETURNING -1, 0 OR 1, WITH None SORTING BEFORE EVERYTHING ELSE
    """
    if a == None:
        return 0 if b == None else -1
    if b == None:
        return 1

    if a > b:
        return 1
    if a < b:
        return -1
    return 0
# A KEYWORD IS ONE OR MORE DOT-SEPARATED WORDS (eg "a" OR "a.b.c")
# FIXED: ANCHORED AT THE END; match() ONLY ANCHORS THE START, SO ANYTHING THAT
# MERELY BEGAN WITH A WORD (eg THE SCRIPT "a+b") WAS ACCEPTED AS A KEYWORD
keyword_pattern = re.compile(r"\w+(?:\.\w+)*$")


def is_keyword(value):
    """
    RETURN True ONLY WHEN THE WHOLE OF value IS A DOTTED FIELD NAME
    None IS NOT A KEYWORD
    """
    if value == None:
        return False
    return True if keyword_pattern.match(value) else False

Просмотреть файл

@ -0,0 +1,188 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..cnv import CNV
from ..queries import MVEL
from ..queries.es_query_aggop import is_aggop, es_aggop
from ..queries.es_query_setop import is_fieldop, is_setop, is_deep, es_setop, es_deepop, es_fieldop
from ..queries.es_query_terms import es_terms, is_terms
from ..queries.es_query_terms_stats import es_terms_stats, is_terms_stats
from ..queries.es_query_util import aggregates, loadColumns
from . import Q
from ..queries.dimensions import Dimension
from ..queries.query import Query, _normalize_where
from ..env.logs import Log
from ..queries.MVEL import _MVEL
from ..struct import Struct, split_field, StructList, nvl
from ..structs.wraps import wrap, unwrap, listwrap
class ESQuery(object):
    """
    SEND GENERAL Qb QUERIES TO ElasticSearch

    INSTANCES MUST BE USED INSIDE A with CLAUSE: query() REPORTS AN ERROR
    UNLESS __enter__ HAS BEEN CALLED
    """

    def __init__(self, es):
        self.es = es
        self.edges = Struct()  # DIMENSIONS ADDED BY addDimension(), KEYED BY full_name
        self.worker = None
        self.ready = False

    def __enter__(self):
        self.ready = True
        return self

    def __exit__(self, type, value, traceback):
        self.ready = False
        if not self.worker:
            return

        if isinstance(value, Exception):
            # LEAVING BECAUSE OF AN ERROR: STOP THE WORKER BEFORE WAITING ON IT
            self.worker.stop()
        self.worker.join()

    def query(self, _query):
        """
        NORMALIZE _query, PICK THE ES STRATEGY THAT CAN HANDLE IT, AND RETURN ITS RESULT
        A from CLAUSE THAT IS ITSELF A Query IS RUN FIRST, AND THE OUTER QUERY
        IS THEN RUN OVER THAT RESULT WITH Q.run
        """
        if not self.ready:
            Log.error("Must use with clause for any instance of ESQuery")

        query = Query(_query, schema=self)

        for s in listwrap(query.select):
            if not aggregates[s.aggregate]:
                # REPORT THE OFFENDING select CLAUSE (self HAS NO select OF ITS OWN;
                # self.select WOULD BE LOOKED UP IN self.edges BY __getattr__)
                Log.error(
                    "ES can not aggregate {{name}} because '{{aggregate}}' is not a recognized aggregate",
                    {"name": s.name, "aggregate": s.aggregate}
                )

        frum = query["from"]
        if isinstance(frum, Query):
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return Q.run(q2)

        frum = loadColumns(self.es, query["from"])
        mvel = _MVEL(frum)

        # FIRST MATCHING STRATEGY WINS
        if is_fieldop(query):
            return es_fieldop(self.es, query)
        elif is_deep(query):
            return es_deepop(self.es, mvel, query)
        elif is_setop(query):
            return es_setop(self.es, mvel, query)
        elif is_aggop(query):
            return es_aggop(self.es, mvel, query)
        elif is_terms(query):
            return es_terms(self.es, mvel, query)
        elif is_terms_stats(query):
            # NOTE: THIS ONE RECEIVES THE ESQuery ITSELF, NOT self.es
            return es_terms_stats(self, mvel, query)

        Log.error("Can not handle")

    def addDimension(self, dim):
        if isinstance(dim, list):
            Log.error("Expecting dimension to be a object, not a list:\n{{dim}}", {"dim": dim})
        self._addDimension(dim, [])

    def _addDimension(self, dim, path):
        """
        REGISTER EVERY EDGE OF dim AS A Dimension, KEYED BY ITS full_name
        (path IS ACCEPTED BUT NOT USED)
        """
        dim.full_name = dim.name
        for e in dim.edges:
            d = Dimension(e, dim, self)
            self.edges[d.full_name] = d

    def __getitem__(self, item):
        """
        item IS A DOTTED PATH: THE FIRST STEP SELECTS FROM self.edges,
        THE REMAINING STEPS INDEX INTO THE RESULT
        """
        f = split_field(item)
        e = self.edges[f[0]]
        for i in f[1::]:
            e = e[i]
        return e

    def __getattr__(self, item):
        # ONLY CALLED FOR ATTRIBUTES NOT FOUND THE NORMAL WAY
        return self.edges[item]

    def normalize_edges(self, edges):
        output = StructList()
        for e in listwrap(edges):
            output.extend(self._normalize_edge(e))
        return output

    def _normalize_edge(self, edge):
        """
        RETURN A EDGE DEFINITION INTO A SIMPLE ARRAY OF PATH-LEAF
        DEFINITIONS [ {"name":<pathA>, "value":<pathB>}, ... ]
        USEFUL FOR DECLARING HIGH-LEVEL DIMENSIONS, AND RELIEVING LOW LEVEL PATH PAIRS
        """
        if isinstance(edge, basestring):
            e = self[edge]
            if e:
                # A REGISTERED DIMENSION: EXPAND IT TO ITS UNDERLYING FIELDS
                domain = e.getDomain()
                fields = domain.dimension.fields

                if isinstance(fields, list):
                    if len(fields) == 1:
                        return [{"value": fields[0]}]
                    else:
                        return [{"name": (edge + "[" + str(i) + "]"), "value": v} for i, v in enumerate(fields)]
                elif isinstance(fields, dict):
                    return [{"name": (edge + "." + k), "value": v} for k, v in fields.items()]
                else:
                    Log.error("do not know how to handle")

            # NOT A REGISTERED DIMENSION: THE NAME IS TAKEN AS THE PATH
            return [{
                "name": edge,
                "value": edge
            }]
        else:
            return [{
                "name": nvl(edge.name, edge.value),
                "value": edge.value
            }]

    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)

        # GET IDS OF DOCUMENTS
        results = self.es.search({
            "fields": [],
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": _normalize_where(command.where, self)
            }},
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = StructList()
        for k, v in command.set.items():
            if not MVEL.isKeyword(k):
                Log.error("Only support simple paths for now")

            scripts.append("ctx._source." + k + " = " + MVEL.value2MVEL(v) + ";")
        script = "".join(scripts)

        if results.hits.hits:
            # BULK REQUEST: ONE ACTION LINE AND ONE SCRIPT LINE PER DOCUMENT
            bulk = []
            for doc_id in results.hits.hits._id:
                bulk.append({"update": {"_id": doc_id}})
                bulk.append({"script": script})
            content = ("\n".join(CNV.object2JSON(c) for c in bulk) + "\n").encode('utf-8')
            self.es._post(
                self.es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"}
            )

Просмотреть файл

@ -0,0 +1,102 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..collections.matrix import Matrix
from ..collections import AND
from ..structs.wraps import listwrap
from ..struct import unwrap
from ..queries import es_query_util
from ..queries.es_query_util import aggregates, fix_es_stats, buildESQuery
from ..queries.filters import simplify
from ..queries import MVEL
from ..queries.cube import Cube
def is_aggop(query):
    """
    A QUERY WITHOUT EDGES IS TREATED AS A PLAIN AGGREGATE
    """
    return not query.edges
def es_aggop(es, mvel, query):
    """
    RETURN A Cube (NO EDGES) WITH ONE AGGREGATE PER select CLAUSE
    A statistical FACET IS SENT FOR EACH DISTINCT select VALUE; SELECTS THAT
    SHARE A VALUE SHARE THE FACET
    """
    select = listwrap(query.select)
    esQuery = buildESQuery(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        # es_countop TAKES (es, mvel, query); THE mvel ARGUMENT WAS MISSING
        return es_countop(es, mvel, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if MVEL.isKeyword(s.value):
                unwrap(esQuery.facets)[s.name] = {
                    "statistical": {
                        "field": s.value
                    },
                    "facet_filter": simplify(query.where)
                }
            else:
                unwrap(esQuery.facets)[s.name] = {
                    "statistical": {
                        "script": mvel.compile_expression(s.value, query)
                    },
                    "facet_filter": simplify(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es_query_util.post(es, esQuery, query.limit)

    # READ EACH RESULT FROM THE FACET THAT ACTUALLY HOLDS ITS STATS: A SELECT
    # THAT SHARES ITS VALUE WITH AN EARLIER ONE HAS NO FACET UNDER ITS OWN NAME
    facets = unwrap(data.facets)
    matricies = {
        s.name: Matrix(value=fix_es_stats(facets[name2facet[s.name]])[aggregates[s.aggregate]])
        for s in select
    }
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT

    A terms FACET IS SENT FOR EACH select CLAUSE, AND ITS total IS USED
    AS THE VALUE OF THAT CLAUSE
    """
    select = listwrap(query.select)
    esQuery = buildESQuery(query)
    for s in select:
        if MVEL.isKeyword(s.value):
            esQuery.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {"exists": {"field": s.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            esQuery.facets[s.name] = {
                "terms": {
                    "script_field": mvel.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es_query_util.post(es, esQuery, query.limit)

    matricies = {}
    for s in select:
        # FACET RESULTS ARE FOUND UNDER data.facets (NOT data.hits),
        # AS EVERY OTHER READER OF A FACET RESPONSE DOES
        matricies[s.name] = Matrix(value=data.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube

Просмотреть файл

@ -0,0 +1,256 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from ..collections.matrix import Matrix
from ..collections import AND, SUM, OR
from ..structs.wraps import listwrap
from ..queries.es_query_util import aggregates
from ..queries import domains, es_query_util
from ..queries.filters import simplify, TRUE_FILTER
from ..env.logs import Log
from ..queries import MVEL, filters
from ..queries.cube import Cube
from ..struct import split_field, unwrap, nvl, StructList
def is_fieldop(query):
    """
    RETURN True IF THE QUERY CAN BE ANSWERED BY ASKING ES FOR PLAIN FIELDS
    """
    select = listwrap(query.select)

    if query.edges:
        # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)
        isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        return True if isSmooth else False

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isSimple = AND(s.value != None and (s.value == "*" or isKeyword(s.value)) for s in select)
    noAgg = AND(s.aggregate == "none" for s in select)
    if isDeep:
        return False
    return True if (isSimple and noAgg) else False
def isKeyword(value):
    """
    LIKE MVEL.isKeyword, BUT A dict (ITS VALUES) OR A list (ITS ELEMENTS)
    IS ACCEPTED WHEN EVERYTHING INSIDE IT IS A KEYWORD
    """
    if isinstance(value, dict):
        children = (v for _, v in value.items())
    elif isinstance(value, list):
        children = value
    else:
        return MVEL.isKeyword(value)

    return AND(isKeyword(child) for child in children)
def es_fieldop(es, query):
    """
    ASK ES FOR THE SELECTED FIELDS OF ALL DOCUMENTS MATCHING query.where
    RETURN A Cube WITH ONE MATRIX (ONE ENTRY PER HIT) FOR EACH select CLAUSE
    """
    def field_of(hit, name):
        # VALUE OF ONE RETURNED FIELD, None WHEN ES DID NOT SEND IT
        return unwrap(hit.fields).get(name, None)

    esQuery = es_query_util.buildESQuery(query)
    select = listwrap(query.select)
    esQuery.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": filters.simplify(query.where)
        }
    }
    esQuery.size = nvl(query.limit, 200000)

    # LIST OF FIELDS TO REQUEST; "*" MEANS NO LIST AT ALL
    esQuery.fields = StructList()
    for value in select.value:
        if value == "*":
            esQuery.fields = None
        elif isinstance(value, list):
            esQuery.fields.extend(value)
        elif isinstance(value, dict):
            esQuery.fields.extend(value.values())
        else:
            esQuery.fields.append(value)
    esQuery.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es_query_util.post(es, esQuery, query.limit)

    hits = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            # WHOLE DOCUMENT
            matricies[s.name] = Matrix.wrap([hit._source for hit in hits])
        elif isinstance(s.value, dict):
            # ONE dict PER HIT, SAME KEYS AS THE select VALUE
            matricies[s.name] = Matrix.wrap([
                {k: field_of(hit, v) for k, v in s.value.items()}
                for hit in hits
            ])
        elif isinstance(s.value, list):
            # ONE tuple PER HIT, SAME ORDER AS THE select VALUE
            matricies[s.name] = Matrix.wrap([
                tuple(field_of(hit, ss) for ss in s.value)
                for hit in hits
            ])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([field_of(hit, s.value) for hit in hits])
        else:
            try:
                matricies[s.name] = Matrix.wrap([field_of(hit, s.value) for hit in hits])
            except Exception as e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube
def is_setop(query):
    """
    RETURN True IF THE QUERY IS A SET OPERATION (OR A SINGLE SIMPLE AGGREGATE)
    """
    select = listwrap(query.select)

    if query.edges:
        isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        return True if isSmooth else False

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    simpleAgg = AND([s.aggregate in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
    return True if (simpleAgg or isDeep) else False
def es_setop(es, mvel, query):
    """
    RUN A SET OPERATION (NO EDGES) AND RETURN THE RESULT AS A Cube
    A SINGLE select IS ANSWERED WITH A COUNT OR A PLAIN terms FACET;
    EVERYTHING ELSE PACKS THE SELECTED VALUES INTO A SCRIPTED terms FACET
    """
    esQuery = es_query_util.buildESQuery(query)
    select = listwrap(query.select)

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if isDeep:
        esQuery.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": nvl(query.limit, 200000)
            },
            "facet_filter": simplify(query.where)
        }
    elif isComplex or len(select) != 1:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        esQuery.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": nvl(simple_query.limit, 200000)
            },
            "facet_filter": simplify(query.where)
        }
    else:
        # EXACTLY ONE, UNCOMPLICATED, select CLAUSE
        if not select[0].value:
            esQuery.query = {"filtered": {
                "query": {"match_all": {}},
                "filter": simplify(query.where)
            }}
            esQuery.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
        elif MVEL.isKeyword(select[0].value):
            esQuery.facets.mvel = {
                "terms": {
                    "field": select[0].value,
                    "size": nvl(query.limit, 200000)
                },
                "facet_filter": simplify(query.where)
            }
            if query.sort:
                sort = query.sort
                if len(sort) > 1:
                    Log.error("can not sort by more than one field")

                first = sort[0]
                if first.field != select[0].value:
                    Log.error("can not sort by anything other than count, or term")

                esQuery.facets.mvel.terms.order = "term" if first.sort >= 0 else "reverse_term"

    data = es_query_util.post(es, esQuery, query.limit)

    if len(select) == 1:
        if not select[0].value:
            # SPECIAL CASE FOR SINGLE COUNT
            output = Matrix(value=data.hits.total)
            cube = Cube(query.select, [], {select[0].name: output})
        elif MVEL.isKeyword(select[0].value):
            # SPECIAL CASE FOR SINGLE TERM
            found_terms = data.facets.mvel.terms
            output = Matrix.wrap([t.term for t in found_terms])
            cube = Cube(query.select, [], {select[0].name: output})
    else:
        data_list = MVEL.unpack_terms(data.facets.mvel, select)
        if data_list:
            # TRANSPOSE ROWS INTO ONE COLUMN PER select CLAUSE
            output = zip(*data_list)
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})
        else:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})

    cube.frum = query
    return cube
def is_deep(query):
    """
    RETURN True FOR A SINGLE none/count SELECT, WITH MORE THAN ONE EDGE,
    OVER A NESTED (DOTTED) from NAME
    """
    select = listwrap(query.select)
    if len(select) > 1:
        return False

    if aggregates[select[0].aggregate] not in ("none", "count"):
        return False

    if len(query.edges) <= 1:
        return False

    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    # WHEN NOT NESTED, BETTER TO USE TERM QUERY
    return len(split_field(query["from"].name)) > 1
def es_deepop(es, mvel, query):
    """
    PACK THE EDGE VALUES INTO A SCRIPTED terms FACET, THEN UNPACK THE
    RETURNED TERMS INTO A Cube INDEXED BY THE EDGES' PARTITIONS
    """
    esQuery = es_query_util.buildESQuery(query)

    # THE EDGES BECOME THE SELECT OF A TEMPORARY, EDGE-LESS, QUERY
    temp_query = query.copy()
    temp_query.select = query.edges
    temp_query.edges = StructList()
    esQuery.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": simplify(query.where)
    }

    data = es_query_util.post(es, esQuery, query.limit)

    rows = MVEL.unpack_terms(data.facets.mvel, query.edges)
    terms = zip(*rows)

    # NUMBER ALL EDGES FOR Qb INDEXING
    edges = query.edges
    for edge_index, edge in enumerate(edges):
        for key in terms[edge_index]:
            edge.domain.getPartByKey(key)
        edge.index = edge_index
        for part_index, part in enumerate(edge.domain.partitions):
            part.dataIndex = part_index
        edge.domain.NULL.dataIndex = len(edge.domain.partitions)

    # MAKE CUBE
    dims = [len(edge.domain.partitions) for edge in query.edges]
    output = Matrix(*dims)

    # FILL CUBE (THE LAST ELEMENT OF EACH ROW IS ADDED TO ITS CELL)
    for row in rows:
        term_coord = [edge.domain.getPartByKey(row[i]).dataIndex for i, edge in enumerate(edges)]
        output[term_coord] = SUM(output[term_coord], row[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube

Просмотреть файл

@ -0,0 +1,151 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from ..collections.matrix import Matrix
from ..collections import AND
from ..queries import Q
from ..queries import es_query_util
from ..queries.es_query_util import aggregates, buildESQuery, compileEdges2Term
from ..queries.filters import simplify
from ..queries.cube import Cube
from ..struct import nvl, StructList
from ..structs.wraps import wrap, listwrap
def is_terms(query):
    """
    RETURN True WHEN THERE IS NO select, OR EVERY select IS A none/count AGGREGATE
    """
    select = listwrap(query.select)
    isSimple = not query.select or AND(aggregates[s.aggregate] in ("none", "count") for s in select)
    return True if isSimple else False
def es_terms(es, mvel, query):
    """
    RETURN LIST OF ALL EDGE QUERIES
    EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
    WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION
    """
    if len(query.edges) == 2:
        # TWO EDGES HAVE THEIR OWN STRATEGY
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    esQuery = buildESQuery(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    for s in select:
        esQuery.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": nvl(query.limit, 200000)
            },
            "facet_filter": simplify(query.where)
        }

    term2Parts = packed_term.term2parts

    data = es_query_util.post(es, esQuery, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    for _, facet in data.facets.items():
        for found in facet.terms:
            term2Parts(found.term)

    # NUMBER ALL EDGES FOR Qb INDEXING
    for edge_index, edge in enumerate(query.edges):
        edge.index = edge_index
        if edge.domain.type in ["uid", "default"]:
            # edge.domain.partitions = Q.sort(edge.domain.partitions, "value")
            for part_index, part in enumerate(edge.domain.partitions):
                part.dataIndex = part_index
            edge.domain.NULL.dataIndex = len(edge.domain.partitions)

    # MAKE CUBE
    dims = [len(edge.domain.partitions) + (1 if edge.allowNulls else 0) for edge in query.edges]
    output = {}
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value

    THE VALUES OF THE FIRST EDGE ARE REQUESTED FIRST, THEN ONE terms FACET
    (OVER THE SECOND EDGE) IS SENT FOR EACH select AND EACH OF THOSE VALUES
    FACETS ARE NAMED <select.name>,<index of first-edge value>
    """
    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
    esQuery = buildESQuery(query)
    for s in select:
        for i, v in enumerate(values1):
            esQuery.facets[s.name + "," + str(i)] = {
                "terms": {
                    "field": query.edges[1].value,
                    "size": nvl(query.limit, 200000)
                },
                "facet_filter": simplify({"and": [
                    query.where,
                    {"term": {query.edges[0].value: v}}
                ]})
            }

    data = es_query_util.post(es, esQuery, query.limit)

    # UNION ALL TERMS FROM SECOND DIMENSION
    values2 = set()
    for k, f in data.facets.items():
        values2.update(f.terms.term)
    values2 = Q.sort(values2)
    term2index = {v: i for i, v in enumerate(values2)}
    query.edges[1].domain.partitions = StructList([{"name": v, "value": v} for v in values2])

    # MAKE CUBE
    output = {}
    dims = [len(values1), len(values2)]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        # SPLIT ON THE LAST COMMA ONLY: THE INDEX IS ALWAYS THE LAST PIECE,
        # AND THE select NAME MAY ITSELF CONTAIN COMMAS
        select_name, index1 = facetName.rsplit(",", 1)
        s = [candidate for candidate in select if candidate.name == select_name][0]
        i1 = int(index1)
        for term in facet.terms:
            i2 = term2index[term.term]
            output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube

Просмотреть файл

@ -0,0 +1,333 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..collections.matrix import Matrix
from ..collections import COUNT, PRODUCT
from ..queries import es_query_util
from ..queries.cube import Cube
from ..queries.es_query_util import aggregates, buildESQuery, compileEdges2Term
from ..queries.filters import simplify
from ..env.logs import Log
from ..queries import domains, MVEL, filters
from ..queries.MVEL import UID
from ..struct import nvl, StructList
from ..structs.wraps import wrap, listwrap
def is_terms_stats(query):
    """
    RETURN True WHEN AT MOST ONE EDGE HAS A DOMAIN TYPE OUTSIDE domains.KNOWN
    SUCH A QUERY MUST NOT ASK FOR A SORT
    """
    # ONLY ALLOWED ONE UNKNOWN DOMAIN
    num_unknown = COUNT(1 for e in query.edges if e.domain.type not in domains.KNOWN)
    if num_unknown > 1:
        return False

    if query.sort:
        Log.error("terms_stats can not be sorted")
    return True
def es_terms_stats(esq, mvel, query):
    """
    ANSWER AN EDGE QUERY WITH terms_stats FACETS AND RETURN THE RESULT AS A Cube

    ONE EDGE (THE "SPECIAL" EDGE) IS USED AS THE KEY OF THE FACETS, EVERY
    OTHER EDGE CONTRIBUTES ONE FACET PER COMBINATION OF ITS PARTITIONS
    FACETS ARE NAMED <select.name>,<dataIndex>,...  (ONE INDEX PER FACET EDGE)

    esq IS THE ESQuery (NOT THE ES CONNECTION): IT IS USED TO COUNT THE REAL
    COMBINATIONS WHEN THE THEORETICAL NUMBER OF FACETS IS TOO HIGH
    """
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = StructList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM
            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and MVEL.isKeyword(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i
        if special_index < 0:
            # WITHOUT THIS CHECK THE LAST FACET EDGE WOULD BE DROPPED AND None USED AS THE SPECIAL EDGE
            Log.error("Expecting at least one edge that can be collapsed to a term")
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, coord, cube):
            # KEEP ONLY THE COMBINATIONS THAT ACTUALLY HAVE DATA
            if value:
                esFacets.append([e.domain.partitions[coord[i]] for i, e in enumerate(facetEdges)])

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", {"real_count": len(esFacets), "theory_count": total_facets})

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube
    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, StructList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    esQuery = buildESQuery(query)

    for s in select:
        for parts in esFacets:
            condition = StructList()
            constants = StructList()
            name = [s.name]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            esQuery.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if MVEL.isKeyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not MVEL.isKeyword(s.value) else None,
                    "size": nvl(query.limit, 200000)
                }
            }
            if condition:
                esQuery.facets[name].facet_filter = simplify({"and": condition})

    data = es_query_util.post(esq.es, esQuery, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = StructList()
        term2part = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                # ONE PART PER DISTINCT TERM (KEYED BY THE TERM, NOT BY THE WHOLE STATS RECORD)
                term = stats.term
                if term not in term2part:
                    part = {"value": term, "name": term}
                    partitions.append(part)
                    term2part[term] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = term2part
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # INSERT THE SPECIAL EDGE'S INDEX AT ITS POSITION AMONG THE COORDINATES
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (special.dataIndex, ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
def register_script_field(esQuery, code):
    """
    ENSURE code IS ONE OF THE script_fields OF esQuery AND RETURN ITS NAME
    """
    if not esQuery.script_fields:
        esQuery.script_fields = {}

    # IF CODE IS IDENTICAL, THEN USE THE EXISTING SCRIPT
    for existing_name, existing in esQuery.script_fields.items():
        if existing.script == code:
            return existing_name

    new_name = "script" + UID()
    esQuery.script_fields[new_name].script = code
    return new_name
def getAllEdges(facetEdges):
    """
    RETURN ALL PARTITION COMBINATIONS OF THE GIVEN EDGES
    NO EDGES GIVES A SINGLE, EMPTY, COMBINATION
    """
    if facetEdges:
        return _getAllEdges(facetEdges, 0)
    return [()]
def _getAllEdges(facetEdges, edgeDepth):
    """
    RETURN ALL PARTITION COMBINATIONS: A LIST OF ORDERED TUPLES
    (ONLY EDGES FROM edgeDepth ONWARD ARE COMBINED)
    """
    if edgeDepth == len(facetEdges):
        return [()]

    # COMBINATIONS OF THE REMAINING EDGES, EACH TO BE PREFIXED WITH A PART OF THIS EDGE
    tails = _getAllEdges(facetEdges, edgeDepth + 1)

    combos = StructList()
    for part in facetEdges[edgeDepth].domain.partitions:
        for tail in tails:
            combos.append((part,) + tail)
    return combos
def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT

    THE FILTER SELECTS THE DOCUMENTS THAT BELONG TO partition OF edge
    THE FORM DEPENDS ON THE EDGE: ITS OWN FACET FILTER, A RANGE JOIN,
    THE PARTITION'S esfilter, A PLAIN range/term FILTER, OR AN MVEL SCRIPT
    """
    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(nvl(partition.where, {"and": []}))

        if partition.min and partition.max and MVEL.isKeyword(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return filters.simplify(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if MVEL.isKeyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lt": MVEL.value2value(partition.max)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + MVEL.value2MVEL(partition.max)
                    )}})

                if MVEL.isKeyword(edge.range.max):
                    # THE FIELD NAME IS THE KEY OF THE range FILTER
                    # (A COMMA HERE WOULD MAKE A SET HOLDING A dict, WHICH IS A TypeError)
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gt": MVEL.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + MVEL.value2MVEL(partition.min))}})
            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if MVEL.isKeyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lte": MVEL.value2value(partition.min)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + MVEL.value2MVEL(partition.min)
                    )}})

                if MVEL.isKeyword(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gte": MVEL.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        MVEL.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif MVEL.isKeyword(edge.value):
        # USE FAST ES SYNTAX
        # output IS A PLAIN dict: IT IS FILLED BY KEY (ATTRIBUTE ASSIGNMENT ON A dict FAILS)
        output = {}
        if edge.domain.type in domains.ALGEBRAIC:
            output["range"] = {
                edge.value: {"gte": MVEL.value2query(partition.min), "lt": MVEL.value2query(partition.max)}
            }
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output["term"] = {edge.value: partition.value}
            else:
                output["term"] = {edge.value: edge.domain.getKey(partition)}
        elif edge.domain.type == "default":
            output["term"] = {edge.value: partition.value}
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            script = edge.value + ">=" + MVEL.value2MVEL(partition.min) + " and " + edge.value + "<" + MVEL.value2MVEL(partition.max)
        else:
            script = "( " + edge.value + " ) ==" + MVEL.value2MVEL(partition.value)

        code = MVEL.addFunctions(script)
        return {"script": {"script": code.head + code.body}}

Просмотреть файл

@ -0,0 +1,498 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime
from .. import struct
from ..cnv import CNV
from .. import strings
from ..collections import COUNT
from ..maths import stats
from ..env.elasticsearch import ElasticSearch
from ..env.logs import Log
from ..maths import Math
from ..queries import domains, MVEL, filters
from ..struct import nvl, StructList, Struct, split_field, join_field
from ..structs.wraps import wrap
from ..times import durations
# ES QUERY CLAUSE match_all (PRESUMABLY USED AS THE "ALWAYS TRUE" FILTER - NOT REFERENCED IN THIS PART OF THE FILE)
TrueFilter = {"match_all": {}}
# WHEN True: buildESQuery ASKS FOR 100 RECORDS (INSTEAD OF 0) AND FILTERS THE QUERY BY query.where
DEBUG = False
# FILLED BY loadColumns AND parseColumns, KEYED BY INDEX (OR NESTED PATH) NAME
INDEX_CACHE = {} # MATCH NAMES TO FULL CONNECTION INFO
def loadColumns(es, frum):
    """
    ENSURE COLUMNS FOR GIVEN INDEX/QUERY ARE LOADED, AND MVEL COMPILATION WORKS BETTER

    frum IS EITHER AN INDEX NAME, OR AN OBJECT WITH A name
    THE RESULT IS KEPT IN INDEX_CACHE, SO EACH NAME IS ONLY LOADED ONCE
    """
    if not isinstance(frum, basestring):
        if not frum.name:
            Log.error("Expecting from clause to have a name")

        if frum.name in INDEX_CACHE:
            return INDEX_CACHE[frum.name]
    else:
        if frum in INDEX_CACHE:
            return INDEX_CACHE[frum]
        frum = Struct(
            name=frum
        )

    # FILL frum WITH DEFAULTS FROM es.settings
    struct.set_default(frum, es.settings)
    if not frum.host:
        Log.error("must have host defined")

    # DETERMINE IF THE es IS FUNCTIONALLY DIFFERENT
    differs = any(k != "name" and v != frum[k] for k, v in es.settings.items())
    if differs:
        es = ElasticSearch(frum)

    output = wrap(frum).copy()
    properties = es.get_schema().properties
    output.es = es

    root = split_field(frum.name)[0]
    if root == frum.name:
        INDEX_CACHE[root] = output
        output.columns = parseColumns(frum.index, root, properties)
    else:
        # A NESTED NAME: CACHE IT, THEN MAKE SURE THE ROOT INDEX IS LOADED
        INDEX_CACHE[frum.name] = output
        loadColumns(es, root)

    return output
def post(es, esQuery, limit):
    """
    SEND esQuery TO es.search AND RETURN THE RESPONSE

    A QUERY WITH NO FACETS AND size == 0 IS REPORTED AS AN ERROR BEFORE SENDING
    ANY FAILURE WHILE SEARCHING, OR WHILE INSPECTING THE RETURNED FACETS,
    IS REPORTED THROUGH Log.error("Error with ESQuery", ...)
    """
    if not esQuery.facets and esQuery.size == 0:
        Log.error("ESQuery is sending no facets")
        # DO NOT KNOW WHY THIS WAS HERE
    # if isinstance(query.select, list) or len(query.edges) and not esQuery.facets.keys and esQuery.size == 0:
    #     Log.error("ESQuery is sending no facets")

    postResult = None
    try:
        postResult = es.search(esQuery)

        facets = postResult.facets
        if facets:
            # .items() IS REQUIRED TO GET (name, facet) PAIRS
            for facetName, f in facets.items():
                # SKIP (DO NOT return) THE FACETS THAT NEED NO CHECK,
                # OTHERWISE THE CALLER RECEIVES None INSTEAD OF THE RESPONSE
                if f._type == "statistical":
                    continue
                if not f.terms:
                    continue

                # NOTE(review): "not limit" LOOKS INVERTED (THIS TEST CAN NOT FIRE FOR A REAL limit);
                # LEFT AS IS BECAUSE ENABLING IT WOULD REJECT QUERIES THAT WORK TODAY - CONFIRM INTENT
                if not DEBUG and not limit and len(f.terms) == limit:
                    Log.error("Not all data delivered (" + str(len(f.terms)) + "/" + str(f.total) + ") try smaller range")
    except Exception as e:
        Log.error("Error with ESQuery", e)

    return postResult
def buildESQuery(query):
    """
    RETURN THE SKELETON OF AN ES QUERY: match_all, NO SORT, NO FACETS
    NO RECORDS ARE REQUESTED, EXCEPT IN DEBUG MODE
    """
    size = 100 if DEBUG else 0
    output = wrap({
        "query": {"match_all": {}},
        "from": 0,
        "size": size,
        "sort": [],
        "facets": {
        }
    })

    if not DEBUG:
        return output

    # TO LIMIT RECORDS TO WHAT'S IN FACETS
    output.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": filters.simplify(query.where)
        }
    }
    return output
def parseColumns(index_name, parent_path, esProperties):
    """
    RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT

    EACH COLUMN IS {"name", "type", "useSource"}; THE name IS THE PATH OF THE
    PROPERTY WITH ITS FIRST STEP REMOVED
    NESTED PROPERTIES ALSO GET THEIR OWN ENTRY IN INDEX_CACHE
    """
    columns = StructList()
    for name, property in esProperties.items():
        if parent_path:
            path = join_field(split_field(parent_path) + [name])
        else:
            path = name

        # COLUMN NAME: THE PATH WITHOUT ITS FIRST STEP
        column_name = join_field(split_field(path)[1::])

        childColumns = None

        if property.type == "nested" and property.properties:
            # NESTED TYPE IS A NEW TYPE DEFINITION
            if path not in INDEX_CACHE:
                INDEX_CACHE[path] = INDEX_CACHE[parent_path].copy()
                INDEX_CACHE[path].name = path
            # NOTE(review): childColumns IS STILL None HERE, SO THE NESTED INDEX GETS NO COLUMNS;
            # LOOKS LIKE THE NESTED PROPERTIES WERE MEANT TO BE PARSED FIRST - CONFIRM
            INDEX_CACHE[path].columns = childColumns

            columns.append({
                "name": column_name,
                "type": property.type,
                "useSource": True
            })

            continue

        if property.properties:
            childColumns = parseColumns(index_name, path, property.properties)
            columns.extend(childColumns)
            columns.append({
                "name": column_name,
                "type": "object",
                "useSource": True
            })

        if property.dynamic:
            continue
        if not property.type:
            continue
        if property.type == "multi_field":
            property.type = property.fields[name].type  # PULL DEFAULT TYPE
            # EACH SUB-FIELD IS A (name, definition) PAIR
            for n, p in property.fields.items():
                if n == name:
                    # DEFAULT
                    columns.append({"name": column_name, "type": p.type, "useSource": p.index == "no"})
                else:
                    columns.append({"name": column_name + "\\." + n, "type": p.type, "useSource": p.index == "no"})
            continue

        if property.type in ["string", "boolean", "integer", "date", "long", "double"]:
            columns.append({
                "name": column_name,
                "type": property.type,
                "useSource": property.index == "no"
            })
            if property.index_name and name != property.index_name:
                columns.append({
                    "name": property.index_name,
                    "type": property.type,
                    "useSource": property.index == "no"
                })
        elif property.enabled == False:
            columns.append({
                "name": column_name,
                "type": property.type,
                "useSource": "yes"
            })
        else:
            Log.warning("unknown type {{type}} for property {{path}}", {"type": property.type, "path": path})

    # SPECIAL CASE FOR PROPERTIES THAT WILL CAUSE OutOfMemory EXCEPTIONS
    # THE TEST IS ON THE INDEX (NOT ON WHATEVER PROPERTY NAME THE LOOP ABOVE ENDED WITH)
    if index_name == "bugs":
        for c in columns:
            if c.name == "dependson" or c.name == "blocked":
                c.useSource = True

    return columns
def compileTime2Term(edge):
"""
RETURN MVEL CODE THAT MAPS A TIME DOMAIN DOWN TO AN INTEGER, AND THE
FUNCTION THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)

RETURNS Struct WITH
toTerm - {"head": "", "body": <MVEL EXPRESSION STRING>}
fromTerm - FUNCTION THAT MAPS THE INTEGER BACK TO A PART USING edge.domain.getPartByKey()
"""
if edge.esscript:
Log.error("edge script not supported yet")
# IS THERE A LIMIT ON THE DOMAIN?
numPartitions = len(edge.domain.partitions)
value = edge.value
if MVEL.isKeyword(value):
# A PLAIN FIELD NAME IS READ FROM THE DOCUMENT
value = "doc[\"" + value + "\"].value"
# NOTE(review): compileNullTest() CAN RETURN THE PYTHON bool False, WHICH CAN
# NOT BE CONCATENATED INTO THE STRINGS BUILT BELOW - CONFIRM
nullTest = compileNullTest(edge)
# REFERENCE POINT FOR PARTITION NUMBERING (PRESUMABLY THE FIRST NON-NULL OF min, max, 2000-01-01)
ref = nvl(edge.domain.min, edge.domain.max, datetime(2000, 1, 1))
if edge.domain.interval.month > 0:
# MONTH-BASED INTERVAL: THE NULL PARTITION IS ENCODED AS 0
# NOTE(review): subtract()/floorMonth()/ceilingMonth() ARE NOT METHODS OF THE
# STANDARD datetime; ASSUMES A PROJECT DATE TYPE - TODO CONFIRM
offset = ref.subtract(ref.floorMonth(), durations.DAY).milli
if offset > durations.DAY.milli * 28:
offset = ref.subtract(ref.ceilingMonth(), durations.DAY).milli
partition2int = "milli2Month(" + value + ", " + MVEL.value2MVEL(offset) + ")"
partition2int = "((" + nullTest + ") ? 0 : " + partition2int + ")"
def int2Partition(value):
if Math.round(value) == 0:
return edge.domain.NULL
# NOTE(review): str HAS NO right() METHOD AND datetime() EXPECTS INTEGERS;
# THIS LINE LOOKS LIKE IT WAS CARRIED OVER FROM JAVASCRIPT - CONFIRM
d = datetime(str(value)[:4:], str(value).right(2), 1)
d = d.addMilli(offset)
return edge.domain.getPartByKey(d)
else:
# FIXED-LENGTH INTERVAL: THE NULL PARTITION IS ENCODED AS numPartitions
# NOTE(review): numPartitions IS AN int (FROM len()); CONCATENATING IT TO A
# STRING RAISES TypeError.  interval.milli IS PRESUMABLY NUMERIC TOO - CONFIRM
partition2int = "Math.floor((" + value + "-" + MVEL.value2MVEL(ref) + ")/" + edge.domain.interval.milli + ")"
partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")"
def int2Partition(value):
if Math.round(value) == numPartitions:
return edge.domain.NULL
return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))
return Struct(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileDuration2Term(edge):
    """
    RETURN MVEL CODE THAT MAPS A duration DOMAIN DOWN TO AN INTEGER, AND THE
    FUNCTION THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)

    edge - THE EDGE TO COMPILE; READS edge.esscript, edge.value AND edge.domain
           (partitions, min, max, interval, NULL, getPartByKey)

    RETURNS Struct WITH
        toTerm - {"head": "", "body": <MVEL EXPRESSION STRING>}
        fromTerm - FUNCTION THAT MAPS THE INTEGER BACK TO A PART

    OUT-OF-BOUNDS VALUES ARE MAPPED TO len(edge.domain.partitions), WHICH
    fromTerm TRANSLATES TO edge.domain.NULL
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if MVEL.isKeyword(value):
        # A PLAIN FIELD NAME IS READ FROM THE DOCUMENT
        value = "doc[\"" + value + "\"].value"

    ref = nvl(edge.domain.min, edge.domain.max, durations.ZERO)
    nullTest = compileNullTest(edge)
    if nullTest is False:
        # AN UNBOUNDED DOMAIN MAY BE REPORTED AS THE PYTHON bool; THE MVEL
        # EXPRESSION NEEDS THE LITERAL
        nullTest = "false"

    ms = edge.domain.interval.milli
    if edge.domain.interval.month > 0:
        # MONTH-BASED INTERVALS ARE APPROXIMATED AS TWELFTHS OF A YEAR
        ms = durations.YEAR.milli / 12 * edge.domain.interval.month

    # NUMBERS MUST BE CONVERTED EXPLICITLY: str + int RAISES TypeError
    partition2int = "Math.floor((" + value + "-" + MVEL.value2MVEL(ref) + ")/" + str(ms) + ")"
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))

    return Struct(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileNumeric2Term(edge):
    """
    RETURN MVEL CODE THAT MAPS THE numeric DOMAIN DOWN TO AN INTEGER, AND THE
    FUNCTION THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)

    edge - THE EDGE TO COMPILE; edge.domain.type MUST BE "numeric" OR "count"

    RETURNS Struct WITH
        toTerm - {"head": "", "body": <MVEL EXPRESSION STRING>}
        fromTerm - FUNCTION THAT MAPS THE INTEGER BACK TO A PART

    OUT-OF-BOUNDS VALUES ARE MAPPED TO len(edge.domain.partitions), WHICH
    fromTerm TRANSLATES TO edge.domain.NULL
    """
    # NOTE(review): THE SIBLING COMPILERS TEST edge.esscript, THIS ONE TESTS
    # edge.script - CONFIRM WHICH ATTRIBUTE IS INTENDED
    if edge.script:
        Log.error("edge script not supported yet")

    if edge.domain.type != "numeric" and edge.domain.type != "count":
        Log.error("can only translate numeric domains")

    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if MVEL.isKeyword(value):
        # A PLAIN FIELD NAME IS READ FROM THE DOCUMENT
        value = "doc[\"" + value + "\"].value"

    interval = MVEL.value2MVEL(edge.domain.interval)

    # NOTE: A min OR max OF ZERO IS FALSY, SO IT IS TREATED AS "NOT GIVEN" HERE
    if not edge.domain.max:
        if not edge.domain.min:
            ref = 0
            # PARENTHESES MUST BALANCE, SAME SHAPE AS THE BOUNDED CASES BELOW
            partition2int = "Math.floor((" + value + ")/" + interval + ")"
            nullTest = "false"
        else:
            ref = MVEL.value2MVEL(edge.domain.min)
            partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
            nullTest = "" + value + "<" + ref
    elif not edge.domain.min:
        ref = MVEL.value2MVEL(edge.domain.max)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "" + value + ">=" + ref
    else:
        top = MVEL.value2MVEL(edge.domain.max)
        ref = MVEL.value2MVEL(edge.domain.min)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"

    # numPartitions IS AN int: str + int RAISES TypeError, SO CONVERT EXPLICITLY
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"
    offset = CNV.value2int(ref)

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey((value * edge.domain.interval) + offset)

    return Struct(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileString2Term(edge):
    """
    RETURN MVEL CODE THAT READS THE (KEYWORD) VALUE OF THE EDGE, AND THE
    FUNCTION THAT TURNS THE RESULTING TERM BACK INTO A PARTITION

    RETURNS Struct WITH
        toTerm - {"head": "", "body": <MVEL EXPRESSION STRING>}
        fromTerm - FUNCTION THAT LOOKS THE TERM UP WITH edge.domain.getPartByKey()
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    body = edge.value
    if not MVEL.isKeyword(body):
        # ONLY SIMPLE FIELD REFERENCES ARE SUPPORTED
        Log.error("not handled")
    else:
        quoted = CNV.string2quote(body)
        body = strings.expand_template("getDocValue({{path}})", {"path": quoted})

    def fromTerm(value):
        return edge.domain.getPartByKey(value)

    to_term = {"head": "", "body": body}
    return Struct(toTerm=to_term, fromTerm=fromTerm)
def compileNullTest(edge):
    """
    RETURN A MVEL EXPRESSION (A STRING) THAT WILL EVALUATE TO true FOR
    OUT-OF-BOUNDS VALUES OF THE EDGE

    edge - THE EDGE TO TEST; edge.domain.type MUST BE IN domains.ALGEBRAIC

    WHEN THE DOMAIN HAS NEITHER min NOR max, NOTHING IS OUT OF BOUNDS AND THE
    MVEL LITERAL "false" IS RETURNED (NOT THE PYTHON bool, BECAUSE CALLERS
    CONCATENATE THE RESULT INTO A LARGER EXPRESSION)
    """
    if edge.domain.type not in domains.ALGEBRAIC:
        Log.error("can only translate time and duration domains")

    # IS THERE A LIMIT ON THE DOMAIN?
    value = edge.value
    if MVEL.isKeyword(value):
        # A PLAIN FIELD NAME IS READ FROM THE DOCUMENT
        value = "doc[\"" + value + "\"].value"

    if not edge.domain.max:
        if not edge.domain.min:
            return "false"
        bot = MVEL.value2MVEL(edge.domain.min)
        nullTest = "" + value + "<" + bot
    elif not edge.domain.min:
        top = MVEL.value2MVEL(edge.domain.max)
        nullTest = "" + value + ">=" + top
    else:
        top = MVEL.value2MVEL(edge.domain.max)
        bot = MVEL.value2MVEL(edge.domain.min)
        nullTest = "(" + value + "<" + bot + ") or (" + value + ">=" + top + ")"

    return nullTest
def compileEdges2Term(mvel_compiler, edges, constants):
"""
TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)
GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM
GIVE LAMBDA THAT WILL CONVERT THE TERM BACK INTO THE TUPLE
RETURNS TUPLE OBJECT WITH "type" and "value" ATTRIBUTES.
"type" CAN HAVE A VALUE OF "script", "field" OR "count"
CAN USE THE constants (name, value pairs)

NOTE(review): THE Struct ACTUALLY RETURNED BELOW CARRIES EITHER "field" OR
"expression", PLUS "term2parts"; THE "type"/"value" DESCRIPTION ABOVE LOOKS
OUT OF DATE - CONFIRM
"""
# IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING
edge0 = edges[0]
if len(edges) == 1 and edge0.domain.type in ["set", "default"]:
# THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET
def temp(term):
return StructList([edge0.domain.getPartByKey(term)])
if edge0.value and MVEL.isKeyword(edge0.value):
# PLAIN FIELD REFERENCE: NO SCRIPT REQUIRED
return Struct(
field=edge0.value,
term2parts=temp
)
elif COUNT(edge0.domain.dimension.fields) == 1:
# THE DIMENSION MAPS TO EXACTLY ONE FIELD
return Struct(
field=edge0.domain.dimension.fields[0],
term2parts=temp
)
elif not edge0.value and edge0.domain.partitions:
# NO VALUE GIVEN: LET THE COMPILER BUILD A SCRIPT FROM THE PARTITIONS
script = mvel_compiler.Parts2TermScript(edge0.domain)
return Struct(
expression=script,
term2parts=temp
)
else:
return Struct(
expression=mvel_compiler.compile_expression(edge0.value, constants),
term2parts=temp
)
mvel_terms = [] # FUNCTION TO PACK TERMS
fromTerm2Part = [] # UNPACK TERMS BACK TO PARTS
for e in edges:
domain = e.domain
fields = domain.dimension.fields
if not e.value and fields:
code, decode = mvel_compiler.Parts2Term(e.domain)
t = Struct(
toTerm=code,
fromTerm=decode
)
elif fields:
Log.error("not expected")
elif e.domain.type == "time":
t = compileTime2Term(e)
elif e.domain.type == "duration":
t = compileDuration2Term(e)
elif e.domain.type in domains.ALGEBRAIC:
t = compileNumeric2Term(e)
elif e.domain.type == "set" and not fields:
# NOTE(review): fromTerm IS DEFINED BUT NEVER REFERENCED; THE Struct BELOW
# USES decode FROM Parts2Term() INSTEAD
def fromTerm(term):
return e.domain.getPartByKey(term)
code, decode = mvel_compiler.Parts2Term(e.domain)
t = Struct(
toTerm=code,
fromTerm=decode
)
else:
t = compileString2Term(e)
if not t.toTerm.body:
# NOTE(review): Parts2Term() IS CALLED AGAIN AND ITS RESULT DISCARDED BEFORE AN
# ERROR WITH AN EMPTY MESSAGE; LOOKS LIKE A DEBUGGING AID - CONFIRM
mvel_compiler.Parts2Term(e.domain)
Log.error("")
fromTerm2Part.append(t.fromTerm)
mvel_terms.append(t.toTerm.body)
# REGISTER THE DECODE FUNCTION
def temp(term):
# SPLIT THE PACKED TERM ON PIPE AND DECODE EACH PIECE WITH THE MATCHING EDGE DECODER
terms = term.split('|')
output = StructList([t2p(t) for t, t2p in zip(terms, fromTerm2Part)])
return output
return Struct(
expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants),
term2parts=temp
)
def fix_es_stats(s):
    """
    ES RETURNS BAD DEFAULT VALUES FOR STATS: WHEN THE count IS ZERO,
    REPLACE THE WHOLE RESULT WITH stats.zero, OTHERWISE RETURN IT WRAPPED
    """
    wrapped = wrap(s)
    return stats.zero if wrapped.count == 0 else wrapped
# MAP AGGREGATE NAME (AND ITS SYNONYMS) TO THE CANONICAL NAME USED BY THIS MODULE
# NOTE(review): THE ORIGINAL COMMENT CALLED THE VALUES "SQL FUNCTION"; CONFIRM
# WHICH BACKEND THESE NAMES ("total", "sum_of_squares", ...) BELONG TO
aggregates = {
    name: canonical
    for canonical, names in (
        ("none", ("none",)),
        ("count", ("one", "count", "N", "X0")),
        ("total", ("sum", "add", "X1")),
        ("max", ("maximum", "max")),
        ("min", ("minimum", "min")),
        ("mean", ("mean", "average", "avg")),
        ("sum_of_squares", ("X2",)),
        ("std_deviation", ("std", "stddev")),
        ("variance", ("var", "variance")),
    )
    for name in names
}

Просмотреть файл

@ -0,0 +1,148 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..structs.wraps import wrap
TRUE_FILTER = True
FALSE_FILTER = False
def simplify(esfilter):
    """
    NORMALIZE THE FILTER AND RETURN SOMETHING THAT CAN BE SENT TO ES

    esfilter - THE FILTER TO SIMPLIFY (PASSED THROUGH normalize())

    RETURNS
        {"match_all": {}}           WHEN THE FILTER REDUCES TO TRUE_FILTER
        {"not": {"match_all": {}}}  WHEN THE FILTER REDUCES TO FALSE_FILTER
        THE NORMALIZED FILTER       OTHERWISE (WITH THE isNormal MARKER CLEARED)
    """
    output = normalize(esfilter)
    if output is TRUE_FILTER:
        return {"match_all": {}}
    if output is FALSE_FILTER:
        # A bool CAN NOT CARRY THE isNormal ATTRIBUTE, AND IS NOT A VALID ES
        # FILTER; EXPRESS "MATCH NOTHING" EXPLICITLY
        return {"not": {"match_all": {}}}

    output.isNormal = None
    return output
def removeOr(esfilter):
    """
    RETURN AN EQUIVALENT FILTER WITH NO "or" CLAUSES
    EVERY or IS REWRITTEN AS not(and(not ...)); not AND and ARE DESCENDED
    INTO; ANY OTHER FILTER IS RETURNED AS-IS
    """
    negated = esfilter["not"]
    if negated:
        return {"not": removeOr(negated)}

    conjunction = esfilter["and"]
    if conjunction:
        return {"and": [removeOr(child) for child in conjunction]}

    disjunction = esfilter["or"]
    if disjunction:
        # CONVERT OR TO NOT.AND.NOT
        inverted = [{"not": removeOr(child)} for child in disjunction]
        return {"not": {"and": inverted}}

    return esfilter
def normalize(esfilter):
    """
    SIMPLIFY THE LOGIC EXPRESSION
    WRAPS THE INPUT, DELEGATES TO _normalize(), AND WRAPS THE RESULT
    """
    wrapped = wrap(esfilter)
    simplified = _normalize(wrapped)
    return wrap(simplified)
def _normalize(esfilter):
"""
DO NOT USE Structs, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
REALLY, WE JUST COLLAPSE CASCADING and AND or FILTERS

RETURNS TRUE_FILTER, FALSE_FILTER, OR A FILTER MARKED WITH isNormal=True
"""
# ALREADY-REDUCED INPUT IS RETURNED UNTOUCHED
if esfilter is TRUE_FILTER or esfilter is FALSE_FILTER or esfilter.isNormal:
return esfilter
# Log.note("from: " + CNV.object2JSON(esfilter))
# REPEAT UNTIL A PASS MAKES NO FURTHER CHANGE
isDiff = True
while isDiff:
isDiff = False
if esfilter["and"]:
output = []
for a in esfilter["and"]:
if isinstance(a, (list, set)):
# NOTE(review): THIS IMPORTS Log FROM THE dzAlerts PACKAGE, WHILE THE SIBLING
# MODULES USE THE RELATIVE ..env.logs; LOOKS LIKE A LEFTOVER - CONFIRM
from dzAlerts.util.env.logs import Log
Log.error("and clause is not allowed a list inside a list")
# NOTE(review): THE and BRANCH RECURSES THROUGH normalize() (WHICH WRAPS),
# THE or BRANCH BELOW CALLS _normalize() DIRECTLY
a_ = normalize(a)
if a_ is not a:
isDiff = True
a = a_
if a == TRUE_FILTER:
# TRUE CONTRIBUTES NOTHING TO A CONJUNCTION
isDiff = True
continue
if a == FALSE_FILTER:
# ONE FALSE MAKES THE WHOLE CONJUNCTION FALSE
return FALSE_FILTER
if a.get("and", None):
# FLATTEN NESTED and INTO THE PARENT
isDiff = True
a.isNormal = None
output.extend(a.get("and", None))
else:
a.isNormal = None
output.append(a)
if not output:
return TRUE_FILTER
elif len(output) == 1:
# output[0].isNormal = True
esfilter = output[0]
break
elif isDiff:
esfilter = wrap({"and": output})
continue
if esfilter["or"]:
output = []
for a in esfilter["or"]:
a_ = _normalize(a)
if a_ is not a:
isDiff = True
a = a_
if a == TRUE_FILTER:
# ONE TRUE MAKES THE WHOLE DISJUNCTION TRUE
return TRUE_FILTER
if a == FALSE_FILTER:
# FALSE CONTRIBUTES NOTHING TO A DISJUNCTION
isDiff = True
continue
if a.get("or", None):
# FLATTEN NESTED or INTO THE PARENT
a.isNormal = None
isDiff = True
output.extend(a["or"])
else:
a.isNormal = None
output.append(a)
if not output:
return FALSE_FILTER
elif len(output) == 1:
esfilter = output[0]
break
elif isDiff:
esfilter = wrap({"or": output})
continue
if esfilter.term != None:
# A term FILTER WITH NO KEYS RESTRICTS NOTHING
if esfilter.term.keys():
esfilter.isNormal = True
return esfilter
else:
return TRUE_FILTER
if esfilter.terms != None:
# A terms FILTER IS KEPT AS SOON AS ONE KEY HAS A NON-EMPTY VALUE LIST
for k, v in esfilter.terms.items():
if len(v) > 0:
esfilter.isNormal = True
return esfilter
return FALSE_FILTER
if esfilter["not"] != None:
# NEGATION: CONSTANTS ARE INVERTED, OTHERWISE THE NORMALIZED CHILD IS RE-WRAPPED
_sub = esfilter["not"]
sub = _normalize(_sub)
if sub is FALSE_FILTER:
return TRUE_FILTER
elif sub is TRUE_FILTER:
return FALSE_FILTER
elif sub is not _sub:
sub.isNormal = None
return wrap({"not": sub, "isNormal": True})
else:
sub.isNormal = None
esfilter.isNormal = True
return esfilter

Просмотреть файл

@ -0,0 +1,87 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..collections import MIN
from ..env.logs import Log
from ..struct import nvl, split_field, StructList
from ..structs.wraps import wrap
class FlatList(list):
    """
    FlatList IS A RESULT OF FILTERING SETS OF TREES
    WE SAVED OURSELVES FROM COPYING ALL OBJECTS IN ALL PATHS OF ALL TREES,
    BUT WE ARE LEFT WITH THIS LIST OF TUPLES THAT POINT TO THE SAME
    """

    def __init__(self, path, data):
        """
        path IS THE SEQUENCE OF PROPERTY NAMES THAT WERE DESCENDED
        data IS A LIST OF TUPLES
        EACH TUPLE IS THE SEQUENCE OF OBJECTS FOUND ALONG A PATH IN A TREE
        IT IS EXPECTED len(data[i]) == len(path)+1 (data[i][0] IS THE ORIGINAL ROW OBJECT)
        """
        list.__init__(self)
        self.data = data
        self.path = path

    def __len__(self):
        return len(self.data)

    def __iter__(self):
        """
        WE ARE NOW DOOMED TO COPY THE RECORDS (BECAUSE LISTS DOWN THE PATH ARE SPECIFIC ELEMENTS)
        EACH YIELDED RECORD IS A SHALLOW COPY OF THE ROW, WITH EVERY LIST ALONG
        THE PATH REPLACED BY THE ONE ELEMENT THAT WAS SELECTED
        """
        path = self.path
        for chain in self.data:
            record = chain[-1]
            # REBUILD FROM THE DEEPEST OBJECT BACK UP TO THE ROW
            for level in range(1, len(path) + 1):
                parent = dict(chain[-level - 1])
                parent[path[-level]] = record
                record = parent
            yield record

    def select(self, field_name):
        """
        RETURN THE LIST OF VALUES FOUND AT field_name (A NAME, OR A dict-LIKE
        WITH A value ATTRIBUTE); MULTIPLE SELECT IS NOT SUPPORTED
        """
        if isinstance(field_name, dict):
            field_name = field_name.value

        if not isinstance(field_name, basestring):
            Log.error("multiselect over FlatList not supported")
            return None

        # RETURN LIST OF VALUES
        keys = split_field(field_name)
        if len(keys) == 1:
            if self.path[0] == field_name:
                return [d[1] for d in self.data]
            return [d[0][field_name] for d in self.data]

        mismatches = [i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]
        depth = nvl(MIN(mismatches), len(self.path))  # LENGTH OF COMMON PREFIX
        output = StructList()
        _select1((wrap(d[depth]) for d in self.data), keys[depth:], 0, output)
        return output
def _select1(data, field, depth, output):
"""
SELECT A SINGLE FIELD
"""
for d in data:
for i, f in enumerate(field[depth:]):
d = d[f]
if d == None:
output.append(None)
break
elif isinstance(d, list):
_select1(d, field, i + 1, output)
break
else:
output.append(d)

Просмотреть файл

@ -0,0 +1,169 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import sys
from .cube import Cube
from ..queries.index import value2key
from ..struct import StructList, Struct
from ..structs.wraps import listwrap, wrap
from ..env.logs import Log
from ..collections.multiset import Multiset
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    return list of (keys, values) pairs where
        group by the set of keys
        values IS LIST OF ALL data that has those keys

    keys - NAME, OR LIST/TUPLE OF NAMES, OF THE PROPERTIES TO GROUP BY
    size, min_size, max_size - WHEN ANY IS GIVEN, GROUP BY POSITION INSTEAD
        (DELEGATES TO groupby_min_max_size(); size IS AN ALIAS FOR max_size)
    contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """
    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Cube):
        return data.groupby(keys)

    # ACCEPT None OR A SINGLE NAME TOO: value2key() AND get_keys() BOTH EXPECT
    # A SEQUENCE OF NAMES, AND A BARE STRING WOULD BE ITERATED BY CHARACTER
    if not isinstance(keys, (list, tuple)):
        keys = listwrap(keys)

    def get_keys(d):
        output = Struct()
        for k in keys:
            output[k] = d[k]
        return output

    if contiguous:
        try:
            if not data:
                return wrap([])

            agg = StructList()
            acc = StructList()
            curr_key = value2key(keys, data[0])
            for d in data:
                key = value2key(keys, d)
                if key != curr_key:
                    # SELECTOR CHANGED: CLOSE THE CURRENT GROUP AND START A NEW ONE
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    acc = StructList([d])  # SAME TYPE AS THE FIRST GROUP
                else:
                    acc.append(d)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception as e:
            Log.error("Problem grouping contiguous values", e)

    try:
        agg = {}
        for d in data:
            key = value2key(keys, d)
            pair = agg.get(key, None)
            if pair is None:
                pair = (get_keys(d), StructList())
                agg[key] = pair
            pair[1].append(d)
        return agg.values()
    except Exception as e:
        Log.error("Problem grouping", e)
def groupby_size(data, size):
    """
    LAZILY YIELD (group_number, StructList) PAIRS, EACH HOLDING UP TO size
    ITEMS TAKEN FROM data (AN ITERATOR, OR ANYTHING WITH __iter__)
    THE GROUP DURING WHICH THE SOURCE RUNS OUT IS STILL YIELDED, EVEN IF EMPTY
    """
    if hasattr(data, "next"):
        source = data
    elif hasattr(data, "__iter__"):
        source = data.__iter__()
    else:
        Log.error("do not know how to handle this type")

    # THIS IS LAZY
    group_number = 0
    exhausted = False
    while not exhausted:
        batch = StructList()
        for _ in range(size):
            try:
                batch.append(source.next())
            except StopIteration:
                exhausted = True
                break
        yield (group_number, batch)
        group_number += 1
def groupby_Multiset(data, min_size, max_size):
    """
    GROUP multiset BASED ON POPULATION OF EACH KEY, TRYING TO STAY IN min/max LIMITS
    YIELDS (group_number, list_of_keys) PAIRS

    data - ANYTHING WITH items() RETURNING (key, count) PAIRS
    min_size - None MEANS 0
    """
    if min_size == None:
        min_size = 0

    population = 0
    group_number = 0
    members = []
    for key, count in data.items():
        if population < min_size or population + count < max_size:
            # STILL ROOM IN THE CURRENT GROUP
            population += count
            members.append(key)
        elif population < max_size:
            # CLOSE THE CURRENT GROUP, AND START THE NEXT ONE WITH THIS KEY
            yield (group_number, members)
            group_number += 1
            population = count
            members = [key]

        if population >= max_size:
            Log.error("({{min}}, {{max}}) range is too strict given step of {{increment}}", {
                "min": min_size, "max": max_size, "increment": count
            })

    if members:
        yield (group_number, members)
def groupby_min_max_size(data, min_size=0, max_size=None):
    """
    SPLIT data INTO NUMBERED GROUPS OF AT MOST max_size ITEMS
    RETURNS A GENERATOR OF (group_number, group) PAIRS

    SLICEABLE INPUT (bytearray, STRING, list) IS SLICED; OTHER ITERABLES ARE
    ACCUMULATED INTO StructLists; A Multiset GOES TO groupby_Multiset(), AND
    ANYTHING ELSE TO groupby_size()
    """
    if max_size == None:
        max_size = sys.maxint

    if isinstance(data, (bytearray, basestring, list)):
        def _slices():
            count = (len(data) - 1) // max_size + 1
            start = 0
            for group_number in range(0, count):
                yield (group_number, data[start:start + max_size:])
                start += max_size

        return _slices()

    if hasattr(data, "__iter__"):
        def _chunks():
            group_number = 0
            chunk = StructList()
            for seen, d in enumerate(data, 1):
                chunk.append(d)
                if seen % max_size == 0:
                    yield group_number, chunk
                    group_number += 1
                    chunk = StructList()
            if chunk:
                # THE LAST, PARTIAL, GROUP
                yield group_number, chunk

        return _chunks()

    if isinstance(data, Multiset):
        return groupby_Multiset(data, min_size, max_size)
    return groupby_size(data, max_size)

174
tests/util/queries/index.py Normal file
Просмотреть файл

@ -0,0 +1,174 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..queries.unique_index import UniqueIndex
from ..env.logs import Log
from ..structs.wraps import wrap, unwrap, tuplewrap
class Index(object):
"""
USING DATABASE TERMINOLOGY, THIS IS A NON-UNIQUE INDEX

_data IS A TREE OF NESTED dicts, ONE LEVEL PER KEY, WITH A list OF THE
(UNWRAPPED) VALUES AT THE LEAVES
"""
def __init__(self, keys):
self._data = {}
self._keys = tuplewrap(keys)
self.count = 0
def __getitem__(self, key):
# FULL KEY: RETURN THE (WRAPPED) LIST OF VALUES STORED UNDER IT
# PARTIAL KEY (SHORTER list/tuple THAN self._keys): RETURN ANOTHER Index
try:
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
# RETURN ANOTHER Index
filter_key = tuple(self._keys[0:len(key):])
key = value2key(filter_key, key)
key = key[:len(filter_key)]
d = self._data
for k in key:
d = d.get(k, {})
# NOTE(review): THE SUB-INDEX IS KEYED BY THE CONSUMED PREFIX (filter_key),
# WHILE ITS _data IS THE SUBTREE FOR THE REMAINING KEYS - CONFIRM INTENT
output = Index(filter_key)
output._data = d
return output
key = value2key(self._keys, key)
d = self._data
for k in key:
d = d.get(k, {})
return wrap(list(d))
except Exception, e:
Log.error("something went wrong", e)
def __setitem__(self, key, value):
Log.error("Not implemented")
def add(self, val):
# WALK (AND CREATE AS NEEDED) ONE dict PER LEADING KEY, THEN APPEND TO THE LEAF LIST
key = value2key(self._keys, val)
d = self._data
for k in key[:-1]:
e = d.get(k, None)
if e is None:
e = {}
d[k] = e
d = e
k = key[-1]
e = d.get(k, None)
if e is None:
e = []
d[k] = e
e.append(unwrap(val))
self.count += 1
def __contains__(self, key):
# COMPUTES THE ANSWER TWO WAYS AND RAISES AN ERROR WHEN THEY DISAGREE
# NOTE(review): LOOKS LIKE A SELF-CHECK LEFT IN WHILE _test_contains() WAS BEING VALIDATED
expected = True if self[key] else False
testing = self._test_contains(key)
if testing==expected:
return testing
else:
Log.error("not expected")
def _test_contains(self, key):
# TRUE WHEN EVERY PART OF THE (FULL OR PARTIAL) KEY CAN BE FOLLOWED THROUGH _data
try:
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
# RETURN ANOTHER Index
length = len(key)
key = value2key(self._keys[0:length:], key)
d = self._data
for k in key[:length]:
try:
d = d[k]
except Exception, e:
return False
return True
key = value2key(self._keys, key)
d = self._data
for k in key:
try:
d = d[k]
except Exception, e:
return False
return True
except Exception, e:
Log.error("something went wrong", e)
def __nonzero__(self):
# TRUTHY WHEN THE TOP LEVEL OF _data HAS ANY KEY
if self._data.keys():
return True
else:
return False
def __iter__(self):
# DEPTH-FIRST WALK OF THE KEY TREE, YIELDING EVERY (WRAPPED) LEAF VALUE
# NOTE: THE LOCAL FUNCTION SHADOWS THE BUILTIN iter() INSIDE THIS METHOD
def iter(data, depth):
if depth == 0:
for v in data:
yield wrap(v)
return
for v in data.values():
for v1 in iter(v, depth - 1):
yield wrap(v1)
return iter(self._data, len(self._keys))
# THE SET OPERATIONS BELOW COLLECT THEIR RESULT IN A UniqueIndex OVER THE SAME KEYS
def __sub__(self, other):
output = UniqueIndex(self._keys)
for v in self:
if v not in other:
output.add(v)
return output
def __and__(self, other):
output = UniqueIndex(self._keys)
for v in self:
if v in other:
output.add(v)
return output
def __or__(self, other):
output = UniqueIndex(self._keys)
for v in self:
output.add(v)
for v in other:
output.add(v)
return output
def __len__(self):
# count IS MAINTAINED BY add(); WHEN IT IS ZERO THE VALUES ARE COUNTED BY ITERATION
if self.count == 0:
for d in self:
self.count += 1
return self.count
def subtract(self, other):
return self.__sub__(other)
def intersect(self, other):
return self.__and__(other)
def value2key(keys, val):
    """
    RETURN THE tuple USED AS THE INDEX KEY FOR val

    keys - SEQUENCE OF PROPERTY NAMES
    val - dict (PROPERTIES ARE LOOKED UP BY NAME), list/tuple (ALREADY IN KEY
          ORDER), OR, FOR A SINGLE KEY ONLY, THE BARE VALUE
    """
    single = len(keys) == 1

    if isinstance(val, dict):
        if single:
            return (val[keys[0]],)
        return tuple(val[k] for k in keys)

    if isinstance(val, (list, tuple)):
        if single:
            return (val[0],)
        return tuple(val)

    if single:
        return (val,)
    Log.error("do not know what to do here")

365
tests/util/queries/query.py Normal file
Просмотреть файл

@ -0,0 +1,365 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from .dimensions import Dimension
from .domains import Domain
from ..collections import AND, reverse
from ..env.logs import Log
from ..queries import MVEL
from ..queries.filters import TRUE_FILTER, simplify
from ..struct import nvl, Struct, EmptyList, split_field, join_field, StructList
from ..structs.wraps import wrap, unwrap, listwrap
from .es_query_util import INDEX_CACHE
class Query(object):
    """
    NORMALIZED FORM OF A QUERY: select, edges, from, where, window, sort,
    limit AND isLean ARE EXPANDED TO THEIR CANONICAL STRUCTURES
    PASSING AN EXISTING Query RETURNS THAT SAME INSTANCE, UNCHANGED
    """

    def __new__(cls, query, schema=None):
        if isinstance(query, Query):
            return query
        return object.__new__(cls)

    def __init__(self, query, schema=None):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON

        query - dict-LIKE QUERY DESCRIPTION (OR AN EXISTING Query)
        schema - OPTIONAL; USED TO EXPAND DIMENSION NAMES
        """
        if isinstance(query, Query):
            # __new__ RETURNED THE EXISTING INSTANCE; DO NOT RE-INITIALIZE IT
            return

        object.__init__(self)
        query = wrap(query)

        self.name = query.name

        select = query.select
        if isinstance(select, list):
            select = wrap([unwrap(_normalize_select(s, schema=schema)) for s in select])
        elif select:
            select = _normalize_select(select, schema=schema)
        else:
            select = StructList()
        self.select2index = {}  # MAP FROM NAME TO data INDEX
        for i, s in enumerate(listwrap(select)):
            self.select2index[s.name] = i
        self.select = select

        self.edges = _normalize_edges(query.edges, schema=schema)
        self.frum = _normalize_from(query["from"], schema=schema)
        self.where = _normalize_where(query.where, schema=schema)

        self.window = [_normalize_window(w) for w in listwrap(query.window)]

        self.sort = _normalize_sort(query.sort)
        self.limit = query.limit
        self.isLean = query.isLean

    @property
    def columns(self):
        # NOTE(review): select CAN BE A SINGLE (NON-LIST) CLAUSE, SEE __init__;
        # CONFIRM THAT + IS DEFINED FOR THAT CASE
        return self.select + self.edges

    def __getitem__(self, item):
        """
        dict-STYLE ACCESS TO THE QUERY PROPERTIES
        "from" IS STORED AS frum (from IS A PYTHON KEYWORD)
        UNKNOWN PROPERTIES RETURN None
        """
        if item == "from":
            return self.frum
        # Query IS NOT A Struct, SO Struct.__getitem__ CAN NOT BE APPLIED TO IT;
        # READ THE ATTRIBUTE (OR PROPERTY) DIRECTLY
        return getattr(self, item, None)

    def copy(self):
        """
        RETURN A SHALLOW COPY (THE ATTRIBUTE VALUES ARE SHARED)
        """
        output = object.__new__(Query)
        source = object.__getattribute__(self, "__dict__")
        dest = object.__getattribute__(output, "__dict__")
        struct.set_default(dest, source)
        return output
def _normalize_selects(selects, schema=None):
    """
    NORMALIZE ONE SELECT CLAUSE, OR A list OF THEM (RETURNED WRAPPED)
    """
    if not isinstance(selects, list):
        return _normalize_select(selects, schema=schema)

    normalized = []
    for s in selects:
        normalized.append(_normalize_select(s, schema=schema))
    return wrap(normalized)
def _normalize_select(select, schema=None):
# RETURN THE CANONICAL FORM (name, value, aggregate) OF ONE SELECT CLAUSE
# select - A FIELD NAME, OR AN OBJECT WITH name/value/aggregate PROPERTIES
# schema - OPTIONAL; A NAME FOUND IN THE schema IS EXPANDED WITH ITS getSelect()
if isinstance(select, basestring):
if schema:
s = schema[select]
if s:
return s.getSelect()
return Struct(
name=select.rstrip("."), # TRAILING DOT INDICATES THE VALUE, BUT IS INVALID FOR THE NAME
value=select,
aggregate="none"
)
else:
if not select.name:
# COPY BEFORE NAMING, SO THE CALLER'S NAMELESS CLAUSE IS NOT RENAMED
select = select.copy()
select.name = nvl(select.value, select.aggregate)
# DEFAULT AGGREGATE IS "none"
select.aggregate = nvl(select.aggregate, "none")
return select
def _normalize_edges(edges, schema=None):
    """
    NORMALIZE EVERY EDGE (A SINGLE EDGE IS TREATED AS A LIST OF ONE)
    """
    normalized = []
    for e in listwrap(edges):
        normalized.append(_normalize_edge(e, schema=schema))
    return normalized
def _normalize_edge(edge, schema=None):
    """
    RETURN THE CANONICAL Struct FOR ONE EDGE

    edge - A NAME, OR AN OBJECT WITH name/value/range/allowNulls/domain
    schema - OPTIONAL; A NAME FOUND IN THE schema TAKES ITS DOMAIN FROM THERE
    """
    if not isinstance(edge, basestring):
        return Struct(
            name=nvl(edge.name, edge.value),
            value=edge.value,
            range=edge.range,
            allowNulls=edge.allowNulls is not False,  # ONLY AN EXPLICIT False DISABLES NULLS
            domain=_normalize_domain(edge.domain, schema=schema)
        )

    if schema:
        dimension = schema[edge]
        if dimension:
            return Struct(name=edge, domain=dimension.getDomain())

    # PLAIN FIELD NAME: THE NAME IS ALSO THE VALUE, WITH THE DEFAULT DOMAIN
    default_domain = _normalize_domain(schema=schema)
    return Struct(name=edge, value=edge, domain=default_domain)
def _normalize_from(frum, schema=None):
    """
    RETURN THE CANONICAL FORM OF THE from CLAUSE
        A NAME BECOMES Struct(name=...)
        A dict WITH ITS OWN from (A SUB-QUERY) BECOMES A Query
        ANYTHING ELSE IS RETURNED WRAPPED
    """
    source = wrap(frum)
    if isinstance(source, basestring):
        return Struct(name=source)
    if not isinstance(source, dict):
        return source

    inner = source["from"]
    if inner or isinstance(inner, (list, set)):
        return Query(source, schema=schema)
    return source
def _normalize_domain(domain=None, schema=None):
    """
    RETURN A Domain FOR THE GIVEN DESCRIPTION
        NOTHING GIVEN - THE "default" DOMAIN
        A Dimension, OR THE NAME OF ONE IN THE schema - ITS getDomain()
        A Domain - RETURNED AS-IS
        OTHERWISE - BUILT FROM THE PROPERTIES (name DEFAULTS TO THE type)
    """
    if not domain:
        return Domain(type="default")
    if isinstance(domain, Dimension):
        return domain.getDomain()
    if schema and isinstance(domain, basestring) and schema[domain]:
        return schema[domain].getDomain()
    if isinstance(domain, Domain):
        return domain

    description = domain
    if not description.name:
        # COPY, SO THE CALLER'S OBJECT IS NOT GIVEN A NAME
        description = description.copy()
        description.name = description.type
    return Domain(**struct.unwrap(description))
def _normalize_window(window, schema=None):
    """
    RETURN THE CANONICAL Struct FOR ONE WINDOW CLAUSE
    (name DEFAULTS TO THE value; edges, sort, range AND where ARE NORMALIZED)
    """
    name = nvl(window.name, window.value)
    edges = [_normalize_edge(e, schema) for e in listwrap(window.edges)]
    sort = _normalize_sort(window.sort)
    limits = _normalize_range(window.range)
    where = _normalize_where(window.where, schema=schema)

    return Struct(
        name=name,
        value=window.value,
        edges=edges,
        sort=sort,
        aggregate=window.aggregate,
        range=limits,
        where=where
    )
def _normalize_range(range):
    """
    RETURN Struct(min, max) COPIED FROM range, OR None WHEN NO range IS GIVEN
    """
    if range == None:
        return None

    lower, upper = range.min, range.max
    return Struct(min=lower, max=upper)
def _normalize_where(where, schema=None):
    """
    RETURN THE where CLAUSE AS A FILTER
        NO where - TRUE_FILTER
        NO schema - THE where, UNTOUCHED
        OTHERWISE - DIMENSION NAMES ARE EXPANDED USING THE schema, THEN SIMPLIFIED
    """
    if where == None:
        return TRUE_FILTER
    if schema == None:
        return where

    expanded = _where_terms(where, where, schema)
    return simplify(expanded)
def _map_term_using_schema(master, path, term, schema_edges):
"""
IF THE WHERE CLAUSE REFERS TO FIELDS IN THE SCHEMA, THEN EXPAND THEM

master - TOP LEVEL WHERE (ONLY PASSED ALONG TO THE RECURSIVE CALL)
path - LIST OF PROPERTY NAMES DESCENDED SO FAR (ONLY PASSED ALONG TO THE RECURSIVE CALL)
term - THE {name: value} PAIRS OF A term FILTER
schema_edges - MAP FROM NAME TO Dimension (OR TO A NESTED MAP OF THE SAME)
RETURNS {"and": [...]} WITH ONE OR MORE FILTERS PER PAIR
"""
output = StructList()
for k, v in term.items():
dimension = schema_edges[k]
if isinstance(dimension, Dimension):
domain = dimension.getDomain()
if dimension.fields:
if isinstance(dimension.fields, dict):
# EXPECTING A TUPLE
# fields MAPS LOCAL PROPERTY NAME TO ES FIELD NAME; A NULL LOCAL VALUE BECOMES A missing FILTER
for local_field, es_field in dimension.fields.items():
local_value = v[local_field]
if local_value == None:
output.append({"missing": {"field": es_field}})
else:
output.append({"term": {es_field: local_value}})
continue
if len(dimension.fields) == 1 and MVEL.isKeyword(dimension.fields[0]):
# SIMPLE SINGLE-VALUED FIELD
if domain.getPartByKey(v) is domain.NULL:
output.append({"missing": {"field": dimension.fields[0]}})
else:
output.append({"term": {dimension.fields[0]: v}})
continue
if AND(MVEL.isKeyword(f) for f in dimension.fields):
# EXPECTING A TUPLE
if not isinstance(v, tuple):
Log.error("expecing {{name}}={{value}} to be a tuple", {"name": k, "value": v})
for i, f in enumerate(dimension.fields):
vv = v[i]
if vv == None:
output.append({"missing": {"field": f}})
else:
output.append({"term": {f: vv}})
continue
# NOTE(review): SAME CONDITION AS THE "SIMPLE SINGLE-VALUED FIELD" TEST ABOVE,
# WHICH ENDS IN continue; THIS BRANCH LOOKS UNREACHABLE - CONFIRM
if len(dimension.fields) == 1 and MVEL.isKeyword(dimension.fields[0]):
if domain.getPartByKey(v) is domain.NULL:
output.append({"missing": {"field": dimension.fields[0]}})
else:
output.append({"term": {dimension.fields[0]: v}})
continue
if domain.partitions:
# USE THE FILTER ATTACHED TO THE MATCHING PARTITION
part = domain.getPartByKey(v)
if part is domain.NULL or not part.esfilter:
Log.error("not expected to get NULL")
output.append(part.esfilter)
continue
else:
Log.error("not expected")
elif isinstance(v, dict):
# NESTED TERM: RECURSE WITH THE MATCHING BRANCH OF THE SCHEMA
sub = _map_term_using_schema(master, path + [k], v, schema_edges[k])
output.append(sub)
continue
# NOT IN THE SCHEMA: PASS THE TERM THROUGH UNCHANGED
output.append({"term": {k: v}})
return {"and": output}
def _move_nested_term(master, where, schema):
    """
    THE WHERE CLAUSE CAN CONTAIN NESTED PROPERTY REFERENCES, THESE MUST BE MOVED
    TO A NESTED FILTER

    where - A term FILTER WITH EXACTLY ONE (field, value) PAIR
    RETURNS A nested FILTER WHEN THE FIELD HAS A NESTED PATH, OTHERWISE where UNCHANGED
    """
    pairs = where.term.items()
    if len(pairs) != 1:
        Log.error("Expecting only one term")

    field, value = pairs[0]
    nested_path = _get_nested_path(field, schema)
    if not nested_path:
        return where

    term_filter = {"term": {field: value}}
    filtered = {
        "query": {"match_all": {}},
        "filter": {"and": [term_filter]}
    }
    return {"nested": {
        "path": nested_path,
        "query": {"filtered": filtered}
    }}
def _get_nested_path(field, schema):
# RETURN THE LONGEST PREFIX OF field THAT IS REGISTERED IN INDEX_CACHE,
# WITH ITS FIRST PATH ELEMENT REMOVED; None WHEN NO PREFIX IS REGISTERED
if MVEL.isKeyword(field):
# PREFIX THE FIELD WITH THE INDEX ALIAS
field = join_field([schema.es.alias]+split_field(field))
# reverse() PRESUMABLY MAKES THIS WALK FROM THE LONGEST PREFIX TO THE SHORTEST - TODO CONFIRM
for i, f in reverse(enumerate(split_field(field))):
path = join_field(split_field(field)[0:i+1:])
if path in INDEX_CACHE:
# DROP THE FIRST PATH ELEMENT BEFORE RETURNING
return join_field(split_field(path)[1::])
return None
def _where_terms(master, where, schema):
"""
USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
where - THE FILTER (OR SUB-FILTER) BEING CONVERTED
schema - schema.edges MAPS NAMES TO DIMENSIONS
ANYTHING THAT IS NOT A term, terms, or, and, OR not FILTER IS RETURNED UNCHANGED
"""
if isinstance(where, dict):
if where.term:
#MAP TERM
try:
output = _map_term_using_schema(master, [], where.term, schema.edges)
return output
except Exception, e:
Log.error("programmer problem?", e)
elif where.terms:
#MAP TERM
output = StructList()
for k, v in where.terms.items():
if not isinstance(v, (list, set)):
Log.error("terms filter expects list of values")
edge = schema.edges[k]
if not edge:
# NOT IN THE SCHEMA: PASS THROUGH
output.append({"terms": {k: v}})
else:
if isinstance(edge, basestring):
#DIRECT FIELD REFERENCE
# NOTE(review): THIS RETURNS IMMEDIATELY, DISCARDING ANY FILTERS ALREADY
# COLLECTED IN output FOR OTHER KEYS - CONFIRM
return {"terms": {edge: v}}
try:
domain = edge.getDomain()
except Exception, e:
Log.error("programmer error", e)
fields = domain.dimension.fields
if isinstance(fields, dict):
# ONE and-CLAUSE PER VALUE (NULL PROPERTIES ARE SKIPPED), COMBINED WITH or
or_agg = []
for vv in v:
and_agg = []
for local_field, es_field in fields.items():
vvv = vv[local_field]
if vvv != None:
and_agg.append({"term": {es_field: vvv}})
or_agg.append({"and": and_agg})
output.append({"or": or_agg})
elif isinstance(fields, list) and len(fields) == 1 and MVEL.isKeyword(fields[0]):
output.append({"terms": {fields[0]: v}})
elif domain.partitions:
# USE THE FILTER ATTACHED TO EACH MATCHING PARTITION
output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
return {"and": output}
elif where["or"]:
return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
elif where["and"]:
return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
elif where["not"]:
return {"not": unwrap(_where_terms(master, where["not"], schema))}
return where
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    sort - A FIELD NAME, AN OBJECT WITH field (OR value) AND sort PROPERTIES,
           OR A LIST OF EITHER
    RETURNS (WRAPPED) LIST OF {"field": name, "sort": direction} WHERE
    direction IS 1, 0 OR -1; A MISSING OR UNRECOGNIZED DIRECTION DEFAULTS TO 1
    """
    if not sort:
        return EmptyList

    output = StructList()
    for s in listwrap(sort):
        if isinstance(s, basestring):
            output.append({"field": s, "sort": 1})
        else:
            # sort_direction IS A PLAIN dict: USE get() SO A MISSING DIRECTION
            # FALLS THROUGH TO THE DEFAULT INSTEAD OF RAISING KeyError
            direction = nvl(sort_direction.get(s.sort), 1)
            output.append({"field": nvl(s.field, s.value), "sort": direction})
    return wrap(output)
# MAP THE ACCEPTED SORT DIRECTIONS (BY NAME, OR ALREADY NUMERIC) TO 1, 0 OR -1
sort_direction = dict([
    ("asc", 1),
    ("desc", -1),
    ("none", 0),
    (1, 1),
    (0, 0),
    (-1, -1),
])

Просмотреть файл

@ -0,0 +1,29 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..structs.wraps import listwrap
class Record(object):
    """
    A VIEW OF ONE CELL OF A CUBE: SELECT AND EDGE NAMES CAN BE READ AS
    ITEMS (record[name]) OR AS ATTRIBUTES (record.name)
    """

    def __init__(self, coord, cube):
        self.coord = coord  # ONE INDEX PER EDGE OF THE CUBE
        self.cube = cube

    def __getitem__(self, item):
        """
        RETURN THE VALUE OF THE NAMED SELECT, OR THE PARTITION OF THE NAMED
        EDGE AT THIS COORDINATE; None WHEN THE NAME IS NEITHER
        """
        for s in listwrap(self.cube.select):
            if s.name == item:
                # NOTE(review): coord IS NOT USED HERE, SO THE WHOLE SELECT
                # COLUMN IS RETURNED - CONFIRM AGAINST THE SHAPE OF cube.data
                return self.cube.data[item]

        for i, e in enumerate(self.cube.edges):
            if e.name == item:
                # DOMAINS KEEP THEIR PARTS IN partitions
                return e.domain.partitions[self.coord[i]]

    def __getattr__(self, item):
        # __getattr__ IS ONLY CALLED FOR MISSING ATTRIBUTES; IF coord OR cube
        # ARE MISSING (INSTANCE NOT INITIALIZED) THEN LOOKING THEM UP THROUGH
        # __getitem__ WOULD RECURSE WITHOUT END
        if item in ("coord", "cube"):
            raise AttributeError(item)
        return self[item]

Просмотреть файл

@ -0,0 +1,110 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..env.logs import Log
from ..structs.wraps import wrap, unwrap, tuplewrap
class UniqueIndex(object):
    """
    DEFINE A SET OF ATTRIBUTES THAT UNIQUELY IDENTIFIES EACH OBJECT IN A list.
    THIS ALLOWS set-LIKE COMPARISONS (UNION, INTERSECTION, DIFFERENCE, ETC) WHILE
    STILL MAINTAINING list-LIKE FEATURES
    """

    def __init__(self, keys):
        self._data = {}
        self._keys = tuplewrap(keys)
        self.count = 0

    def __getitem__(self, key):
        # RETURN THE (WRAPPED) VALUE STORED UNDER key
        try:
            slot = value2key(self._keys, key)
            return wrap(self._data.get(slot, None))
        except Exception as e:
            Log.error("something went wrong", e)

    def __setitem__(self, key, value):
        # STORE value UNDER key; IT IS AN ERROR IF THE KEY IS ALREADY TAKEN
        try:
            slot = value2key(self._keys, key)
            if self._data.get(slot, None) != None:
                Log.error("key already filled")
            self._data[slot] = unwrap(value)
            self.count += 1
        except Exception as e:
            Log.error("something went wrong", e)

    def add(self, val):
        # ADD val UNDER ITS OWN KEY; RE-ADDING THE VERY SAME OBJECT IS ALLOWED
        slot = value2key(self._keys, val)
        existing = self._data.get(slot, None)
        if existing is None:
            self._data[slot] = unwrap(val)
            self.count += 1
        elif existing is not val:
            Log.error("key already filled")

    def __contains__(self, key):
        return self[key] != None

    def __iter__(self):
        return (wrap(v) for v in self._data.itervalues())

    def _collect(self, values):
        # NEW UniqueIndex (SAME KEYS) HOLDING THE GIVEN VALUES
        output = UniqueIndex(self._keys)
        for v in values:
            output.add(v)
        return output

    def __sub__(self, other):
        return self._collect(v for v in self if v not in other)

    def __and__(self, other):
        return self._collect(v for v in self if v in other)

    def __or__(self, other):
        output = self._collect(self)
        for v in other:
            output.add(v)
        return output

    def __len__(self):
        # count IS MAINTAINED ON INSERT; WHEN IT IS ZERO THE VALUES ARE COUNTED BY ITERATION
        if self.count == 0:
            for _ in self:
                self.count += 1
        return self.count

    def subtract(self, other):
        return self.__sub__(other)

    def intersect(self, other):
        return self.__and__(other)
def value2key(keys, val):
    """
    EXTRACT THE KEY VALUE FROM val, GIVEN THE SEQUENCE OF KEY NAMES

    SINGLE KEY NAME:    dict -> val[name], list/tuple -> FIRST ELEMENT,
                        ANYTHING ELSE IS ASSUMED TO ALREADY BE THE KEY
    MANY KEY NAMES:     dict -> WRAPPED {name: val[name]}, list/tuple ->
                        WRAPPED {name: element} PAIRED BY POSITION,
                        ANYTHING ELSE IS AN ERROR
    """
    is_mapping = isinstance(val, dict)
    is_sequence = isinstance(val, (list, tuple))

    if len(keys) == 1:
        if is_mapping:
            return val[keys[0]]
        if is_sequence:
            return val[0]
        return val

    if is_mapping:
        return wrap({k: val[k] for k in keys})
    if is_sequence:
        return wrap(dict(zip(keys, val)))
    Log.error("do not know what to do here")

Просмотреть файл

@ -8,10 +8,17 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from ..logs import Log
from __future__ import unicode_literals
import functools
from ..struct import StructList
from ..maths import stats
from ..collections import MIN, MAX
from ..env.logs import Log
from ..maths import Math
from ..multiset import Multiset
from ..stats import Z_moment, stats2z_moment, z_moment2stats
from ..collections.multiset import Multiset
from ..maths.stats import Z_moment, z_moment2stats
# A VARIETY OF SLIDING WINDOW FUNCTIONS
class AggregationFunction(object):
@ -73,21 +80,63 @@ class WindowFunction(AggregationFunction):
Log.error("not implemented yet")
class Stats(WindowFunction):
def __init__(self):
object.__init__(self)
self.total = Z_moment(0, 0, 0)
def Stats(**kwargs):
    """
    RETURN THE AGGREGATE TO USE FOR STATISTICS:
    NO PARAMETERS   -> THE _SimpleStats CLASS
    WITH PARAMETERS -> _Stats WITH THOSE PARAMETERS ALREADY BOUND
    """
    if kwargs:
        return functools.partial(_Stats, **kwargs)
    return _SimpleStats
class _Stats(WindowFunction):
"""
TRACK STATS, BUT IGNORE OUTLIERS
"""
def __init__(self, middle=None):
object.__init__(self)
self.middle = middle
self.samples = StructList()
def add(self, value):
if value == None:
return
self.total += stats2z_moment(value)
self.samples.append(value)
def sub(self, value):
if value == None:
return
self.total -= stats2z_moment(value)
self.samples.remove(value)
def merge(self, agg):
Log.error("Do not know how to handle")
def end(self):
ignore = Math.ceiling(len(self.samples) * (1 - self.middle) / 2)
if ignore * 2 >= len(self.samples):
return stats.Stats()
output = stats.Stats(samples=sorted(self.samples)[ignore:len(self.samples) - ignore:])
output.samples = list(self.samples)
return output
class _SimpleStats(WindowFunction):
"""
AGGREGATE Stats OBJECTS, NOT JUST VALUES
"""
def __init__(self):
object.__init__(self)
self.total = Z_moment(0, 0, 0)
def add(self, value):
if value == None:
return
self.total += Z_moment.new_instance([value])
def sub(self, value):
if value == None:
return
self.total -= Z_moment.new_instance([value])
def merge(self, agg):
self.total += agg.total
@ -104,6 +153,7 @@ class Min(WindowFunction):
def add(self, value):
if value == None:
return
self.total.add(value)
@ -113,7 +163,7 @@ class Min(WindowFunction):
self.total.remove(value)
def end(self):
return Math.min(self.total)
return MIN(self.total)
class Max(WindowFunction):
@ -133,7 +183,7 @@ class Max(WindowFunction):
self.total.remove(value)
def end(self):
return Math.max(self.total)
return MAX(*self.total)
class Count(WindowFunction):
@ -174,5 +224,3 @@ class Sum(WindowFunction):
def end(self):
return self.total

Просмотреть файл

@ -1,31 +0,0 @@
# encoding: utf-8
#
import random
import string
SIMPLE_ALPHABET = string.ascii_letters + string.digits
SEED = random.Random()


class Random(object):
    """
    STATIC HELPERS FOR MAKING RANDOM STRINGS, INTEGERS AND SAMPLES
    """

    @staticmethod
    def string(length, alphabet=SIMPLE_ALPHABET):
        # ONE SEED.choice() PER CHARACTER, IN ORDER
        return ''.join(SEED.choice(alphabet) for _ in range(0, length))

    @staticmethod
    def hex(length):
        # UPPERCASE HEXADECIMAL DIGITS ONLY
        hex_digits = string.digits + 'ABCDEF'
        return Random.string(length, hex_digits)

    @staticmethod
    def int(*args):
        # SAME PARAMETERS AS random.randrange()
        return random.randrange(*args)

    @staticmethod
    def sample(data, count):
        # count PICKS, WITH REPLACEMENT, FROM data
        size = len(data)
        picks = []
        for _ in range(count):
            picks.append(data[Random.int(size)])
        return picks

7
tests/util/regex.py Normal file
Просмотреть файл

@ -0,0 +1,7 @@
import re
def match(pattern, text):
    """
    MATCH pattern AGAINST THE START OF text

    RETURNS THE TUPLE OF CAPTURED GROUPS, OR None WHEN text DOES NOT MATCH
    """
    result = re.match(pattern, text)
    if result is None:
        # re.match() RETURNS None ON NO MATCH; CALLING .groups() ON IT WOULD
        # RAISE AN AttributeError
        return None
    return result.groups()

Просмотреть файл

@ -0,0 +1 @@
__author__ = 'klahnakoski'

Просмотреть файл

@ -8,20 +8,24 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime
import json
import subprocess
from pymysql import connect
from . import struct
from .maths import Math
from .strings import expand_template
from .struct import nvl
from .cnv import CNV
from .logs import Log, Except
from .queries import Q
from .strings import indent
from .strings import outdent
from .files import File
from pymysql import connect, InterfaceError
from .. import struct
from ..jsons import json_scrub
from ..maths import Math
from ..strings import expand_template
from ..struct import nvl
from ..structs.wraps import wrap, listwrap
from ..cnv import CNV
from ..env.logs import Log, Except
from ..queries import Q
from ..strings import indent
from ..strings import outdent
from ..env.files import File
DEBUG = False
@ -32,16 +36,26 @@ all_db = []
class DB(object):
"""
Parameterize SQL by name rather than by position. Return records as objects
rather than tuples.
"""
def __init__(self, settings, schema=None, preamble=None):
def __init__(self, settings, schema=None, preamble=None, readonly=False):
"""
OVERRIDE THE settings.schema WITH THE schema PARAMETER
preamble WILL BE USED TO ADD COMMENTS TO THE BEGINNING OF ALL SQL
THE INTENT IS TO HELP ADMINISTRATORS ID THE SQL RUNNING ON THE DATABASE
schema - NAME OF DEFAULT database/schema IN QUERIES
preamble - A COMMENT TO BE ADDED TO EVERY SQL STATEMENT SENT
readonly - USED ONLY TO INDICATE IF A TRANSACTION WILL BE OPENED UPON
USE IN with CLAUSE, YOU CAN STILL SEND UPDATES, BUT MUST OPEN A
TRANSACTION BEFORE YOU DO
"""
if settings == None:
Log.warning("No settings provided")
return
all_db.append(self)
@ -50,7 +64,7 @@ class DB(object):
settings = settings.settings
self.settings = settings.copy()
self.settings.schema = nvl(schema, self.settings.schema)
self.settings.schema = nvl(schema, self.settings.schema, self.settings.database)
preamble = nvl(preamble, self.settings.preamble)
if preamble == None:
@ -58,6 +72,7 @@ class DB(object):
else:
self.preamble = indent(preamble, "# ").strip() + "\n"
self.readonly = readonly
self.debug = nvl(self.settings.debug, DEBUG)
self._open()
@ -74,7 +89,10 @@ class DB(object):
use_unicode=True
)
except Exception, e:
Log.error(u"Failure to connect", e)
if self.settings.host.find("://") == -1:
Log.error(u"Failure to connect", e)
else:
Log.error(u"Failure to connect. PROTOCOL PREFIX IS PROBABLY BAD", e)
self.cursor = None
self.partial_rollback = False
self.transaction_level = 0
@ -82,17 +100,22 @@ class DB(object):
def __enter__(self):
self.begin()
if not self.readonly:
self.begin()
return self
def __exit__(self, type, value, traceback):
if self.readonly:
self.close()
return
if isinstance(value, BaseException):
try:
if self.cursor: self.cursor.close()
self.cursor = None
self.rollback()
except Exception, e:
Log.warning(u"can not rollback()", e)
Log.warning(u"can not rollback()", [value, e])
finally:
self.close()
return
@ -112,14 +135,15 @@ class DB(object):
return Transaction(self)
def begin(self):
if self.transaction_level == 0: self.cursor = self.db.cursor()
if self.transaction_level == 0:
self.cursor = self.db.cursor()
self.transaction_level += 1
self.execute("SET TIME_ZONE='+00:00'")
def close(self):
if self.transaction_level > 0:
Log.error(u"expecting commit() or rollback() before close")
Log.error("expecting commit() or rollback() before close")
self.cursor = None #NOT NEEDED
try:
self.db.close()
@ -127,7 +151,7 @@ class DB(object):
if e.message.find("Already closed") >= 0:
return
Log.warning(u"can not close()", e)
Log.warning("can not close()", e)
finally:
all_db.remove(self)
@ -139,17 +163,17 @@ class DB(object):
self.rollback()
except Exception:
pass
Log.error(u"Error while processing backlog", e)
Log.error("Error while processing backlog", e)
if self.transaction_level == 0:
Log.error(u"No transaction has begun")
Log.error("No transaction has begun")
elif self.transaction_level == 1:
if self.partial_rollback:
try:
self.rollback()
except Exception:
pass
Log.error(u"Commit after nested rollback is not allowed")
Log.error("Commit after nested rollback is not allowed")
else:
if self.cursor: self.cursor.close()
self.cursor = None
@ -161,18 +185,18 @@ class DB(object):
try:
self.commit()
except Exception, e:
Log.error(u"Can not flush", e)
Log.error("Can not flush", e)
try:
self.begin()
except Exception, e:
Log.error(u"Can not flush", e)
Log.error("Can not flush", e)
def rollback(self):
self.backlog = [] #YAY! FREE!
if self.transaction_level == 0:
Log.error(u"No transaction has begun")
Log.error("No transaction has begun")
elif self.transaction_level == 1:
self.transaction_level -= 1
if self.cursor != None:
@ -182,7 +206,7 @@ class DB(object):
else:
self.transaction_level -= 1
self.partial_rollback = True
Log.warning(u"Can not perform partial rollback!")
Log.warning("Can not perform partial rollback!")
def call(self, proc_name, params):
@ -193,23 +217,29 @@ class DB(object):
self.cursor.close()
self.cursor = self.db.cursor()
except Exception, e:
Log.error(u"Problem calling procedure " + proc_name, e)
Log.error("Problem calling procedure " + proc_name, e)
def query(self, sql, param=None):
"""
RETURN RESULTS IN [row_num][column] GRID
"""
self._execute_backlog()
try:
old_cursor = self.cursor
if not old_cursor: #ALLOW NON-TRANSACTIONAL READS
self.cursor = self.db.cursor()
self.cursor.execute("SET TIME_ZONE='+00:00'")
self.cursor.close()
self.cursor = self.db.cursor()
if param: sql = expand_template(sql, self.quote_param(param))
if param:
sql = expand_template(sql, self.quote_param(param))
sql = self.preamble + outdent(sql)
if self.debug:
Log.note(u"Execute SQL:\n{{sql}}", {u"sql": indent(sql)})
Log.note("Execute SQL:\n{{sql}}", {"sql": indent(sql)})
self.cursor.execute(sql)
columns = [utf8_to_unicode(d[0]) for d in nvl(self.cursor.description, [])]
fixed = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
result = CNV.table2list(columns, fixed)
@ -220,9 +250,45 @@ class DB(object):
return result
except Exception, e:
if e.message.find("InterfaceError") >= 0:
Log.error(u"Did you close the db connection?", e)
Log.error(u"Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
Log.error("Did you close the db connection?", e)
Log.error("Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
def column_query(self, sql, param=None):
    """
    RUN sql (AFTER EXPANDING param INTO IT) AND RETURN THE RESULT
    TRANSPOSED: ONE SEQUENCE PER COLUMN, INDEXED AS [column][row_num]
    """
    self._execute_backlog()
    try:
        had_cursor = self.cursor
        if not had_cursor:
            # ALLOW NON-TRANSACTIONAL READS: SET THE TIME ZONE ON A
            # THROW-AWAY CURSOR, THEN OPEN A FRESH ONE FOR THE QUERY
            self.cursor = self.db.cursor()
            self.cursor.execute("SET TIME_ZONE='+00:00'")
            self.cursor.close()
            self.cursor = self.db.cursor()

        if param:
            sql = expand_template(sql, self.quote_param(param))
        sql = self.preamble + outdent(sql)
        if self.debug:
            Log.note("Execute SQL:\n{{sql}}", {"sql": indent(sql)})

        self.cursor.execute(sql)
        rows = []
        for row in self.cursor:
            rows.append([utf8_to_unicode(c) for c in row])
        result = zip(*rows)  # TRANSPOSE ROWS INTO COLUMNS

        if not had_cursor:
            # CLEANUP AFTER NON-TRANSACTIONAL READS
            self.cursor.close()
            self.cursor = None

        return result
    except Exception as e:
        if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
            Log.error("Did you close the db connection?", e)
        Log.error("Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
# EXECUTE GIVEN METHOD FOR ALL ROWS RETURNED
@ -236,30 +302,31 @@ class DB(object):
if not old_cursor: #ALLOW NON-TRANSACTIONAL READS
self.cursor = self.db.cursor()
if param: sql = expand_template(sql, self.quote_param(param))
if param:
sql = expand_template(sql, self.quote_param(param))
sql = self.preamble + outdent(sql)
if self.debug:
Log.note(u"Execute SQL:\n{{sql}}", {u"sql": indent(sql)})
Log.note("Execute SQL:\n{{sql}}", {"sql": indent(sql)})
self.cursor.execute(sql)
columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
for r in self.cursor:
num += 1
_execute(struct.wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))
_execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))
if not old_cursor: #CLEANUP AFTER NON-TRANSACTIONAL READS
self.cursor.close()
self.cursor = None
except Exception, e:
Log.error(u"Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
Log.error("Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
return num
def execute(self, sql, param=None):
if self.transaction_level == 0:
Log.error(u"Expecting transaction to be started before issuing queries")
Log.error("Expecting transaction to be started before issuing queries")
if param:
sql = expand_template(sql, self.quote_param(param))
@ -284,29 +351,33 @@ class DB(object):
# We have no way to execute an entire SQL file in bulk, so we
# have to shell out to the commandline client.
args = [
u"mysql",
u"-h{0}".format(settings.host),
u"-u{0}".format(settings.username),
u"-p{0}".format(settings.password),
u"{0}".format(settings.schema)
"mysql",
"-h{0}".format(settings.host),
"-u{0}".format(settings.username),
"-p{0}".format(settings.password),
"{0}".format(settings.schema)
]
proc = subprocess.Popen(
args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=-1
)
(output, _) = proc.communicate(sql)
try:
proc = subprocess.Popen(
args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=-1
)
if isinstance(sql, unicode):
sql = sql.encode("utf8")
(output, _) = proc.communicate(sql)
except Exception, e:
Log.error("Can not call \"mysql\"", e)
if proc.returncode:
if len(sql) > 10000:
sql = u"<" + unicode(len(sql)) + u" bytes of sql>"
Log.error(u"Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n", {
u"sql": indent(sql),
u"return_code": proc.returncode,
u"output": output
sql = "<" + unicode(len(sql)) + " bytes of sql>"
Log.error("Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n", {
"sql": indent(sql),
"return_code": proc.returncode,
"output": output
})
@staticmethod
@ -321,31 +392,31 @@ class DB(object):
if not self.backlog: return
(backlog, self.backlog) = (self.backlog, [])
if self.db.__module__.startswith(u"pymysql"):
if self.db.__module__.startswith("pymysql"):
# BUG IN PYMYSQL: CAN NOT HANDLE MULTIPLE STATEMENTS
# https://github.com/PyMySQL/PyMySQL/issues/157
for b in backlog:
sql = self.preamble + b
try:
if self.debug:
Log.note(u"Execute SQL:\n{{sql|indent}}", {u"sql": sql})
Log.note("Execute SQL:\n{{sql|indent}}", {"sql": sql})
self.cursor.execute(b)
except Exception, e:
Log.error(u"Can not execute sql:\n{{sql}}", {u"sql": sql}, e)
Log.error("Can not execute sql:\n{{sql}}", {"sql": sql}, e)
self.cursor.close()
self.cursor = self.db.cursor()
else:
for i, g in Q.groupby(backlog, size=MAX_BATCH_SIZE):
sql = self.preamble + u";\n".join(g)
sql = self.preamble + ";\n".join(g)
try:
if self.debug:
Log.note(u"Execute block of SQL:\n{{sql|indent}}", {u"sql": sql})
Log.note("Execute block of SQL:\n{{sql|indent}}", {"sql": sql})
self.cursor.execute(sql)
self.cursor.close()
self.cursor = self.db.cursor()
except Exception, e:
Log.error(u"Problem executing SQL:\n{{sql}}", {u"sql": indent(sql.strip())}, e, offset=1)
Log.error("Problem executing SQL:\n{{sql}}", {"sql": indent(sql.strip())}, e, offset=1)
## Insert dictionary of values into table
@ -353,28 +424,28 @@ class DB(object):
keys = record.keys()
try:
command = u"INSERT INTO " + self.quote_column(table_name) + u"(" + \
u",".join([self.quote_column(k) for k in keys]) + \
u") VALUES (" + \
u",".join([self.quote_value(record[k]) for k in keys]) + \
u")"
command = "INSERT INTO " + self.quote_column(table_name) + "(" + \
",".join([self.quote_column(k) for k in keys]) + \
") VALUES (" + \
",".join([self.quote_value(record[k]) for k in keys]) + \
")"
self.execute(command)
except Exception, e:
Log.error(u"problem with record: {{record}}", {u"record": record}, e)
Log.error("problem with record: {{record}}", {"record": record}, e)
# candidate_key IS LIST OF COLUMNS THAT CAN BE USED AS UID (USUALLY PRIMARY KEY)
# ONLY INSERT IF THE candidate_key DOES NOT EXIST YET
def insert_new(self, table_name, candidate_key, new_record):
candidate_key = struct.listwrap(candidate_key)
candidate_key = listwrap(candidate_key)
condition = u" AND\n".join([self.quote_column(k) + u"=" + self.quote_value(new_record[k]) if new_record[k] != None else self.quote_column(k) + u" IS Null" for k in candidate_key])
command = u"INSERT INTO " + self.quote_column(table_name) + u" (" + \
u",".join([self.quote_column(k) for k in new_record.keys()]) + \
u")\n" + \
u"SELECT a.* FROM (SELECT " + u",".join([self.quote_value(v) + u" " + self.quote_column(k) for k, v in new_record.items()]) + u" FROM DUAL) a\n" + \
u"LEFT JOIN " + \
u"(SELECT 'dummy' exist FROM " + self.quote_column(table_name) + u" WHERE " + condition + u" LIMIT 1) b ON 1=1 WHERE exist IS Null"
condition = " AND\n".join([self.quote_column(k) + "=" + self.quote_value(new_record[k]) if new_record[k] != None else self.quote_column(k) + " IS Null" for k in candidate_key])
command = "INSERT INTO " + self.quote_column(table_name) + " (" + \
",".join([self.quote_column(k) for k in new_record.keys()]) + \
")\n" + \
"SELECT a.* FROM (SELECT " + ",".join([self.quote_value(v) + " " + self.quote_column(k) for k, v in new_record.items()]) + " FROM DUAL) a\n" + \
"LEFT JOIN " + \
"(SELECT 'dummy' exist FROM " + self.quote_column(table_name) + " WHERE " + condition + " LIMIT 1) b ON 1=1 WHERE exist IS Null"
self.execute(command, {})
@ -395,15 +466,15 @@ class DB(object):
try:
command = \
u"INSERT INTO " + self.quote_column(table_name) + u"(" + \
u",".join([self.quote_column(k) for k in keys]) + \
u") VALUES " + ",".join([
"(" + u",".join([self.quote_value(r[k]) for k in keys]) + u")"
"INSERT INTO " + self.quote_column(table_name) + "(" + \
",".join([self.quote_column(k) for k in keys]) + \
") VALUES " + ",\n".join([
"(" + ",".join([self.quote_value(r[k]) for k in keys]) + ")"
for r in records
])
self.execute(command)
except Exception, e:
Log.error(u"problem with record: {{record}}", {u"record": records}, e)
Log.error("problem with record: {{record}}", {"record": records}, e)
def update(self, table_name, where_slice, new_values):
@ -412,15 +483,15 @@ class DB(object):
"""
new_values = self.quote_param(new_values)
where_clause = u" AND\n".join([
self.quote_column(k) + u"=" + self.quote_value(v) if v != None else self.quote_column(k) + " IS NULL"
where_clause = " AND\n".join([
self.quote_column(k) + "=" + self.quote_value(v) if v != None else self.quote_column(k) + " IS NULL"
for k, v in where_slice.items()]
)
command = u"UPDATE " + self.quote_column(table_name) + u"\n" + \
u"SET " + \
u",\n".join([self.quote_column(k) + u"=" + v for k, v in new_values.items()]) + u"\n" + \
u"WHERE " + \
command = "UPDATE " + self.quote_column(table_name) + "\n" + \
"SET " + \
",\n".join([self.quote_column(k) + "=" + v for k, v in new_values.items()]) + "\n" + \
"WHERE " + \
where_clause
self.execute(command, {})
@ -445,17 +516,17 @@ class DB(object):
elif isinstance(value, basestring):
return self.db.literal(value)
elif isinstance(value, datetime):
return u"str_to_date('" + value.strftime(u"%Y%m%d%H%M%S") + u"', '%Y%m%d%H%i%s')"
return "str_to_date('" + value.strftime("%Y%m%d%H%M%S") + "', '%Y%m%d%H%i%s')"
elif hasattr(value, '__iter__'):
return self.db.literal(CNV.object2JSON(value))
return self.db.literal(json_encode(value))
elif isinstance(value, dict):
return self.db.literal(CNV.object2JSON(value))
return self.db.literal(json_encode(value))
elif Math.is_number(value):
return unicode(value)
else:
return self.db.literal(value)
except Exception, e:
Log.error(u"problem quoting SQL", e)
Log.error("problem quoting SQL", e)
def quote_sql(self, value, param=None):
@ -471,121 +542,42 @@ class DB(object):
elif isinstance(value, basestring):
return value
elif isinstance(value, dict):
return self.db.literal(CNV.object2JSON(value))
return self.db.literal(json_encode(value))
elif hasattr(value, '__iter__'):
return u"(" + u",".join([self.quote_sql(vv) for vv in value]) + u")"
return "(" + ",".join([self.quote_sql(vv) for vv in value]) + ")"
else:
return unicode(value)
except Exception, e:
Log.error(u"problem quoting SQL", e)
Log.error("problem quoting SQL", e)
def quote_column(self, column_name, table=None):
if isinstance(column_name, basestring):
if table:
column_name = table + "." + column_name
return SQL(u"`" + column_name.replace(u".", u"`.`") + u"`") #MY SQL QUOTE OF COLUMN NAMES
return SQL("`" + column_name.replace(".", "`.`") + "`") #MY SQL QUOTE OF COLUMN NAMES
elif isinstance(column_name, list):
if table:
return SQL(u", ".join([self.quote_column(table + "." + c) for c in column_name]))
return SQL(u", ".join([self.quote_column(c) for c in column_name]))
return SQL(", ".join([self.quote_column(table + "." + c) for c in column_name]))
return SQL(", ".join([self.quote_column(c) for c in column_name]))
else:
#ASSUME {u"name":name, u"value":value} FORM
return SQL(column_name.value + u" AS " + self.quote_column(column_name.name))
#ASSUME {"name":name, "value":value} FORM
return SQL(column_name.value + " AS " + self.quote_column(column_name.name))
def sort2sqlorderby(self, sort):
sort = Q.normalize_sort(sort)
return u",\n".join([self.quote_column(s.field) + (" DESC" if s.sort == -1 else " ASC") for s in sort])
sort = Q.normalize_sort_parameters(sort)
return ",\n".join([self.quote_column(s.field) + (" DESC" if s.sort == -1 else " ASC") for s in sort])
def esfilter2sqlwhere(self, esfilter):
return SQL(self._filter2where(esfilter))
def isolate(self, separator, list):
if len(list) > 1:
return u"(\n" + indent((" " + separator + "\n").join(list)) + u"\n)"
else:
return list[0]
def _filter2where(self, esfilter):
esfilter = struct.wrap(esfilter)
if esfilter[u"and"]:
return self.isolate("AND", [self._filter2where(a) for a in esfilter[u"and"]])
elif esfilter[u"or"]:
return self.isolate("OR", [self._filter2where(a) for a in esfilter[u"or"]])
elif esfilter[u"not"]:
return u"NOT (" + self._filter2where(esfilter[u"not"]) + u")"
elif esfilter.term:
return self.isolate("AND", [self.quote_column(col) + u"=" + self.quote_value(val) for col, val in esfilter.term.items()])
elif esfilter.terms:
for col, v in esfilter.terms.items():
try:
int_list = CNV.value2intlist(v)
has_null = False
for vv in v:
if vv == None:
has_null = True
break
if int_list:
filter = int_list_packer(col, int_list)
if has_null:
return self._filter2where({"or": [{"missing": col}, filter]})
else:
return self._filter2where(filter)
else:
if has_null:
return self._filter2where({"missing": col})
else:
return "false"
except Exception, e:
if not hasattr(e, "contains") or not e.contains("no packing possible"):
Log.warning("Not an int-list: {{list}}", {"list": v}, e)
return self.quote_column(col) + u" in (" + ", ".join([self.quote_value(val) for val in v]) + ")"
elif esfilter.script:
return u"(" + esfilter.script + u")"
elif esfilter.range:
name2sign = {
u"gt": u">",
u"gte": u">=",
u"lte": u"<=",
u"lt": u"<"
}
def single(col, r):
min = nvl(r["gte"], r[">="])
max = nvl(r["lte"], r["<="])
if min and max:
#SPECIAL CASE (BETWEEN)
return self.quote_column(col) + u" BETWEEN " + self.quote_value(min) + u" AND " + self.quote_value(max)
else:
return " AND ".join(
self.quote_column(col) + name2sign[sign] + self.quote_value(value)
for sign, value in r.items()
)
output = self.isolate("AND", [single(col, ranges) for col, ranges in esfilter.range.items()])
return output
elif esfilter.missing:
if isinstance(esfilter.missing, basestring):
return u"(" + self.quote_column(esfilter.missing) + u" IS Null)"
else:
return u"(" + self.quote_column(esfilter.missing.field) + u" IS Null)"
elif esfilter.exists:
if isinstance(esfilter.exists, basestring):
return u"(" + self.quote_column(esfilter.exists) + u" IS NOT Null)"
else:
return u"(" + self.quote_column(esfilter.exists.field) + u" IS NOT Null)"
else:
Log.error(u"Can not convert esfilter to SQL: {{esfilter}}", {u"esfilter": esfilter})
def utf8_to_unicode(v):
try:
if isinstance(v, str):
return v.decode(u"utf8")
return v.decode("utf8")
else:
return v
except Exception, e:
Log.error(u"not expected", e)
Log.error("not expected", e)
#ACTUAL SQL, DO NOT QUOTE THIS STRING
@ -595,8 +587,12 @@ class SQL(unicode):
self.template = template
self.param = param
@property
def sql(self):
return expand_template(self.template, self.param)
def __str__(self):
Log.error(u"do not do this")
Log.error("do not do this")
def int_list_packer(term, values):
@ -616,7 +612,7 @@ def int_list_packer(term, values):
curr_start = last
curr_excl = set()
for v in sorted[1:]:
for v in sorted[1::]:
if v <= last + 1:
pass
elif v - last > 3:
@ -693,3 +689,25 @@ class Transaction(object):
self.db.rollback()
else:
self.db.commit()
json_encoder = json.JSONEncoder(
skipkeys=False,
ensure_ascii=False, # DIFF FROM DEFAULTS
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
sort_keys=True # <-- SEE?! sort_keys==True
)
def json_encode(value):
    """
    ENCODE value AS JSON TEXT FOR STORAGE IN THE DATABASE
    value IS PASSED THROUGH json_scrub() AND THEN ENCODED WITH THE MODULE'S
    json_encoder (sort_keys=True), SO dicts CAN BE USED AS KEYS
    """
    scrubbed = json_scrub(value)
    encoded = json_encoder.encode(scrubbed)
    return unicode(encoded)

Просмотреть файл

@ -8,26 +8,29 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from dzAlerts.util import struct
from __future__ import unicode_literals
from ..queries.db_query import esfilter2sqlwhere
from ..structs.wraps import wrap
def find_holes(db, table_name, column_name, filter, _range):
def find_holes(db, table_name, column_name, _range, filter=None):
"""
FIND HOLES IN A DENSE COLUMN OF INTEGERS
RETURNS A LIST OF {"min":min, "max":max} OBJECTS
"""
if not filter:
filter = {"match_all": {}}
_range = struct.wrap(_range)
_range = wrap(_range)
params = {
"min": _range.min,
"max": _range.max - 1,
"column_name": db.quote_column(column_name),
"table_name": db.quote_column(table_name),
"filter": db.esfilter2sqlwhere(filter)
"filter": esfilter2sqlwhere(db, filter)
}
min_max=db.query("""
min_max = db.query("""
SELECT
min({{column_name}}) `min`,
max({{column_name}})+1 `max`
@ -38,7 +41,6 @@ def find_holes(db, table_name, column_name, filter, _range):
{{filter}}
""", params)[0]
db.execute("SET @last={{min}}-1", {"min": _range.min})
ranges = db.query("""
SELECT
@ -61,7 +63,6 @@ def find_holes(db, table_name, column_name, filter, _range):
diff>1
""", params)
if ranges:
ranges.append({"min": min_max.max, "max": _range.max})
else:
@ -71,5 +72,4 @@ def find_holes(db, table_name, column_name, filter, _range):
else:
ranges.append(_range)
return ranges

Просмотреть файл

@ -1,77 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import argparse
import struct
from .struct import listwrap
from .cnv import CNV
from .logs import Log
from .files import File
#PARAMETERS MATCH argparse.ArgumentParser.add_argument()
#http://docs.python.org/dev/library/argparse.html#the-add-argument-method
#name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
#action - The basic type of action to be taken when this argument is encountered at the command line.
#nargs - The number of command-line arguments that should be consumed.
#const - A constant value required by some action and nargs selections.
#default - The value produced if the argument is absent from the command line.
#type - The type to which the command-line argument should be converted.
#choices - A container of the allowable values for the argument.
#required - Whether or not the command-line option may be omitted (optionals only).
#help - A brief description of what the argument does.
#metavar - A name for the argument in usage messages.
#dest - The name of the attribute to be added to the object returned by parse_args().
def _argparse(defs):
    """
    DECLARE ONE COMMAND-LINE ARGUMENT PER ENTRY IN defs, PARSE THE COMMAND
    LINE, AND RETURN THE PARSED VALUES AS A WRAPPED dict

    EACH ENTRY USES THE PARAMETER NAMES OF ArgumentParser.add_argument();
    ITS name PROPERTY HOLDS THE FLAG (OR LIST OF FLAGS)
    """
    parser = argparse.ArgumentParser()
    for definition in listwrap(defs):
        options = definition.copy()
        flags = options.name
        # FLAGS ARE PASSED POSITIONALLY; PRESUMABLY ASSIGNING None REMOVES
        # name FROM options.dict - CONFIRM AGAINST THE struct MODULE
        options.name = None
        parser.add_argument(*listwrap(flags).list, **options.dict)

    parsed = parser.parse_args()
    values = {}
    for attribute in vars(parsed):
        values[attribute] = getattr(parsed, attribute)
    return struct.wrap(values)
def read_settings(filename=None, defs=None):
    """
    READ THE JSON SETTINGS FILE AND RETURN THE SETTINGS OBJECT

    filename - PATH TO THE SETTINGS FILE.  WHEN NOT GIVEN, THE PATH IS TAKEN
               FROM THE --settings (--settings-file, --settings_file)
               COMMAND-LINE ARGUMENT, WHICH DEFAULTS TO ./settings.json
    defs     - ADDITIONAL COMMAND-LINE ARGUMENT DEFINITIONS (SEE _argparse)

    THE PARSED COMMAND-LINE ARGUMENTS (IF ANY WERE REQUESTED) ARE ATTACHED
    TO THE RESULT AS settings.args
    """
    if filename:
        settings = _load_settings_file(filename)
        if defs:
            settings.args = _argparse(defs)
        return settings

    # NO FILENAME GIVEN: ASK THE COMMAND LINE FOR IT
    defs = listwrap(defs)
    defs.append({
        "name": ["--settings", "--settings-file", "--settings_file"],
        "help": "path to JSON file with settings",
        "type": str,
        "dest": "filename",
        "default": "./settings.json",
        "required": False
    })
    args = _argparse(defs)
    settings = _load_settings_file(args.filename)
    settings.args = args
    return settings


def _load_settings_file(filename):
    # READ AND PARSE ONE JSON SETTINGS FILE; IT IS AN ERROR IF IT DOES NOT EXIST
    settings_file = File(filename)
    if not settings_file.exists:
        Log.error("Can not find settings file {{filename}}", {
            "filename": settings_file.abspath
        })
    content = settings_file.read()
    return CNV.JSON2object(content, flexible=True)

Просмотреть файл

@ -1,185 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from math import sqrt
from .cnv import CNV
from .struct import nvl, Struct, Null
from .logs import Log
DEBUG=True
EPSILON=0.000001
def stats2z_moment(stats):
    """
    CONVERT stats (count, mean, variance, skew, kurtosis) TO A Z_moment
    HOLDING THE ZERO-CENTERED (RAW) MOMENT SUMS: count, SUM OF x, SUM OF x^2,
    SUM OF x^3, SUM OF x^4

    WHEN DEBUG IS ON, THE RESULT IS CONVERTED BACK AND COMPARED TO stats
    """
    # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
    # ADDED count
    mc0, mc1, mc2, skew, kurt = (stats.count, stats.mean, stats.variance, stats.skew, stats.kurtosis)

    mz0 = mc0
    mz1 = mc1 * mc0
    mz2 = (mc2 + mc1*mc1)*mc0
    mc3 = skew*(mc2**1.5)  # 3rd central moment
    # E[X^3] = mc3 + 3*mean*variance + mean^3 ; THE LAST TERM IS ADDED, NOT SUBTRACTED
    mz3 = (mc3 + 3*mc1*mc2 + mc1**3)*mc0  # 3rd non-central moment
    mc4 = (kurt+3.0)*(mc2**2.0)  # 4th central moment (kurt IS TAKEN AS EXCESS KURTOSIS)
    mz4 = (mc4 + 4*mc1*mc3 + 6*mc1*mc1*mc2 + mc1**4) * mc0

    m = Z_moment(mz0, mz1, mz2, mz3, mz4)
    if DEBUG:
        # ROUND-TRIP CHECK OF count, mean AND variance
        v = z_moment2stats(m, unbiased=False)
        if not closeEnough(v.count, stats.count):
            Log.error("convertion error")
        if not closeEnough(v.mean, stats.mean):
            Log.error("convertion error")
        if not closeEnough(v.variance, stats.variance):
            Log.error("convertion error")

    return m
def closeEnough(a, b):
    """
    RETURN True IF a AND b DIFFER BY NO MORE THAN EPSILON, SCALED BY THE
    MAGNITUDE OF THE TWO VALUES (PLUS ONE)
    """
    tolerance = EPSILON*(abs(a)+abs(b)+1)
    within = abs(a-b) <= tolerance
    return True if within else False
def z_moment2stats(z_moment, unbiased=True):
    """
    CONVERT ZERO-CENTERED MOMENT SUMS TO A Stats OBJECT (count, mean, variance)
    unbiased - DIVIDE THE VARIANCE BY (N-1) INSTEAD OF N
    """
    free = 1 if unbiased else 0
    sums = z_moment.S
    N = sums[0]
    if N == 0:
        return Stats()

    nan = float('nan')
    mean = sums[1] / N if N > 0 else nan
    degrees = N - free
    variance = (sums[2] - (sums[1] ** 2) / N) / degrees if degrees > 0 else nan
    return Stats(
        count=N,
        mean=mean,
        variance=variance,
        unbiased=unbiased
    )
class Stats(Struct):
    """
    SUMMARY STATISTICS: count, mean, variance, skew, kurtosis (AND unbiased)

    THE KEYWORD ARGUMENTS ARE ACCEPTED IN ORDER: count, mean, variance (OR std),
    skew, kurtosis.  THE FIRST ONE MISSING STOPS THE SEQUENCE, AND IT AND ALL
    THE LATER ONES ARE SET TO ZERO
    """

    def __init__(self, **args):
        Struct.__init__(self)

        count = mean = variance = skew = kurtosis = 0
        if "count" in args:
            count = args["count"]
            if "mean" in args:
                mean = args["mean"]
                if "variance" in args or "std" in args:
                    # variance TAKES PRECEDENCE OVER std
                    variance = args["variance"] if "variance" in args else args["std"]**2
                    if "skew" in args:
                        skew = args["skew"]
                        if "kurtosis" in args:
                            kurtosis = args["kurtosis"]

        self.count = count
        self.mean = mean
        self.variance = variance
        self.skew = skew
        self.kurtosis = kurtosis

        # unbiased WINS OVER biased; DEFAULT IS False
        if "unbiased" in args:
            unbiased = args["unbiased"]
        elif "biased" in args:
            unbiased = not args["biased"]
        else:
            unbiased = False
        self.unbiased = unbiased

    @property
    def std(self):
        # STANDARD DEVIATION IS THE SQUARE ROOT OF THE VARIANCE
        return sqrt(self.variance)
class Z_moment(object):
    """
    ZERO-CENTERED MOMENTS, KEPT AS A TUPLE OF SUMS IN self.S
    """

    def __init__(self, *args):
        self.S = tuple(args)

    def __add__(self, other):
        # ELEMENT-WISE SUM OF THE TWO TUPLES
        summed = map(add, self.S, other.S)
        return Z_moment(*summed)

    def __sub__(self, other):
        # ELEMENT-WISE DIFFERENCE OF THE TWO TUPLES
        difference = map(sub, self.S, other.S)
        return Z_moment(*difference)

    @property
    def tuple(self):
        #RETURN AS ORDERED TUPLE
        return self.S

    @property
    def dict(self):
        #RETURN HASH OF SUMS, KEYED "s0", "s1", ...
        output = {}
        for i, m in enumerate(self.S):
            output["s"+unicode(i)] = m
        return output

    @staticmethod
    def new_instance(values=None):
        """
        BUILD FROM RAW SAMPLES: COUNT, THEN SUMS OF POWERS 1 TO 4
        MISSING SAMPLES ARE IGNORED; NO values GIVES AN EMPTY Z_moment
        """
        if values == None:
            return Z_moment()

        samples = [float(v) for v in values if v != None]
        power_sums = [sum(pow(n, p) for n in samples) for p in (2, 3, 4)]
        return Z_moment(len(samples), sum(samples), *power_sums)
def add(a, b):
    """
    SUM OF a AND b, EACH PASSED THROUGH nvl(x, 0) FIRST
    """
    left_term = nvl(a, 0)
    right_term = nvl(b, 0)
    return left_term + right_term
def sub(a, b):
    """
    DIFFERENCE a - b, EACH PASSED THROUGH nvl(x, 0) FIRST
    """
    left_term = nvl(a, 0)
    right_term = nvl(b, 0)
    return left_term - right_term
def z_moment2dict(z):
    """
    RETURN HASH OF SUMS: {"s0": z.S[0], "s1": z.S[1], ...}
    """
    output = {}
    for i, m in enumerate(z.S):
        output["s" + unicode(i)] = m
    return output
setattr(CNV, "z_moment2dict", staticmethod(z_moment2dict))
def median(values):
    """
    RETURN THE MIDDLE VALUE OF THE SORTED values; FOR AN EVEN COUNT, THE
    SUM OF THE TWO MIDDLE VALUES DIVIDED BY 2.  RETURN Null WHEN values
    IS EMPTY.  ANY FAILURE IS REPORTED THROUGH Log.error
    """
    try:
        if not values:
            return Null

        # middle IS THE FLOORED HALF OF THE COUNT, odd IS THE REMAINDER
        middle, odd = divmod(len(values), 2)
        ordered = sorted(values)
        if odd:
            return ordered[middle]
        return (ordered[middle - 1] + ordered[middle]) / 2
    except Exception as e:
        Log.error("problem with median", e)

Просмотреть файл

@ -8,106 +8,235 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import timedelta, date
from datetime import datetime as builtin_datetime
import re
from .jsons import json_encoder
import struct
from .struct import Struct
from . import struct
import math
import __builtin__
from .structs.wraps import unwrap, wrap
def datetime(value):
    """
    FORMAT value AS "%Y-%m-%d %H:%M:%S"
    date/datetime INSTANCES ARE FORMATTED DIRECTLY; OTHER VALUES BELOW
    10000000000 GO THROUGH CNV.unix2datetime, THE REST THROUGH
    CNV.milli2datetime
    """
    from .cnv import CNV

    if not isinstance(value, (date, builtin_datetime)):
        if value < 10000000000:
            converter = CNV.unix2datetime
        else:
            converter = CNV.milli2datetime
        value = converter(value)

    return CNV.datetime2string(value, "%Y-%m-%d %H:%M:%S")
def unix(value):
    """
    RETURN str() OF CNV.datetime2unix(value)
    date/datetime INSTANCES ARE USED DIRECTLY; OTHER VALUES BELOW
    10000000000 GO THROUGH CNV.unix2datetime, THE REST THROUGH
    CNV.milli2datetime
    """
    from .cnv import CNV

    if not isinstance(value, (date, builtin_datetime)):
        if value < 10000000000:
            converter = CNV.unix2datetime
        else:
            converter = CNV.milli2datetime
        value = converter(value)

    return str(CNV.datetime2unix(value))
def upper(value):
return value.upper()
def lower(value):
return value.lower()
def newline(value):
"""
ADD NEWLINE, IF SOMETHING
"""
return "\n"+value.lstrip("\n")
return "\n" + toString(value).lstrip("\n")
def replace(value, find, replace):
return value.replace(find, replace)
def json(value):
from .cnv import CNV
return CNV.object2JSON(value)
def indent(value, prefix=u"\t", indent=None):
if indent != None:
prefix=prefix*indent
prefix = prefix * indent
value = toString(value)
try:
content=value.rstrip()
suffix=value[len(content):]
lines=content.splitlines()
return prefix+(u"\n"+prefix).join(lines)+suffix
content = value.rstrip()
suffix = value[len(content):]
lines = content.splitlines()
return prefix + (u"\n" + prefix).join(lines) + suffix
except Exception, e:
raise Exception(u"Problem with indent of value ("+e.message+u")\n"+unicode(value))
raise Exception(u"Problem with indent of value (" + e.message + u")\n" + unicode(toString(value)))
def outdent(value):
try:
num=100
lines=value.splitlines()
num = 100
lines = toString(value).splitlines()
for l in lines:
trim=len(l.lstrip())
if trim>0: num=min(num, len(l)-len(l.lstrip()))
trim = len(l.lstrip())
if trim > 0:
num = min(num, len(l) - len(l.lstrip()))
return u"\n".join([l[num:] for l in lines])
except Exception, e:
from .logs import Log
from .env.logs import Log
Log.error("can not outdent value", e)
def between(value, prefix, suffix):
s = value.find(prefix)
if s==-1: return None
s+=len(prefix)
def round(value, decimal=None, digits=None):
    """
    ROUND value TO A NUMBER OF DECIMAL PLACES, OR TO A NUMBER OF
    SIGNIFICANT DIGITS

    :param value: THE NUMBER TO ROUND
    :param decimal: DECIMAL PLACES TO KEEP (None IS TREATED AS ZERO)
    :param digits: SIGNIFICANT DIGITS TO KEEP; WHEN GIVEN, decimal IS IGNORED
    :return: THE ROUNDED NUMBER
    """
    if digits != None:
        if value == 0:
            # log10(0) IS UNDEFINED, AND ZERO NEEDS NO ROUNDING
            return value
        # m IS THE SMALLEST POWER OF TEN NOT BELOW |value|, SO value/m IS
        # AT MOST 1 IN MAGNITUDE AND digits DECIMALS OF IT ARE THE
        # SIGNIFICANT DIGITS
        m = pow(10, math.ceil(math.log10(abs(value))))
        return __builtin__.round(value / m, digits) * m

    # THE PYTHON 2 BUILTIN round() REJECTS None AS ndigits
    if decimal == None:
        decimal = 0
    return __builtin__.round(value, decimal)
def percent(value, decimal=None, digits=None):
    """
    MULTIPLY value BY 100, ROUND IT WITH THIS MODULE'S round(), AND
    RETURN THE RESULT AS A STRING WITH A TRAILING "%"
    """
    scaled = value * 100
    rounded = round(scaled, decimal, digits)
    return str(rounded) + "%"
def between(value, prefix, suffix):
value = toString(value)
s = value.find(prefix)
if s == -1: return None
s += len(prefix)
e = value.find(suffix, s)
if e == -1:
return None
s=value.rfind(prefix, 0, e)+len(prefix) #WE KNOW THIS EXISTS, BUT THERE MAY BE A RIGHT-MORE ONE
s = value.rfind(prefix, 0, e) + len(prefix) #WE KNOW THIS EXISTS, BUT THERE MAY BE A RIGHT-MORE ONE
return value[s:e]
def right(value, len):
if len<=0: return u""
if len <= 0:
return u""
return value[-len:]
def left(value, len):
    """
    RETURN THE FIRST len ITEMS OF value, OR THE EMPTY STRING WHEN len
    IS ZERO OR NEGATIVE
    """
    return value[0:len] if len > 0 else u""
def find_first(value, find_arr, start=0):
i=len(value)
i = len(value)
for f in find_arr:
temp=value.find(f, start)
if temp==-1: continue
i=min(i, temp)
if i==len(value): return -1
temp = value.find(f, start)
if temp == -1: continue
i = min(i, temp)
if i == len(value): return -1
return i
pattern = re.compile(r"\{\{([\w_\.]+(\|[^\}^\|]+)*)\}\}")
def expand_template(template, value):
"""
template IS A STRING WITH {{variable_name}} INSTANCES, WHICH WILL
BE EXPANDED TO WHAT IS IS IN THE value dict
"""
value = wrap(value)
if isinstance(template, basestring):
return _simple_expand(template, (value,))
return _expand(template, (value,))
pattern=re.compile(r"\{\{([\w_\.]+(\|[\w_]+)*)\}\}")
def expand_template(template, values):
values=struct.wrap(values)
def _expand(template, seq):
"""
seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE
"""
if isinstance(template, basestring):
return _simple_expand(template, seq)
elif isinstance(template, dict):
template = wrap(template)
assert template["from"], "Expecting template to have 'from' attribute"
assert template.template, "Expecting template to have 'template' attribute"
data = seq[-1][template["from"]]
output = []
for d in data:
s = seq + (d,)
output.append(_expand(template.template, s))
return struct.nvl(template.separator, "").join(output)
elif isinstance(template, list):
return "".join(_expand(t, seq) for t in template)
else:
from .env.logs import Log
Log.error("can not handle")
def _simple_expand(template, seq):
"""
seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE
seq[-1] IS THE CURRENT CONTEXT
"""
def replacer(found):
seq=found.group(1).split("|")
ops = found.group(1).split("|")
var=seq[0]
path = ops[0]
var = path.lstrip(".")
depth = min(len(seq), max(1, len(path) - len(var)))
try:
val=values[var]
val=toString(val)
for filter in seq[1:]:
val=eval(filter+"(val)")
val = seq[-depth][var]
for filter in ops[1:]:
parts = filter.split('(')
if len(parts) > 1:
val = eval(parts[0] + "(val, " + ("(".join(parts[1::])))
else:
val = eval(filter + "(val)")
val = toString(val)
return val
except Exception, e:
try:
if e.message.find(u"is not JSON serializable"):
if e.message.find("is not JSON serializable"):
#WORK HARDER
val=toString(val)
val = toString(val)
return val
except Exception:
raise Exception(u"Can not expand "+"|".join(seq)+u" in template:\n"+indent(template), e)
except Exception, f:
from .env.logs import Log
Log.warning("Can not expand " + "|".join(ops) + " in template: {{template|json}}", {
"template": template
}, e)
return "[template expansion error: ("+str(e.message)+")]"
return pattern.sub(replacer, template)
def toString(val):
if isinstance(val, Struct):
return json_encoder.encode(val.dict, pretty=True)
elif isinstance(val, dict) or isinstance(val, list) or isinstance(val, set):
val=json_encoder.encode(val, pretty=True)
return val
return unicode(val)
if val == None:
return ""
elif isinstance(val, (dict, list, set)):
from .jsons import json_encoder
return json_encoder(val, pretty=True)
elif hasattr(val, "__json__"):
return val.__json__()
elif isinstance(val, timedelta):
duration = val.total_seconds()
return unicode(round(duration, 3))+" seconds"
try:
return unicode(val)
except Exception, e:
from .env.logs import Log
Log.error(str(type(val))+" type can not be converted to unicode", e)
def edit_distance(s1, s2):
@ -132,4 +261,68 @@ def edit_distance(s1, s2):
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
return float(previous_row[-1])/len(s1)
return float(previous_row[-1]) / len(s1)
DIFF_PREFIX = re.compile(r"@@ -(\d+(?:\s*,\d+)?) \+(\d+(?:\s*,\d+)?) @@")


def apply_diff(text, diff, reverse=False):
    """
    APPLY A UNIFIED-DIFF STYLE PATCH TO A LIST OF LINES

    :param text: LIST OF LINES TO PATCH (NOT MODIFIED; A NEW LIST IS RETURNED)
    :param diff: LIST OF DIFF LINES: A "@@ -a,b +c,d @@" HEADER FOLLOWED BY
                 "-" AND "+" LINES, POSSIBLY FOLLOWED BY MORE HUNKS
                 (NOT MODIFIED)
    :param reverse: True TO UNDO THE PATCH INSTEAD OF APPLYING IT
    :return: THE PATCHED LIST OF LINES

    SOME EXAMPLES OF diff
    #@@ -1 +1 @@
    #-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    #+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -0,0 +1,3 @@
    +before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +
    +kward has the details.
    @@ -1 +1 @@
    -before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -3 +3 ,6 @@
    -kward has the details.+kward has the details.
    +
    +Target Release Dates :
    +https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
    +
    +Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
    """
    if not diff:
        return text
    if diff[0].strip() == "":
        return text

    matches = DIFF_PREFIX.match(diff[0].strip())
    if not matches:
        from .env.logs import Log

        Log.error("Can not handle {{diff}}\n", {"diff": diff[0]})

    # EACH SIDE OF THE HEADER IS "start" OR "start,count"
    remove = [int(i.strip()) for i in matches.group(1).split(",")]
    if len(remove) == 1:
        remove = [remove[0], 1]  # DEFAULT 1
    add = [int(i.strip()) for i in matches.group(2).split(",")]
    if len(add) == 1:
        add = [add[0], 1]

    # UNUSUAL CASE WHERE @@ -x +x, n @@ AND FIRST LINE HAS NOT CHANGED:
    # THE "-" AND "+" COPIES OF THAT LINE ARE FUSED INTO ONE DIFF LINE
    if len(diff) > 1 and remove[1] == 1 and add[0] == remove[0]:
        half = len(diff[1]) // 2
        first_half = diff[1][:half]
        last_half = diff[1][half:half * 2]
        # REQUIRE THE -/+ MARKERS, OTHERWISE ANY 2 OR 3 CHARACTER LINE
        # (WHERE BOTH HALVES ARE EMPTY PAST THEIR FIRST CHARACTER) MATCHES
        fused = (
            first_half[:1] == "-"
            and last_half[:1] == "+"
            and first_half[1:] == last_half[1:]
        )
        if fused:
            # BUILD A NEW LIST SO THE CALLER'S diff IS LEFT UNTOUCHED
            diff = diff[:1] + [first_half, last_half] + diff[2:]

    if not reverse:
        if remove[1] != 0:
            text = text[:remove[0] - 1] + text[remove[0] + remove[1] - 1:]
        text = text[:add[0] - 1] + [d[1:] for d in diff[1 + remove[1]:1 + remove[1] + add[1]]] + text[add[0] - 1:]
        # REMAINING HUNKS ARE APPLIED AFTER THIS ONE
        text = apply_diff(text, diff[add[1] + remove[1] + 1:], reverse=reverse)
    else:
        # WHEN UNDOING, LATER HUNKS MUST BE UNDONE FIRST
        text = apply_diff(text, diff[add[1] + remove[1] + 1:], reverse=reverse)
        if add[1] != 0:
            text = text[:add[0] - 1] + text[add[0] + add[1] - 1:]
        text = text[:remove[0] - 1] + [d[1:] for d in diff[1:1 + remove[1]]] + text[remove[0] - 1:]

    return text

Просмотреть файл

@ -7,7 +7,12 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
SPECIAL = ["keys", "values", "items", "iteritems", "dict", "copy"]
from __future__ import unicode_literals
_get = object.__getattribute__
_set = object.__setattr__
DEBUG = False
class Struct(dict):
@ -15,65 +20,102 @@ class Struct(dict):
Struct is an anonymous class with some properties good for manipulating JSON
0) a.b==a["b"]
1) the IDE does tab completion, so my spelling mistakes get found at "compile time"
2) it deals with missing keys gracefully, so I can put it into set operations (database operations) without choking
1) the IDE does tab completion, and my spelling mistakes get found at "compile time"
2) it deals with missing keys gracefully, so I can put it into set operations (database
operations) without choking
a = wrap({})
> a == {}
a.b is Null
> True
a.b.c == None
> True
2b) missing keys is important when dealing with JSON, which is often almost anything
3) also, which I hardly use, is storing JSON paths in a variable, so : a["b.c"]==a.b.c
3) you can access paths as a variable: a["b.c"]==a.b.c
4) you can set paths to values, missing objects along the path are created:
a = wrap({})
> a == {}
a["b.c"] = 42
> a == {"b": {"c": 42}}
5) attribute names (keys) are corrected to unicode - it appears Python object.getattribute()
is called with str() even when using from __future__ import unicode_literals
MORE ON MISSING VALUES: http://www.numpy.org/NA-overview.html
IT ONLY CONSIDERS THE LEGITIMATE-FIELD-WITH-MISSING-VALUE (Statistical Null)
AND DOES NOT LOOK AT FIELD-DOES-NOT-EXIST-IN-THIS-CONTEXT (Database Null)
This is a common pattern in many frameworks (I am still working on this list)
The Struct is a common pattern in many frameworks (I am still working on this list)
jinja2.environment.Environment.getattr()
argparse.Environment() - code performs setattr(e, name, value) on instances of Environment
collections.namedtuple() - gives attribute names to tuple indicies
"""
def __init__(self, **map):
"""
THIS WILL MAKE A COPY, WHICH IS UNLIKELY TO BE USEFUL
USE struct.wrap() INSTEAD
CALLING Struct(**something) WILL RESULT IN A COPY OF something, WHICH IS UNLIKELY TO BE USEFUL
USE wrap() INSTEAD
"""
dict.__init__(self)
object.__setattr__(self, "__dict__", map) #map IS A COPY OF THE PARAMETERS
if DEBUG:
d = _get(self, "__dict__")
for k, v in map.items():
d[literal_field(k)] = unwrap(v)
else:
if map:
_set(self, "__dict__", map)
def __bool__(self):
return True
def __nonzero__(self):
return True
d = _get(self, "__dict__")
return True if d else False
def __str__(self):
return dict.__str__(object.__getattribute__(self, "__dict__"))
try:
return "Struct("+dict.__str__(_get(self, "__dict__"))+")"
except Exception, e:
return "{}"
def __repr__(self):
try:
return "Struct("+dict.__repr__(_get(self, "__dict__"))+")"
except Exception, e:
return "Struct{}"
def __contains__(self, item):
if Struct.__getitem__(self, item):
return True
return False
def __getitem__(self, key):
if not isinstance(key, str):
key = key.encode("utf-8")
if isinstance(key, str):
key = key.decode("utf8")
d = object.__getattribute__(self, "__dict__")
d = _get(self, "__dict__")
if key.find(".") >= 0:
key = key.replace("\.", "\a")
seq = [k.replace("\a", ".") for k in key.split(".")]
seq = split_field(key)
for n in seq:
d = getdefault(d, n)
d = _getdefault(d, n)
return wrap(d)
return wrap(getdefault(d, key))
def __setattr__(self, key, value):
Struct.__setitem__(self, key, value)
o = d.get(key, None)
if o == None:
return NullType(d, key)
return wrap(o)
def __setitem__(self, key, value):
if not isinstance(key, str):
key = key.encode("utf-8")
if key == "":
from .env.logs import Log
Log.error("key is empty string. Probably a bad idea")
if isinstance(key, str):
key = key.decode("utf8")
try:
d = object.__getattribute__(self, "__dict__")
d = _get(self, "__dict__")
value = unwrap(value)
if key.find(".") == -1:
if value is None:
@ -82,10 +124,9 @@ class Struct(dict):
d[key] = value
return self
key = key.replace("\.", "\a")
seq = [k.replace("\a", ".") for k in key.split(".")]
seq = split_field(key)
for k in seq[:-1]:
d = getdefault(d, k)
d = _getdefault(d, k)
if value == None:
d.pop(seq[-1], None)
else:
@ -95,71 +136,148 @@ class Struct(dict):
raise e
def __getattribute__(self, key):
if not isinstance(key, str):
key = key.encode("utf-8")
try:
output = _get(self, key)
return wrap(output)
except Exception:
d = _get(self, "__dict__")
if isinstance(key, str):
key = key.decode("utf8")
d = object.__getattribute__(self, "__dict__")
if key not in SPECIAL:
return wrap(getdefault(d, key))
return NullType(d, key)
#SOME dict FUNCTIONS
if key == "items":
def temp():
_is = dict.__getattribute__(d, "items")
return [(k, wrap(v)) for k, v in _is()]
def __setattr__(self, key, value):
if isinstance(key, str):
ukey = key.decode("utf8")
else:
ukey = key
return temp
if key == "iteritems":
#LOW LEVEL ITERATION
return d.iteritems
if key == "keys":
def temp():
k = dict.__getattribute__(d, "keys")
return set(k())
value = unwrap(value)
if value is None:
d = _get(self, "__dict__")
d.pop(key, None)
else:
_set(self, ukey, value)
return self
return temp
if key == "values":
def temp():
vs = dict.__getattribute__(d, "values")
return [wrap(v) for v in vs()]
def __hash__(self):
d = _get(self, "__dict__")
return hash_value(d)
return temp
if key == "dict":
return d
if key == "copy":
o = wrap({k: v for k, v in d.items()})
def __eq__(self, other):
if not isinstance(other, dict):
return False
e = unwrap(other)
d = _get(self, "__dict__")
for k, v in d.items():
if e.get(k, None) != v:
return False
for k, v in e.items():
if d.get(k, None) != v:
return False
return True
def output():
return o
def __ne__(self, other):
return not self.__eq__(other)
return output
def get(self, key, default):
d = _get(self, "__dict__")
return d.get(key, default)
def items(self):
d = _get(self, "__dict__")
return ((k, wrap(v)) for k, v in d.items())
def all_items(self):
"""
GET ALL KEY-VALUES OF LEAF NODES IN Struct
"""
d = _get(self, "__dict__")
output = []
for k, v in d.items():
if isinstance(v, dict):
_all_items(output, k, v)
else:
output.append((k, v))
return output
def iteritems(self):
#LOW LEVEL ITERATION, NO WRAPPING
d = _get(self, "__dict__")
return d.iteritems()
def keys(self):
d = _get(self, "__dict__")
return set(d.keys())
def values(self):
d = _get(self, "__dict__")
return (wrap(v) for v in d.values())
def copy(self):
d = _get(self, "__dict__")
return Struct(**d)
def __delitem__(self, key):
if not isinstance(key, str):
key = key.encode("utf-8")
d = object.__getattribute__(self, "__dict__")
if isinstance(key, str):
key = key.decode("utf8")
if key.find(".") == -1:
d = _get(self, "__dict__")
d.pop(key, None)
return
key = key.replace("\.", "\a")
seq = [k.replace("\a", ".") for k in key.split(".")]
d = _get(self, "__dict__")
seq = split_field(key)
for k in seq[:-1]:
d = d[k]
d.pop(seq[-1], None)
def __delattr__(self, key):
if isinstance(key, str):
key = key.decode("utf8")
d = _get(self, "__dict__")
d.pop(key, None)
def keys(self):
d = object.__getattribute__(self, "__dict__")
d = _get(self, "__dict__")
return d.keys()
def setdefault(self, k, d=None):
if self[k] == None:
self[k] = d
# KEEP TRACK OF WHAT ATTRIBUTES ARE REQUESTED, MAYBE SOME (BUILTIN) ARE STILL USEFUL
requested = set()
def setdefault(obj, key, value):
def _all_items(output, key, d):
for k, v in d:
if isinstance(v, dict):
_all_items(output, key+"."+k, v)
else:
output.append((key+"."+k, v))
def _str(value, depth):
"""
FOR DEBUGGING POSSIBLY RECURSIVE STRUCTURES
"""
output = []
if depth >0 and isinstance(value, dict):
for k, v in value.items():
output.append(str(k) + "=" + _str(v, depth - 1))
return "{" + ",\n".join(output) + "}"
elif depth >0 and isinstance(value, list):
for v in value:
output.append(_str(v, depth-1))
return "[" + ",\n".join(output) + "]"
else:
return str(type(value))
def _setdefault(obj, key, value):
"""
DO NOT USE __dict__.setdefault(obj, key, value), IT DOES NOT CHECK FOR obj[key] == None
"""
@ -170,39 +288,91 @@ def setdefault(obj, key, value):
return v
def getdefault(obj, key):
o = obj.get(key, None)
if o == None:
return NullStruct(obj, key)
return unwrap(o)
def _assign(null, key, value, force=True):
def set_default(*params):
"""
value IS ONLY ASSIGNED IF self.obj[self.path][key] DOES NOT EXIST
I+NPUT dicts IN PRIORITY ORDER
UPDATES FIRST dict WITH THE MERGE RESULT, WHERE MERGE RESULT IS DEFINED AS:
FOR EACH LEAF, RETURN THE HIGHEST PRIORITY LEAF VALUE
"""
d = object.__getattribute__(null, "__dict__")
o = d["obj"]
if isinstance(o, NullStruct):
o = _assign(o, d["path"], {}, False)
else:
o = setdefault(o, d["path"], {})
agg = params[0] if params[0] != None else {}
for p in params[1:]:
p = unwrap(p)
if p is None:
continue
_all_default(agg, p)
return wrap(agg)
if force:
o[key] = value
else:
value = setdefault(o, key, value)
return value
class NullStruct(object):
def _all_default(d, default):
"""
ANY VALUE NOT SET WILL BE SET BY THE default
THIS IS RECURSIVE
"""
if default is None:
return
for k, default_value in default.items():
existing_value = d.get(k, None)
if existing_value is None:
d[k] = default_value
elif isinstance(existing_value, dict) and isinstance(default_value, dict):
_all_default(existing_value, default_value)
def _getdefault(obj, key):
"""
TRY BOTH ATTRIBUTE AND ITEM ACCESS, OR RETURN Null
"""
try:
return obj.__getattribute__(key)
except Exception, e:
try:
return obj[key]
except Exception, f:
return NullType(obj, key)
def _assign(obj, path, value, force=True):
    """
    ASSIGN value AT THE END OF path, STARTING FROM obj; MISSING
    INTERMEDIATE OBJECTS ALONG THE path ARE CREATED AS dicts

    :param obj: THE CONTAINER TO START FROM.  WHEN IT IS A NullType, THE
                ASSIGNMENT RESTARTS FROM THE OBJECT THE NullType REMEMBERS,
                WITH ITS REMEMBERED PATH PREPENDED
    :param path: LIST OF KEYS LEADING TO THE VALUE
    :param value: THE VALUE TO ASSIGN
    :param force: force=False IF YOU PREFER TO USE _setdefault() AT THE LEAF
    """
    if isinstance(obj, NullType):
        d = _get(obj, "__dict__")
        o = d["obj"]
        p = d["path"]
        s = split_field(p) + path
        # force MUST FOLLOW THE RECURSION, OR force=False IS SILENTLY LOST
        return _assign(o, s, value, force)

    path0 = path[0]

    if len(path) == 1:
        if force:
            obj[path0] = value
        else:
            _setdefault(obj, path0, value)
        return

    old_value = obj.get(path0, None)
    if old_value == None:
        if value == None:
            # NOTHING TO CLEAR, SO DO NOT CREATE THE INTERMEDIATE OBJECTS
            return
        else:
            old_value = {}
        obj[path0] = old_value
    _assign(old_value, path[1:], value, force)
class NullType(object):
"""
Structural Null provides closure under the dot (.) operator
Null[x] == Null
Null.x == Null
Null INSTANCES WILL TRACK THE
"""
def __init__(self, obj=None, path=None):
d = object.__getattribute__(self, "__dict__")
d = _get(self, "__dict__")
d["obj"] = obj
d["path"] = path
@ -212,6 +382,33 @@ class NullStruct(object):
def __nonzero__(self):
return False
def __add__(self, other):
return Null
def __radd__(self, other):
return Null
def __sub__(self, other):
return Null
def __rsub__(self, other):
return Null
def __neg__(self):
return Null
def __mul__(self, other):
return Null
def __rmul__(self, other):
return Null
def __div__(self, other):
return Null
def __rdiv__(self, other):
return Null
def __gt__(self, other):
return False
@ -225,13 +422,13 @@ class NullStruct(object):
return False
def __eq__(self, other):
return other is None or isinstance(other, NullStruct)
return other is None or isinstance(other, NullType)
def __ne__(self, other):
return other is not None and not isinstance(other, NullStruct)
return other is not None and not isinstance(other, NullType)
def __getitem__(self, key):
return NullStruct(self, key)
return NullType(self, key)
def __len__(self):
return 0
@ -239,79 +436,73 @@ class NullStruct(object):
def __iter__(self):
return ZeroList.__iter__()
def last(self):
"""
IN CASE self IS INTERPRETED AS A list
"""
return Null
def right(self, num=None):
return EmptyList
def __getattribute__(self, key):
if key not in SPECIAL:
return NullStruct(self, key)
#SOME dict FUNCTIONS
if key == "items":
def temp():
return ZeroList
return temp
if key == "iteritems":
#LOW LEVEL ITERATION
return self.__iter__()
if key == "keys":
def temp():
return ZeroList
return temp
if key == "values":
def temp():
return ZeroList
return temp
if key == "dict":
return Null
if key == "copy":
#THE INTENT IS USUALLY PREPARE FOR UPDATES
def output():
return Struct()
try:
output = _get(self, key)
return output
except Exception, e:
return NullType(self, key)
def __setattr__(self, key, value):
NullStruct.__setitem__(self, key, value)
NullType.__setitem__(self, key, value)
def __setitem__(self, key, value):
try:
value = unwrap(value)
if key.find(".") == -1:
_assign(self, key, value)
return self
d = _get(self, "__dict__")
o = d["obj"]
path = d["path"]
seq = split_field(path)+split_field(key)
key = key.replace("\.", "\a")
seq = [k.replace("\a", ".") for k in key.split(".")]
d = _assign(self, seq[0], {}, False)
for k in seq[1:-1]:
o = {}
d[k] = o
d = o
d[seq[-1]] = value
return self
_assign(o, seq, value)
except Exception, e:
raise e
def keys(self):
return set()
def items(self):
return []
def pop(self, key, default=None):
return None
return Null
def __str__(self):
return "None"
def __repr__(self):
return "Null"
Null = NullStruct()
Null = NullType()
EmptyList = Null
ZeroList = []
def return_zero_list():
return []
def return_zero_set():
return set()
class StructList(list):
"""
ENCAPSULATES HANDING OF Nulls BY wrapING ALL MEMBERS AS NEEDED
ENCAPSULATES FLAT SLICES ([::]) FOR USE IN WINDOW FUNCTIONS
"""
EMPTY = None
def __init__(self, vals=None):
""" USE THE vals, NOT A COPY """
list.__init__(self)
# list.__init__(self)
if vals == None:
self.list = []
elif isinstance(vals, StructList):
@ -320,88 +511,145 @@ class StructList(list):
self.list = vals
def __getitem__(self, index):
if index < 0 or len(self.list) <= index:
if isinstance(index, slice):
# IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None)
if index.step is not None:
from .env.logs import Log
Log.error("slice step must be None, do not know how to deal with values")
length = len(_get(self, "list"))
i = index.start
i = min(max(i, 0), length)
j = index.stop
if j is None:
j = length
else:
j = max(min(j, length), 0)
return StructList(_get(self, "list")[i:j])
if index < 0 or len(_get(self, "list")) <= index:
return Null
return wrap(self.list[index])
return wrap(_get(self, "list")[index])
def __setitem__(self, i, y):
self.list[i] = unwrap(y)
_get(self, "list")[i] = unwrap(y)
def __getattribute__(self, key):
try:
if key != "index": # WE DO NOT WANT TO IMPLEMENT THE index METHOD
output = _get(self, key)
return output
except Exception, e:
if key[0:2] == "__": # SYSTEM LEVEL ATTRIBUTES CAN NOT BE USED FOR SELECT
raise e
return StructList.select(self, key)
def select(self, key):
output = []
for v in _get(self, "list"):
try:
output.append(v.__getattribute__(key))
except Exception, e:
try:
output.append(v.__getitem__(key))
except Exception, f:
output.append(None)
return StructList(output)
def __iter__(self):
return (wrap(v) for v in self.list)
return (wrap(v) for v in _get(self, "list"))
def __contains__(self, item):
return list.__contains__(_get(self, "list"), item)
def append(self, val):
self.list.append(unwrap(val))
_get(self, "list").append(unwrap(val))
return self
def __str__(self):
return self.list.__str__()
return _get(self, "list").__str__()
def __len__(self):
return self.list.__len__()
return _get(self, "list").__len__()
def __getslice__(self, i, j):
return wrap(self.list[i:j])
from .env.logs import Log
Log.error("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step]")
def copy(self):
return StructList(list(_get(self, "list")))
def remove(self, x):
self.list.remove(x)
_get(self, "list").remove(x)
return self
def extend(self, values):
for v in values:
self.list.append(unwrap(v))
_get(self, "list").append(unwrap(v))
return self
def pop(self):
return self.list.pop()
return wrap(_get(self, "list").pop())
def __add__(self, value):
output = list(self.list)
output = list(_get(self, "list"))
output.extend(value)
return StructList(vals=output)
def __or__(self, value):
output = list(self.list)
output = list(_get(self, "list"))
output.append(value)
return StructList(vals=output)
def right(self, num=None):
if num == None:
return StructList(vals=[self.list[-1]])
if num == 0:
return StructList()
return StructList(vals=self.list[-num])
def __radd__(self, other):
output = list(other)
output.extend(_get(self, "list"))
return StructList(vals=output)
def right(self, num=None):
"""
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:]
"""
if num == None:
return StructList([_get(self, "list")[-1]])
if num <= 0:
return EmptyList
return StructList(_get(self, "list")[-num:])
def leftBut(self, num):
"""
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
"""
if num == None:
return StructList([_get(self, "list")[:-1:]])
if num <= 0:
return EmptyList
return StructList(_get(self, "list")[:-num:])
def last(self):
"""
RETURN LAST ELEMENT IN StructList
RETURN LAST ELEMENT IN StructList [-1]
"""
return self.list[-1]
def wrap(v):
if v is None:
lst = _get(self, "list")
if lst:
return wrap(lst[-1])
return Null
if isinstance(v, (Struct, NullStruct, StructList)):
return v
if isinstance(v, dict):
m = Struct()
object.__setattr__(m, "__dict__", v) #INJECT m.__dict__=v SO THERE IS NO COPY
return m
if isinstance(v, list):
return StructList(v)
return v
def map(self, oper, includeNone=True):
if includeNone:
return StructList([oper(v) for v in _get(self, "list")])
else:
return StructList([oper(v) for v in _get(self, "list") if v != None])
StructList.EMPTY = StructList()
def unwrap(v):
if isinstance(v, Struct):
return object.__getattribute__(v, "__dict__")
if isinstance(v, StructList):
return v.list
if v == None:
return None
return v
def inverse(d):
@ -419,36 +667,88 @@ def nvl(*args):
#pick the first none-null value
for a in args:
if a != None:
return a
return wrap(a)
return Null
def zip(keys, values):
    """
    RETURN A Struct MAPPING EACH OF keys TO THE ITEM OF values AT THE
    SAME POSITION
    """
    output = Struct()
    position = 0
    for k in keys:
        output[k] = values[position]
        position += 1
    return output
def listwrap(value):
def literal_field(field):
"""
OFTEN IT IS NICE TO ALLOW FUNCTION PARAMETERS TO BE ASSIGNED A VALUE,
OR A list-OF-VALUES, OR NULL. CHECKING FOR THIS IS TEDIOUS AND WE WANT TO CAST
FROM THOSE THREE CASES TO THE SINGLE CASE OF A LIST
Null -> []
value -> [value]
[...] -> [...] (unchanged list)
#BEFORE
if a is not None:
if not isinstance(a, list):
a=[a]
for x in a:
#do something
#AFTER
for x in listwrap(a):
#do something
RETURN SAME WITH . ESCAPED
"""
if value == None:
return []
elif isinstance(value, list):
return wrap(value)
try:
return field.replace(".", "\.")
except Exception, e:
from .env.logs import Log
Log.error("bad literal", e)
def cpython_split_field(field):
    """
    RETURN field AS ARRAY OF DOT-SEPARATED FIELDS
    A DOT PRECEDED BY A BACKSLASH IS NOT A SEPARATOR; IT IS KEPT AS A
    LITERAL DOT INSIDE ITS FIELD

    :param field: THE (POSSIBLY DOTTED) FIELD NAME
    :return: LIST OF PATH STEPS; A NAME WITHOUT DOTS GIVES A ONE-ITEM LIST
    """
    if field.find(".") >= 0:
        # HIDE ESCAPED DOTS BEHIND A PLACEHOLDER WHILE SPLITTING
        field = field.replace("\\.", "\a")
        return [k.replace("\a", ".") for k in field.split(".")]
    else:
        return [field]
def pypy_split_field(field):
    """
    RETURN field AS ARRAY OF DOT-SEPARATED FIELDS
    A DOT PRECEDED BY A BACKSLASH IS KEPT AS A LITERAL DOT INSIDE ITS FIELD

    :param field: THE (POSSIBLY DOTTED) FIELD NAME
    :return: LIST OF PATH STEPS; AN EMPTY field GIVES AN EMPTY LIST
    """
    from .jsons import UnicodeBuilder

    if not field:
        return []

    output = []
    curr = UnicodeBuilder()
    i = 0
    while i < len(field):
        c = field[i]
        i += 1
        if c == "\\" and i < len(field):
            # LOOK AT THE ESCAPED CHARACTER
            c = field[i]
            i += 1
            if c == ".":
                curr.append(".")
            else:
                # NOT AN ESCAPED DOT: KEEP BOTH CHARACTERS AS THEY WERE
                curr.append("\\")
                curr.append(c)
        elif c == ".":
            # SEPARATOR: CLOSE THE CURRENT STEP AND START THE NEXT
            output.append(curr.build())
            curr = UnicodeBuilder()
        else:
            # ORDINARY CHARACTER (OR A BACKSLASH AT THE VERY END)
            curr.append(c)
    output.append(curr.build())
    return output
# try:
# import __pypy__
# split_field = pypy_split_field
# except ImportError:
split_field = cpython_split_field
def join_field(field):
    """
    RETURN field SEQUENCE AS STRING
    DOTS INSIDE A STEP ARE ESCAPED WITH A BACKSLASH, THEN THE STEPS ARE
    JOINED WITH DOTS
    """
    escaped = []
    for f in field:
        escaped.append(f.replace(".", "\."))
    return ".".join(escaped)
def hash_value(v):
    """
    HASH A VALUE THAT MAY BE AN UNHASHABLE CONTAINER
    sets, tuples AND lists HASH BY THEIR MEMBERS (IN ITERATION ORDER);
    dicts HASH BY THEIR SORTED VALUE HASHES ONLY, KEYS ARE NOT USED
    """
    if isinstance(v, (set, tuple, list)):
        member_hashes = tuple(hash_value(vv) for vv in v)
        return hash(member_hashes)
    if isinstance(v, dict):
        value_hashes = [hash_value(vv) for vv in v.values()]
        value_hashes.sort()
        return hash(tuple(value_hashes))
    return hash(v)
from .structs.wraps import unwrap, wrap

Просмотреть файл

@ -0,0 +1 @@
__author__ = 'klahnakoski'

Просмотреть файл

@ -0,0 +1,36 @@
from types import GeneratorType
from ..struct import StructList, Struct
_get = object.__getattribute__
_set = object.__setattr__
def slow_wrap(v):
    """
    WRAP v BY LOOKING UP ITS EXACT CLASS IN THE wrapper DISPATCH TABLE;
    CLASSES NOT IN THE TABLE ARE RETURNED AS-IS (VIA _no_wrap)
    """
    class_ = _get(v, "__class__")
    wrap_function = wrapper.get(class_, _no_wrap)
    return wrap_function(v)
def _wrap_dict(v):
    """
    RETURN A Struct THAT USES v ITSELF AS ITS __dict__
    """
    struct = Struct()
    # INJECT struct.__dict__=v SO THERE IS NO COPY
    _set(struct, "__dict__", v)
    return struct
def _wrap_list(v):
return StructList(v)
def _wrap_generator(v):
return (slow_wrap(vv) for vv in v)
def _no_wrap(v):
return v
wrapper = {
dict: _wrap_dict,
list: _wrap_list,
GeneratorType: _wrap_generator
}

139
tests/util/structs/wraps.py Normal file
Просмотреть файл

@ -0,0 +1,139 @@
from types import NoneType, GeneratorType
from ..struct import Null, StructList, Struct
_get = object.__getattribute__
_set = object.__setattr__
def wrap(v):
    """
    THIS IS THE CANDIDATE WE ARE TESTING TO WRAP FASTER, BUT DOES NOT SEEM TO BE

    dict -> Struct (SHARING THE dict), list -> StructList,
    GENERATOR -> GENERATOR OF WRAPPED ITEMS, None -> Null,
    ANYTHING ELSE IS RETURNED UNCHANGED.  ONLY EXACT CLASSES MATCH
    """
    type_ = _get(v, "__class__")

    if type_ is dict:
        m = Struct()
        _set(m, "__dict__", v)  # INJECT m.__dict__=v SO THERE IS NO COPY
        return m
    if type_ is list:
        return StructList(v)
    if type_ is GeneratorType:
        return (wrap(vv) for vv in v)
    if type_ is NoneType:
        return Null
    return v
def wrap_dot(value):
    """
    dict WITH DOTS IN KEYS IS INTERPRETED AS A PATH
    THE EXPANDED RESULT IS RETURNED WRAPPED
    """
    expanded = _wrap_dot(value)
    return wrap(expanded)
def _wrap_dot(value):
    """
    RETURN A PLAIN (UNWRAPPED) COPY OF value IN WHICH dict KEYS
    CONTAINING DOTS ARE EXPANDED INTO NESTED dicts

    :param value: None, A STRING, A NUMBER, A dict (OR Struct), OR AN ITERABLE
    :return: None/STRING/NUMBER UNCHANGED; A NEW dict FOR dicts; A NEW
             list FOR OTHER ITERABLES; ANYTHING ELSE UNCHANGED
    """
    if value == None:
        return None
    if isinstance(value, (basestring, int, float)):
        return value
    if isinstance(value, dict):
        if isinstance(value, Struct):
            value = unwrap(value)

        output = {}
        for key, child in value.iteritems():
            child = _wrap_dot(child)

            if key == "":
                from ..env.logs import Log

                Log.error("key is empty string. Probably a bad idea")
            if isinstance(key, str):
                key = key.decode("utf8")

            d = output
            if key.find(".") == -1:
                # SIMPLE KEY; A None VALUE MEANS "NO SUCH KEY"
                if child is None:
                    d.pop(key, None)
                else:
                    d[key] = child
            else:
                # DOTTED KEY: WALK (AND CREATE) THE PATH, THEN SET THE LEAF
                seq = split_field(key)
                for k in seq[:-1]:
                    e = d.get(k, None)
                    if e is None:
                        d[k] = {}
                        e = d[k]
                    d = e
                if child == None:
                    d.pop(seq[-1], None)
                else:
                    d[seq[-1]] = child
        return output
    if hasattr(value, '__iter__'):
        # RECURSE WITH THE UNWRAPPED FORM, SO THE RESULT IS PLAIN DATA ALL
        # THE WAY DOWN; wrap_dot() WRAPS THE WHOLE RESULT ONCE AT THE END
        return [_wrap_dot(v) for v in value]
    return value
def unwrap(v):
    """
    INVERSE OF wrap(): Struct -> ITS UNDERLYING __dict__,
    StructList -> ITS UNDERLYING list, NullType -> None,
    GENERATOR -> GENERATOR OF UNWRAPPED ITEMS, ANYTHING ELSE UNCHANGED.
    ONLY EXACT CLASSES MATCH
    """
    _type = _get(v, "__class__")

    if _type is Struct:
        return _get(v, "__dict__")
    if _type is StructList:
        return v.list
    if _type is NullType:
        return None
    if _type is GeneratorType:
        return (unwrap(vv) for vv in v)
    return v
def listwrap(value):
    """
    NORMALIZE A PARAMETER THAT MAY BE NULL, A SINGLE VALUE, OR A list OF
    VALUES INTO SOMETHING THAT CAN ALWAYS BE ITERATED

        Null  -> []
        value -> [value]
        [...] -> [...]  (SAME list, WRAPPED)

    SO THE CALLER CAN REPLACE

        if a is not None:
            if not isinstance(a, list):
                a = [a]
            for x in a:
                # do something

    WITH

        for x in listwrap(a):
            # do something
    """
    if value == None:
        return []
    if isinstance(value, list):
        return wrap(value)
    single = unwrap(value)
    return wrap([single])
def tuplewrap(value):
    """
    TURN lists (AND NESTED lists) INTO tuples, SO THEY CAN BE USED AS KEYS
    ANY OTHER VALUE IS UNWRAPPED AND RETURNED AS A ONE-ELEMENT tuple
    """
    if not isinstance(value, (list, set, tuple, GeneratorType)):
        return (unwrap(value),)

    nested_types = (list, tuple, GeneratorType)
    items = []
    for item in value:
        if isinstance(item, nested_types):
            items.append(tuplewrap(item))
        else:
            items.append(item)
    return tuple(items)
from ..struct import StructList, Struct, split_field, NullType

Просмотреть файл

@ -0,0 +1 @@
__author__ = 'klahnakoski'

Просмотреть файл

@ -0,0 +1,83 @@
# encoding: utf-8
#
from .util import struct
from .util.cnv import CNV
from .util.env.elasticsearch import ElasticSearch
from .util.env.logs import Log
from .util.env.files import File
from .util.queries import Q
from .util.struct import Struct
from .util.structs.wraps import unwrap, wrap
def make_test_instance(name, settings):
    """
    START FROM A CLEAN SLATE: DELETE THE BACKING FILE (IF settings.filename
    IS SET), THEN OPEN THE INSTANCE WITH open_test_instance()
    """
    backing_file = settings.filename
    if backing_file:
        File(backing_file).delete()
    return open_test_instance(name, settings)
def open_test_instance(name, settings):
    """
    RETURN A Fake_ES WHEN settings.filename IS SET, OTHERWISE DELETE AND
    RE-CREATE THE INDEX ON THE ES CLUSTER DESCRIBED BY settings

    name - ONLY USED IN THE LOG MESSAGE
    """
    if not settings.filename:
        Log.note("Using ES cluster at {{host}} as {{type}}", {
            "host": settings.host,
            "type": name
        })
        ElasticSearch.delete_index(settings)
        schema_json = File(settings.schema_file).read()
        schema = CNV.JSON2object(schema_json, flexible=True, paths=True)
        return ElasticSearch.create_index(settings, schema, limit_replicas=True)

    Log.note("Using {{filename}} as {{type}}", {
        "filename": settings.filename,
        "type": name
    })
    return Fake_ES(settings)
class Fake_ES():
    """
    FILE-BACKED STAND-IN FOR AN ElasticSearch INDEX
    ALL RECORDS ARE HELD IN self.data (id -> document) AND WRITTEN BACK TO
    self.filename AS JSON ON EVERY extend()
    """

    def __init__(self, settings):
        self.settings = wrap({"host":"fake", "index":"fake"})
        self.filename = settings.filename
        try:
            content = File(self.filename).read()
            self.data = CNV.JSON2object(content)
        except IOError:
            # COULD NOT READ THE FILE (PRESUMABLY IT DOES NOT EXIST YET), START EMPTY
            self.data = Struct()

    def search(self, query):
        """
        APPLY query.query.filtered.filter TO EVERY DOCUMENT AND RETURN AN
        ES-SHAPED RESULT ({"hits": {"total": ..., "hits": [...]}})
        IF query.fields IS GIVEN, EACH HIT CARRIES "fields" INSTEAD OF "_source"
        """
        query = wrap(query)
        accept = CNV.esfilter2where(query.query.filtered.filter)

        matches = []
        for doc_id, doc in self.data.items():
            if accept(doc):
                matches.append({"_id": doc_id, "_source": doc})
        filtered = wrap(matches)

        if not query.fields:
            return wrap({"hits": {"total": len(filtered), "hits": filtered}})

        selected = [
            {"_id": d._id, "fields": unwrap(Q.select([unwrap(d._source)], query.fields)[0])}
            for d in filtered
        ]
        return wrap({"hits": {"total": len(filtered), "hits": selected}})

    def extend(self, records):
        """
        JUST SO WE MODEL A Queue
        EACH RECORD IS {"id": ..., "value": ...}; THE WHOLE DATA SET IS
        RE-WRITTEN TO THE FILE AFTERWARD
        """
        additions = {}
        for rec in records:
            additions[rec["id"]] = rec["value"]
        struct.unwrap(self.data).update(additions)

        File(self.filename).write(CNV.object2JSON(self.data, pretty=True))
        Log.note("{{num}} items added", {"num": len(additions)})

    def add(self, record):
        """
        ADD A SINGLE RECORD (lists ARE REJECTED, USE extend())
        """
        if isinstance(record, list):
            Log.error("no longer accepting lists, use extend()")
        return self.extend([record])

    def delete_record(self, filter):
        """
        DROP (IN MEMORY ONLY) EVERY DOCUMENT THAT MATCHES THE GIVEN ES filter
        """
        reject = CNV.esfilter2where(filter)
        kept = {}
        for k, v in self.data.items():
            if not reject(v):
                kept[k] = v
        self.data = wrap(kept)

    def set_refresh_interval(self, seconds):
        # NOTHING TO DO FOR THE FAKE
        pass

Просмотреть файл

@ -0,0 +1 @@
__author__ = 'klahnakoski'

Просмотреть файл

@ -6,8 +6,9 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from multiprocessing.queues import Queue
from .logs import Log
from ..env.logs import Log
class worker(object):

Просмотреть файл

@ -0,0 +1,173 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from collections import Iterable
from types import GeneratorType
from ..struct import nvl
from ..env.logs import Log
from ..thread.threads import Queue, Thread
DEBUG = False
class Multithread(object):
    """
    SIMPLE SEMANTICS FOR SYMMETRIC MULTITHREADING
    PASS A SET OF functions TO BE EXECUTED (ONE PER THREAD)
    SET outbound==False TO SIMPLY THROW AWAY RETURN VALUES, IF ANY
    threads - IF functions IS NOT AN ARRAY, THEN threads IS USED TO MAKE AN ARRAY
    THE inbound QUEUE IS EXPECTING dicts, EACH dict IS USED AS kwargs TO GIVEN functions
    """

    def __init__(self, functions, threads=None, outbound=None, silent_queues=None):
        """
        functions     - ONE CALLABLE, OR AN ITERABLE OF CALLABLES (ONE WORKER EACH)
        threads       - NUMBER OF WORKERS WHEN functions IS A SINGLE CALLABLE (DEFAULT 1)
        outbound      - None: MAKE A NEW RESULT Queue; False: DISCARD RESULTS;
                        ANYTHING ELSE IS USED AS THE RESULT QUEUE
        silent_queues - PASSED AS silent TO THE Queues MADE HERE
        """
        if outbound is None:
            self.outbound = Queue(silent=silent_queues)
        elif outbound is False:
            self.outbound = None
        else:
            self.outbound = outbound

        self.inbound = Queue(silent=silent_queues)

        #MAKE THREADS
        if isinstance(functions, Iterable):
            if threads:
                Log.error("do not know how to handle an array of functions AND a thread multiplier")
            self.threads = []
            for t, f in enumerate(functions):
                thread = worker_thread("worker " + unicode(t), self.inbound, self.outbound, f)
                self.threads.append(thread)
        else:
            #ASSUME functions IS A SINGLE FUNCTION
            self.threads = []
            for t in range(nvl(threads, 1)):
                thread = worker_thread("worker " + unicode(t), self.inbound, self.outbound, functions)
                self.threads.append(thread)

    def __enter__(self):
        return self

    #WAIT FOR ALL QUEUED WORK TO BE DONE BEFORE RETURNING
    def __exit__(self, type, value, traceback):
        try:
            if isinstance(value, Exception):
                self.inbound.close()
            self.inbound.add(Thread.STOP)
            self.join()
        except Exception as e:
            Log.warning("Problem sending stops", e)

    #IF YOU SENT A stop(), OR Thread.STOP, YOU MAY WAIT FOR SHUTDOWN
    def join(self):
        try:
            #WAIT FOR FINISH
            for t in self.threads:
                t.join()
        except (KeyboardInterrupt, SystemExit):
            Log.note("Shutdow Started, please be patient")
        except Exception as e:
            Log.error("Unusual shutdown!", e)
        finally:
            for t in self.threads:
                t.keep_running = False
            self.inbound.close()
            if self.outbound: self.outbound.close()
            for t in self.threads:
                t.join()

    def execute(self, requests):
        """
        RETURN A GENERATOR THAT HAS len(requests) RESULTS (ANY ORDER)
        EXPECTING requests TO BE A list OF dicts, EACH dict IS USED AS kwargs TO GIVEN functions

        RETURNS None WHEN THERE IS NO outbound QUEUE
        THE RETURNED GENERATOR RE-RAISES ANY EXCEPTION A WORKER REPORTED
        """
        if not isinstance(requests, (list, tuple, GeneratorType)):
            Log.error("Expecting requests to be a list or generator", offset=1)

        # A GENERATOR HAS NO len(); MATERIALIZE IT SO THE REQUESTS CAN BE COUNTED
        if isinstance(requests, GeneratorType):
            requests = list(requests)

        #FILL QUEUE WITH WORK
        self.inbound.extend(requests)

        num = len(requests)

        def output():
            for _ in range(num):
                result = self.outbound.pop()
                if "exception" in result:
                    raise result["exception"]
                else:
                    yield result["response"]

        if self.outbound is not None:
            return output()
        else:
            return

    #EXTERNAL COMMAND THAT RETURNS IMMEDIATELY
    def stop(self):
        self.inbound.close()  #SEND STOPS TO WAKE UP THE WORKERS WAITING ON inbound.pop()
        for t in self.threads:
            t.keep_running = False
class worker_thread(Thread):
    """
    A Thread THAT REPEATEDLY POPS A dict OF PARAMETERS FROM in_queue,
    CALLS function(**params), AND PUTS {"response": ...} OR
    {"exception": ...} ON out_queue (WHEN THERE IS ONE)
    THE THREAD IS STARTED BY THE CONSTRUCTOR
    """

    #in_queue MUST CONTAIN HASH OF PARAMETERS FOR load()
    def __init__(self, name, in_queue, out_queue, function):
        Thread.__init__(self, name, self.event_loop)
        self.num_runs = 0
        self.function = function
        self.out_queue = out_queue
        self.in_queue = in_queue
        self.start()

    def event_loop(self, please_stop):
        """
        RUN UNTIL please_stop IS SIGNALLED, OR Thread.STOP IS POPPED FROM in_queue
        """
        saw_stop = False
        while not please_stop.is_go():
            params = self.in_queue.pop()

            if params == Thread.STOP:
                saw_stop = True
                pending = self.in_queue.queue
                if pending:
                    Log.warning("programmer error, queue not empty. {{num}} requests lost:\n{{requests}}", {
                        "num": len(pending),
                        "requests": pending[:5:] + pending[-5::]
                    })
                break

            if please_stop.is_go():
                break

            try:
                if DEBUG and hasattr(self.function, "func_name"):
                    Log.note("run {{function}}", {"function": self.function.func_name})
                answer = self.function(**params)
                if self.out_queue != None:
                    self.out_queue.add({"response": answer})
            except Exception as e:
                Log.warning("Can not execute with params={{params}}", {"params": params}, e)
                if self.out_queue != None:
                    self.out_queue.add({"exception": e})
            finally:
                # COUNT THE ATTEMPT WHETHER IT SUCCEEDED OR NOT
                self.num_runs += 1

        please_stop.go()
        del self.function

        runs = self.num_runs
        if runs == 0:
            Log.warning("{{name}} thread did no work", {"name": self.name})
        if DEBUG and runs != 1:
            Log.note("{{name}} thread did {{num}} units of work", {
                "name": self.name,
                "num": runs
            })
        if saw_stop and self.in_queue.queue:
            Log.warning("multithread programmer error, queue not empty. {{num}} requests lost", {"num": len(self.in_queue.queue)})
        if DEBUG:
            Log.note("{{thread}} DONE", {"thread": self.name})

Просмотреть файл

@ -7,13 +7,18 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import datetime, timedelta
import threading
import thread
import threading
import time
from .struct import nvl
import sys
from ..struct import nvl, Struct
# THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL.
# THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS
# FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS
DEBUG = True
@ -21,9 +26,10 @@ class Lock(object):
"""
SIMPLE LOCK (ACTUALLY, A PYTHON threadind.Condition() WITH notify() BEFORE EVERY RELEASE)
"""
def __init__(self, name=""):
self.monitor=threading.Condition()
self.name=name
self.monitor = threading.Condition()
self.name = name
def __enter__(self):
self.monitor.acquire()
@ -35,10 +41,10 @@ class Lock(object):
def wait(self, timeout=None, till=None):
if till:
timeout=(datetime.utcnow()-till).total_seconds()
if timeout<0:
timeout = (datetime.utcnow() - till).total_seconds()
if timeout < 0:
return
self.monitor.wait(timeout=timeout)
self.monitor.wait(timeout=float(timeout) if timeout else None)
def notify_all(self):
self.monitor.notify_all()
@ -48,39 +54,70 @@ class Queue(object):
"""
SIMPLE MESSAGE QUEUE, multiprocessing.Queue REQUIRES SERIALIZATION, WHICH IS HARD TO USE JUST BETWEEN THREADS
"""
def __init__(self, max=None):
def __init__(self, max=None, silent=False):
"""
max - LIMIT THE NUMBER IN THE QUEUE, IF TOO MANY add() AND extend() WILL BLOCK
silent - COMPLAIN IF THE READERS ARE TOO SLOW
"""
self.max = nvl(max, 2**30)
self.max = nvl(max, 2 ** 10)
self.silent = silent
self.keep_running = True
self.lock = Lock("lock for queue")
self.queue = []
self.next_warning=datetime.utcnow() # FOR DEBUGGING
def __iter__(self):
while self.keep_running:
try:
value=self.pop()
if value!=Thread.STOP:
value = self.pop()
if value is not Thread.STOP:
yield value
except Exception, e:
from .logs import Log
from ..env.logs import Log
Log.warning("Tell me about what happened here", e)
def add(self, value):
with self.lock:
self.wait_for_queue_space()
if self.keep_running:
self.queue.append(value)
while self.keep_running and len(self.queue) > self.max:
self.lock.wait()
return self
def extend(self, values):
with self.lock:
# ONCE THE queue IS BELOW LIMIT, ALLOW ADDING MORE
self.wait_for_queue_space()
if self.keep_running:
self.queue.extend(values)
while self.keep_running and len(self.queue) > self.max:
return self
def wait_for_queue_space(self):
    """
    EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

    WHEN self.silent IS SET, WAIT ON THE LOCK WITHOUT A TIMEOUT AND NEVER WARN
    OTHERWISE WAKE EVERY wait_time SECONDS AND, IF THE QUEUE IS STILL OVER
    self.max, LOG A WARNING (AT MOST ONCE PER wait_time SECONDS)
    """
    wait_time = 5

    # PUSH THE NEXT WARNING OUT, SO THE FIRST ONE COMES ONLY AFTER A FULL WAIT
    now = datetime.utcnow()
    if self.next_warning < now:
        self.next_warning = now + timedelta(seconds=wait_time)

    while self.keep_running and len(self.queue) > self.max:
        if self.silent:
            self.lock.wait()
        else:
            self.lock.wait(wait_time)
            if len(self.queue) > self.max:
                now = datetime.utcnow()
                if self.next_warning < now:
                    self.next_warning = now + timedelta(seconds=wait_time)
                    from ..env.logs import Log

                    # TEMPLATE PREVIOUSLY HAD A STRAY "}" AFTER {{num}}
                    Log.warning("Queue is full ({{num}} items), thread(s) have been waiting {{wait_time}} sec", {
                        "num": len(self.queue),
                        "wait_time": wait_time
                    })
def __len__(self):
with self.lock:
@ -90,9 +127,9 @@ class Queue(object):
with self.lock:
while self.keep_running:
if self.queue:
value=self.queue.pop(0)
if value==Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
self.keep_running=False
value = self.queue.pop(0)
if value is Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
self.keep_running = False
return value
self.lock.wait()
return Thread.STOP
@ -108,7 +145,7 @@ class Queue(object):
return []
for v in self.queue:
if v == Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
if v is Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
self.keep_running = False
output = list(self.queue)
@ -117,8 +154,7 @@ class Queue(object):
def close(self):
with self.lock:
self.keep_running=False
self.keep_running = False
class AllThread(object):
@ -127,7 +163,7 @@ class AllThread(object):
"""
def __init__(self):
self.threads=[]
self.threads = []
def __enter__(self):
return self
@ -137,31 +173,35 @@ class AllThread(object):
self.join()
def join(self):
exceptions=[]
exceptions = []
try:
for t in self.threads:
response=t.join()
response = t.join()
if "exception" in response:
exceptions.append(response["exception"])
except Exception, e:
from .logs import Log
from ..env.logs import Log
Log.warning("Problem joining", e)
if exceptions:
from .logs import Log
Log.error("Problem in child threads", exceptions)
from ..env.logs import Log
Log.error("Problem in child threads", exceptions)
def add(self, target, *args, **kwargs):
"""
target IS THE FUNCTION TO EXECUTE IN THE THREAD
"""
t=Thread.run(target.__name__, target, *args, **kwargs)
t = Thread.run(target.__name__, target, *args, **kwargs)
self.threads.append(t)
ALL_LOCK = Lock()
MAIN_THREAD = Struct(name="Main Thread", id=thread.get_ident())
ALL = dict()
ALL[thread.get_ident()] = MAIN_THREAD
class Thread(object):
@ -170,22 +210,22 @@ class Thread(object):
run() ENHANCED TO CAPTURE EXCEPTIONS
"""
num_threads=0
STOP="stop"
TIMEOUT="TIMEOUT"
num_threads = 0
STOP = "stop"
TIMEOUT = "TIMEOUT"
def __init__(self, name, target, *args, **kwargs):
self.id = -1
self.name = name
self.target = target
self.response = None
self.synch_lock=Lock()
self.synch_lock = Lock()
self.args = args
#ENSURE THERE IS A SHARED please_stop SIGNAL
self.kwargs = kwargs.copy()
self.kwargs["please_stop"]=self.kwargs.get("please_stop", Signal())
self.kwargs["please_stop"] = self.kwargs.get("please_stop", Signal())
self.please_stop = self.kwargs["please_stop"]
self.stopped = Signal()
@ -206,38 +246,49 @@ class Thread(object):
def start(self):
try:
self.thread=thread.start_new_thread(Thread._run, (self, ))
thread.start_new_thread(Thread._run, (self, ))
except Exception, e:
from .logs import Log
from ..env.logs import Log
Log.error("Can not start thread", e)
def stop(self):
self.please_stop.go()
def _run(self):
self.id = thread.get_ident()
with ALL_LOCK:
ALL[self.id] = self
try:
if self.target is not None:
response=self.target(*self.args, **self.kwargs)
response = self.target(*self.args, **self.kwargs)
with self.synch_lock:
self.response={"response":response}
self.response = Struct(response=response)
except Exception, e:
with self.synch_lock:
self.response={"exception":e}
from .logs import Log
Log.error("Problem in thread", e)
self.response = Struct(exception=e)
try:
from ..env.logs import Log
Log.fatal("Problem in thread {{name}}", {"name": self.name}, e)
except Exception, f:
sys.stderr.write("ERROR in thread: " + str(self.name) + " " + str(e) + "\n")
finally:
self.stopped.go()
del self.target, self.args, self.kwargs
with ALL_LOCK:
del ALL[self.id]
def is_alive(self):
return not self.stopped
def join(self, timeout=None, till=None):
"""
RETURN THE RESULT OF THE THREAD EXECUTION (INCLUDING EXCEPTION)
RETURN THE RESULT {"response":r, "exception":e} OF THE THREAD EXECUTION (INCLUDING EXCEPTION, IF EXISTS)
"""
if not till and timeout:
till=datetime.utcnow()+timedelta(seconds=timeout)
till = datetime.utcnow() + timedelta(seconds=timeout)
if till is None:
while True:
@ -247,22 +298,25 @@ class Thread(object):
return self.response
self.synch_lock.wait(0.5)
from .logs import Log
if DEBUG:
Log.note("Waiting on thread {{thread}}", {"thread":self.name})
from ..env.logs import Log
Log.note("Waiting on thread {{thread|json}}", {"thread": self.name})
else:
self.stopped.wait_for_go(till=till)
if self.stopped:
return self.response
else:
from logs import Except
from ..env.logs import Except
raise Except(type=Thread.TIMEOUT)
@staticmethod
def run(name, target, *args, **kwargs):
#ENSURE target HAS please_stop ARGUMENT
if "please_stop" not in target.__code__.co_varnames:
from logs import Log
from ..env.logs import Log
Log.error("function must have please_stop argument for signalling emergency shutdown")
Thread.num_threads += 1
@ -280,6 +334,14 @@ class Thread(object):
if duration > 0:
time.sleep(duration)
@staticmethod
def current():
    """
    RETURN THE Thread REGISTERED IN ALL FOR THE CALLING THREAD,
    OR MAIN_THREAD WHEN THE CALLER IS NOT REGISTERED
    """
    ident = thread.get_ident()
    with ALL_LOCK:
        return ALL.get(ident, MAIN_THREAD)
class Signal(object):
@ -290,7 +352,7 @@ class Signal(object):
def __init__(self):
self.lock = Lock()
self._go = False
self.job_queue=[]
self.job_queue = []
def __bool__(self):
@ -315,8 +377,8 @@ class Signal(object):
return
self._go = True
jobs=self.job_queue
self.job_queue=[]
jobs = self.job_queue
self.job_queue = []
self.lock.notify_all()
for j in jobs:
@ -337,37 +399,39 @@ class Signal(object):
self.job_queue.append(target)
class ThreadedQueue(Queue):
"""
TODO: Check that this queue is not dropping items at shutdown
DISPATCH TO ANOTHER (SLOWER) queue IN BATCHES OF GIVEN size
"""
def __init__(self, queue, size, max=None):
def __init__(self, queue, size=None, max=None, period=None, silent=False):
if max == None:
#REASONABLE DEFAULT
max = size*2
max = size * 2
Queue.__init__(self, max=max)
Queue.__init__(self, max=max, silent=silent)
def size_pusher(please_stop):
please_stop.on_go(lambda: self.add(Thread.STOP))
#queue IS A MULTI-THREADED QUEUE, SO THIS WILL BLOCK UNTIL THE size ARE READY
from .queries import Q
from ..queries import Q
for i, g in Q.groupby(self, size=size):
try:
queue.extend(g)
if please_stop:
from logs import Log
from ..env.logs import Log
Log.warning("ThreadedQueue stopped early, with {{num}} items left in queue", {
"num":len(self)
"num": len(self)
})
return
except Exception, e:
from logs import Log
Log.warning("Can not push data to given queue", e)
from ..env.logs import Log
Log.warning("Problem with pushing {{num}} items to data sink", {"num": len(g)}, e)
self.thread = Thread.run("threaded queue", size_pusher)

Просмотреть файл

@ -1,48 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import time
from .strings import expand_template
from .logs import Log
class Timer:
    """
    CONTEXT MANAGER THAT LOGS HOW LONG ITS BODY TOOK

    USAGE:
        with Timer("doing hard time"):
            something_that_takes_long()

    OUTPUT:
        doing hard time took 45.468 sec
    """

    def __init__(self, description, param=None):
        # WE WOULD LIKE TO KEEP THE TEMPLATE, AND PASS IT TO THE LOGGER ON __exit__(), WE FAKE IT FOR NOW
        expanded = expand_template(description, param)
        self.description = expanded

    def __enter__(self):
        Log.note("Timer start: {{description}}", {"description": self.description})
        self.start = time.clock()
        return self

    def __exit__(self, type, value, traceback):
        self.end = time.clock()
        self.interval = self.end - self.start
        details = {
            "description": self.description,
            "duration": round(self.interval, 3)
        }
        Log.note("Timer end : {{description}} (took {{duration}} sec)", details)

    @property
    def duration(self):
        # SECONDS MEASURED BY THE LAST __enter__/__exit__ PAIR
        return self.interval

Просмотреть файл

@ -0,0 +1 @@
__author__ = 'klahnakoski'

Просмотреть файл

@ -0,0 +1,299 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import regex
from ..cnv import CNV
from ..collections import MIN
from ..env.logs import Log
from ..maths import Math
from ..structs.wraps import unwrap, wrap
class Duration(object):
    """
    A SPAN OF TIME, HELD AS TWO PARTS:
        milli - TOTAL MILLISECONDS (INCLUDES THE MILLISECONDS OF THE MONTHS)
        month - NUMBER OF MONTHS

    Duration(None) AND Duration(nan) RETURN None
    Duration(<number>) IS THAT MANY MILLISECONDS
    Duration(<string>) IS PARSED WITH parse()
    Duration(<Duration>) IS A COPY
    """

    def __new__(cls, obj=None):
        output = object.__new__(cls)
        if obj == None:
            return None
        if Math.is_number(obj):
            output.milli = obj
            output.month = 0
            return output
        elif isinstance(obj, basestring):
            return parse(obj)
        elif isinstance(obj, Duration):
            output.milli = obj.milli
            output.month = obj.month
            return output
        elif Math.is_nan(obj):
            return None
        else:
            Log.error("Do not know type of object (" + CNV.object2JSON(obj) + ")of to make a Duration")

    def __add__(self, other):
        output = Duration(0)
        output.milli = self.milli + other.milli
        output.month = self.month + other.month
        return output

    def __mul__(self, amount):
        output = Duration(0)
        output.milli = self.milli * amount
        output.month = self.month * amount
        return output

    def __rmul__(self, amount):
        output = Duration(0)
        output.milli = self.milli * amount
        output.month = self.month * amount
        return output

    def __div__(self, amount):
        """
        Duration / <Duration WITH MONTHS> -> NUMBER OF THOSE INTERVALS
        Duration / <number>               -> SCALED Duration
        Duration / <OTHER Duration>       -> RATIO OF THE milli PARTS
        """
        # THE MONTH ARITHMETIC BELOW DIVIDES BY amount.month, SO IT CAN ONLY
        # RUN WHEN amount.month IS NON-ZERO (THE TEST WAS PREVIOUSLY INVERTED,
        # WHICH GUARANTEED A ZeroDivisionError)
        if isinstance(amount, Duration) and amount.month:
            m = self.month
            r = self.milli

            # DO NOT CONSIDER TIME OF DAY
            tod = r % MILLI_VALUES.day
            r = r - tod

            if m == 0 and r > (MILLI_VALUES.year / 3):
                m = Math.floor(12 * self.milli / MILLI_VALUES.year)
                r -= (m / 12) * MILLI_VALUES.year
            else:
                r = r - (self.month * MILLI_VALUES.month)
                if r >= MILLI_VALUES.day * 31:
                    Log.error("Do not know how to handle")
            # NOTE(review): WITHOUT `from __future__ import division` THESE ARE
            # INTEGER DIVISIONS UNDER PYTHON 2 (29 / 30 == 0) - CONFIRM THE INTENT
            r = MIN(29 / 30, (r + tod) / (MILLI_VALUES.day * 30))

            output = Math.floor(m / amount.month) + r
            return output
        elif Math.is_number(amount):
            output = Duration(0)
            output.milli = self.milli / amount
            output.month = self.month / amount
            return output
        else:
            return self.milli / amount.milli

    def __sub__(self, duration):
        output = Duration(0)
        output.milli = self.milli - duration.milli
        output.month = self.month - duration.month
        return output

    def floor(self, interval=None):
        """
        ROUND DOWN TO A WHOLE MULTIPLE OF interval (WHICH MUST BE A Duration)
        """
        if not isinstance(interval, Duration):
            Log.error("Expecting an interval as a Duration object")

        output = Duration(0)
        if interval.month:
            if self.month:
                output.month = Math.floor(self.month / interval.month) * interval.month
                output.milli = output.month * MILLI_VALUES.month
                return output

            # A MONTH OF DURATION IS BIGGER THAN A CANONICAL MONTH
            output.month = Math.floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month) * interval.month
            output.milli = output.month * MILLI_VALUES.month
        else:
            output.milli = Math.floor(self.milli / (interval.milli)) * (interval.milli)
        return output

    def __str__(self):
        """
        FORMAT AS A CONCATENATION OF <sign><amount><type> TERMS, LARGEST UNIT
        FIRST (THE INVERSE OF parse()); A ZERO DURATION IS "zero"
        """
        if not self.milli:
            return "zero"

        output = ""
        rest = (self.milli - (MILLI_VALUES.month * self.month))  # DO NOT INCLUDE THE MONTH'S MILLIS
        isNegative = (rest < 0)
        rest = Math.abs(rest)

        # NUMBERS MUST BE CONVERTED WITH str() BEFORE CONCATENATION
        # (PREVIOUSLY EVERY TERM RAISED TypeError)

        # MILLI
        rem = rest % 1000
        if rem != 0:
            output = "+" + str(rem) + "milli" + output
        rest = Math.floor(rest / 1000)

        # SECOND
        rem = rest % 60
        if rem != 0:
            output = "+" + str(rem) + "second" + output
        rest = Math.floor(rest / 60)

        # MINUTE
        rem = rest % 60
        if rem != 0:
            output = "+" + str(rem) + "minute" + output
        rest = Math.floor(rest / 60)

        # HOUR
        rem = rest % 24
        if rem != 0:
            output = "+" + str(rem) + "hour" + output
        rest = Math.floor(rest / 24)

        # DAY
        if rest < 11 and rest != 7:
            rem = rest
            rest = 0
        else:
            rem = rest % 7
            rest = Math.floor(rest / 7)
        if rem != 0:
            output = "+" + str(rem) + "day" + output

        # WEEK
        if rest != 0:
            output = "+" + str(rest) + "week" + output

        if isNegative:
            output = output.replace("+", "-")

        # MONTH AND YEAR
        if self.month:
            sign = "-" if self.month < 0 else "+"
            month = Math.abs(self.month)

            if month <= 18 and month != 12:
                output = sign + str(month) + "month" + output
            else:
                m = month % 12
                if m != 0:
                    output = sign + str(m) + "month" + output
                y = Math.floor(month / 12)
                output = sign + str(y) + "year" + output

        if output[0] == "+":
            output = output[1::]
        # "1hour" IS WRITTEN AS "hour"
        if output[0] == '1' and not Math.is_number(output[1]):
            output = output[1::]
        return output

    def format(self, interval, rounding):
        """
        RETURN THIS DURATION AS A COUNT OF interval (A STRING LIKE "day"),
        ROUNDED TO rounding, FOLLOWED BY THE interval NAME
        """
        # Duration.newInstance DOES NOT EXIST; THE CONSTRUCTOR PARSES STRINGS
        return str(self.round(Duration(interval), rounding)) + interval

    def round(self, interval, rounding=0):
        output = self / interval
        output = Math.round(output, rounding)
        return output
def _string2Duration(text):
    """
    CONVERT SIMPLE <float><type> TO A DURATION OBJECT

    A MISSING AMOUNT MEANS ONE UNIT ("hour" IS THE SAME AS "1hour")
    "" AND "zero" ARE THE ZERO DURATION
    """
    if text == "" or text == "zero":
        return Duration(0)

    amount, interval = regex.match(r"([\d\.]*)(.*)", text)
    # NO LEADING NUMBER MEANS ONE UNIT; DEFAULTING TO 0 MADE Duration("second"),
    # AND EVERY OTHER UNIT CONSTANT, A ZERO DURATION
    amount = CNV.value2int(amount) if amount else 1

    if MILLI_VALUES[interval] == None:
        Log.error(interval + " is not a recognized duration type (did you use the pural form by mistake?")

    output = Duration(0)
    if MONTH_VALUES[interval] == 0:
        output.milli = amount * MILLI_VALUES[interval]
    else:
        # MONTH-BASED UNITS TRACK BOTH THE MONTH COUNT AND THE EQUIVALENT MILLIS
        output.milli = amount * MONTH_VALUES[interval] * MILLI_VALUES.month
        output.month = amount * MONTH_VALUES[interval]
    return output
def parse(value):
    """
    PARSE A STRING OF <sign><integer><type> TERMS (eg "2week-3day") INTO
    A SINGLE Duration
    """
    output = Duration(0)

    # EXPECTING CONCAT OF <sign><integer><type>
    for positive_term in value.split("+"):
        # THE FIRST PIECE IS ADDED, EVERY PIECE AFTER A "-" IS SUBTRACTED
        pieces = positive_term.split("-")
        output = output + _string2Duration(pieces[0])
        for negative_term in pieces[1::]:
            # Duration HAS NO subtract() METHOD; USE THE - OPERATOR (__sub__)
            output = output - _string2Duration(negative_term)
    return output
MILLI_VALUES = wrap({
"year": 52 * 7 * 24 * 60 * 60 * 1000, # 52weeks
"quarter": 13 * 7 * 24 * 60 * 60 * 1000, # 13weeks
"month": 28 * 24 * 60 * 60 * 1000, # 4weeks
"week": 7 * 24 * 60 * 60 * 1000,
"day": 24 * 60 * 60 * 1000,
"hour": 60 * 60 * 1000,
"minute": 60 * 1000,
"second": 1000,
"milli": 1
})
MONTH_VALUES = wrap({
"year": 12,
"quarter": 3,
"month": 1,
"week": 0,
"day": 0,
"hour": 0,
"minute": 0,
"second": 0,
"milli": 0
})
# A REAL MONTH IS LARGER THAN THE CANONICAL MONTH
MONTH_SKEW = MILLI_VALUES["year"] / 12 - MILLI_VALUES.month
def compare(a, b):
    """
    ORDER TWO DURATIONS BY THEIR milli: NEGATIVE WHEN a IS SHORTER THAN b,
    ZERO WHEN EQUAL, POSITIVE WHEN LONGER
    """
    difference = a.milli - b.milli
    return difference
DOMAIN = {
"type": "duration",
"compare": compare
}
ZERO = Duration(0)
SECOND = Duration("second")
MINUTE = Duration("minute")
HOUR = Duration("hour")
DAY = Duration("day")
WEEK = Duration("week")
MONTH = Duration("month")
QUARTER = Duration("quarter")
YEAR = Duration("year")
COMMON_INTERVALS = [
Duration("second"),
Duration("15second"),
Duration("30second"),
Duration("minute"),
Duration("5minute"),
Duration("15minute"),
Duration("30minute"),
Duration("hour"),
Duration("2hour"),
Duration("3hour"),
Duration("6hour"),
Duration("12hour"),
Duration("day"),
Duration("2day"),
Duration("week"),
Duration("2week"),
Duration("month"),
Duration("2month"),
Duration("quarter"),
Duration("6month"),
Duration("year")
]

49
tests/util/times/timer.py Normal file
Просмотреть файл

@ -0,0 +1,49 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from datetime import timedelta
from time import clock
from ..struct import nvl, Struct
from ..structs.wraps import wrap
from ..env.logs import Log
class Timer:
    """
    CONTEXT MANAGER THAT MEASURES (AND OPTIONALLY LOGS) HOW LONG ITS BODY TOOK

    USAGE:
        with Timer("doing hard time"):
            something_that_takes_long()

    OUTPUT:
        doing hard time took 45.468 sec

    description - TEMPLATE FOR THE LOG MESSAGE
    param       - PARAMETERS FOR THE TEMPLATE
    debug       - SET TO False TO MEASURE WITHOUT LOGGING
    """

    def __init__(self, description, param=None, debug=True):
        self.template = description
        self.param = nvl(wrap(param), Struct())
        self.debug = debug

    def __enter__(self):
        if self.debug:
            Log.note("Timer start: " + self.template, self.param, stack_depth=1)
        self.start = clock()
        return self

    def __exit__(self, type, value, traceback):
        # ALWAYS RECORD THE TIMING, SO duration IS AVAILABLE EVEN WHEN debug
        # IS OFF (OTHERWISE self.interval WOULD NEVER BE SET)
        self.end = clock()
        self.interval = self.end - self.start

        if self.debug:
            param = wrap(self.param)
            param.duration = timedelta(seconds=self.interval)
            Log.note("Timer end : " + self.template + " (took {{duration}})", self.param, stack_depth=1)

    @property
    def duration(self):
        # SECONDS BETWEEN __enter__ AND __exit__
        return self.interval

1
tests/util/vendor/__init__.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
__author__ = 'klahnakoski'

9
tests/util/vendor/aespython/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,9 @@
About
-----
**DO NOT USE THIS LIBRARY IF REAL ENCRYPTION IS REQUIRED**, THIS VENDOR LIBRARY HAS NOT BEEN VETTED FOR CORRECTNESS.
* Snagged from [https://github.com/caller9/pythonaes](https://github.com/caller9/pythonaes)
* Licensed under the MIT license [http://www.opensource.org/licenses/mit-license.php](http://www.opensource.org/licenses/mit-license.php)

0
tests/util/vendor/aespython/__init__.py поставляемый Normal file
Просмотреть файл

151
tests/util/vendor/aespython/aes_cipher.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,151 @@
#!/usr/bin/env python
"""
AES Block Cipher.
Performs single block cipher decipher operations on a 16 element list of integers.
These integers represent 8 bit bytes in a 128 bit block.
The result of cipher or decipher operations is the transformed 16 element list of integers.
Running this file as __main__ will result in a self-test of the algorithm.
Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
Thanks to serprex for many optimizations in this code. For even more, see his github fork of this project.
Copyright (c) 2010, Adam Newman http://www.caller9.com/
Demur Rumed https://github.com/serprex
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"
#Normally use relative import. In test mode use local import.
try:
from . import aes_tables
except ValueError:
import aes_tables
class AESCipher:
    """Single-block AES cipher/decipher over 16-element lists of byte values."""

    def __init__ (self, expanded_key):
        # Keep the expanded key; its length fixes the number of rounds
        # (one 16-byte round key per round, plus the initial one).
        self._expanded_key = expanded_key
        self._Nr = len(expanded_key) // 16 - 1

    def _sub_bytes (self, state):
        # Substitute every byte of the state through the sbox, in place.
        state[:] = [aes_tables.sbox[b] for b in state]

    def _i_sub_bytes (self, state):
        # Substitute every byte of the state through the inverse sbox, in place.
        state[:] = [aes_tables.i_sbox[b] for b in state]

    def _shift_row (self, row, shift):
        # Rotate the row left by `shift` positions, in place, and return it.
        row.extend(row[:shift])
        del row[:shift]
        return row

    def _i_shift_row (self, row, shift):
        # Inverse of _shift_row for a 4-element row: `shift` is negative and
        # the row is rotated right by -shift positions, in place.
        row.extend(row[:shift])
        del row[:4 + shift]
        return row

    def _shift_rows (self, state):
        # Row r is every 4th item starting at r; row 0 is left untouched and
        # rows 1..3 are rotated left by their row number.
        for r in range(1, 4):
            state[r::4] = self._shift_row(state[r::4], r)

    def _i_shift_rows (self, state):
        # Undo _shift_rows: rows 1..3 are rotated right by their row number.
        for r in range(1, 4):
            state[r::4] = self._i_shift_row(state[r::4], -r)

    def _mix_column (self, column, inverse):
        # Galois-field lookup tables replace the field multiplications;
        # the inverse operation uses the inverse matrix's tables.
        if inverse:
            tables = aes_tables.galI
        else:
            tables = aes_tables.galNI
        g0, g1, g2, g3 = tables
        c0, c1, c2, c3 = column
        return (
            g0[c0] ^ g1[c1] ^ g2[c2] ^ g3[c3],
            g3[c0] ^ g0[c1] ^ g1[c2] ^ g2[c3],
            g2[c0] ^ g3[c1] ^ g0[c2] ^ g1[c3],
            g1[c0] ^ g2[c1] ^ g3[c2] ^ g0[c3])

    def _mix_columns (self, state, inverse):
        # Each consecutive group of 4 bytes is one column.
        for start in range(0, 16, 4):
            stop = start + 4
            state[start:stop] = self._mix_column(state[start:stop], inverse)

    def _add_round_key (self, state, round):
        # XOR the state, in place, with the 16-byte key of the given round.
        round_key = self._expanded_key[round * 16:(round + 1) * 16]
        for pos, (s, k) in enumerate(zip(state, round_key)):
            state[pos] = s ^ k

    def cipher_block (self, state):
        """Perform AES block cipher on input"""
        # PKCS7 padding. A new list is built on purpose: the caller's input
        # must not be modified.
        missing = 16 - len(state)
        state = state + [missing] * missing

        self._add_round_key(state, 0)

        for round_number in range(1, self._Nr):
            self._sub_bytes(state)
            self._shift_rows(state)
            self._mix_columns(state, False)
            self._add_round_key(state, round_number)

        # The final round has no mix-columns step.
        self._sub_bytes(state)
        self._shift_rows(state)
        self._add_round_key(state, self._Nr)
        return state

    def decipher_block (self, state):
        """Perform AES block decipher on input"""
        # Null padding (into a new list); valid input should not need it.
        missing = 16 - len(state)
        state = state + [0] * missing

        self._add_round_key(state, self._Nr)

        for round_number in range(self._Nr - 1, 0, -1):
            self._i_shift_rows(state)
            self._i_sub_bytes(state)
            self._add_round_key(state, round_number)
            self._mix_columns(state, True)

        self._i_shift_rows(state)
        self._i_sub_bytes(state)
        self._add_round_key(state, 0)
        return state
import unittest
class TestCipher(unittest.TestCase):
    """Self-test of AESCipher against the validated vectors in test_keys."""

    def test_cipher(self):
        """Test AES cipher with all key lengths"""
        # Normally use relative import; when run as a script fall back to a
        # local import. Only import failures trigger the fallback (the
        # exception type for a relative import outside a package differs
        # between Python versions), so unrelated errors are not hidden.
        try:
            from . import test_keys, key_expander
        except (ImportError, ValueError, SystemError):
            import test_keys
            import key_expander

        test_data = test_keys.TestKeys()

        for key_size in 128, 192, 256:
            test_key_expander = key_expander.KeyExpander(key_size)
            test_expanded_key = test_key_expander.expand(test_data.test_key[key_size])
            test_cipher = AESCipher(test_expanded_key)
            expected_ciphertext = test_data.test_block_ciphertext_validated[key_size]

            # All 16 positions of the produced block must match the expected block.
            test_result_ciphertext = test_cipher.cipher_block(test_data.test_block_plaintext)
            self.assertEqual(len([i for i, j in zip(test_result_ciphertext, expected_ciphertext) if i == j]),
                16,
                msg='Test %d bit cipher'%key_size)

            test_result_plaintext = test_cipher.decipher_block(expected_ciphertext)
            self.assertEqual(len([i for i, j in zip(test_result_plaintext, test_data.test_block_plaintext) if i == j]),
                16,
                msg='Test %d bit decipher'%key_size)
if __name__ == "__main__":
unittest.main()

167
tests/util/vendor/aespython/aes_tables.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,167 @@
"""
Instantiate AES tables for rcon,sbox,i_sbox,and galois_lookup.
Copyright (c) 2010,Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"
rcon=(
0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,
0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,
0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,
0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,
0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,
0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,
0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,
0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,
0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,
0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,
0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,
0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,
0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,
0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,
0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,
0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb)
sbox=(
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16)
i_sbox=(
0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d)
# Identity table: entry i is i for every byte value 0..255 (presumably the
# "multiply by 1" companion of the gal2/gal3 lookup tables -- confirm).
gal1=tuple(range(256))
gal2=(
0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
0x80,0x82,0x84,0x86,0x88,0x8a,0x8c,0x8e,0x90,0x92,0x94,0x96,0x98,0x9a,0x9c,0x9e,
0xa0,0xa2,0xa4,0xa6,0xa8,0xaa,0xac,0xae,0xb0,0xb2,0xb4,0xb6,0xb8,0xba,0xbc,0xbe,
0xc0,0xc2,0xc4,0xc6,0xc8,0xca,0xcc,0xce,0xd0,0xd2,0xd4,0xd6,0xd8,0xda,0xdc,0xde,
0xe0,0xe2,0xe4,0xe6,0xe8,0xea,0xec,0xee,0xf0,0xf2,0xf4,0xf6,0xf8,0xfa,0xfc,0xfe,
0x1b,0x19,0x1f,0x1d,0x13,0x11,0x17,0x15,0x0b,0x09,0x0f,0x0d,0x03,0x01,0x07,0x05,
0x3b,0x39,0x3f,0x3d,0x33,0x31,0x37,0x35,0x2b,0x29,0x2f,0x2d,0x23,0x21,0x27,0x25,
0x5b,0x59,0x5f,0x5d,0x53,0x51,0x57,0x55,0x4b,0x49,0x4f,0x4d,0x43,0x41,0x47,0x45,
0x7b,0x79,0x7f,0x7d,0x73,0x71,0x77,0x75,0x6b,0x69,0x6f,0x6d,0x63,0x61,0x67,0x65,
0x9b,0x99,0x9f,0x9d,0x93,0x91,0x97,0x95,0x8b,0x89,0x8f,0x8d,0x83,0x81,0x87,0x85,
0xbb,0xb9,0xbf,0xbd,0xb3,0xb1,0xb7,0xb5,0xab,0xa9,0xaf,0xad,0xa3,0xa1,0xa7,0xa5,
0xdb,0xd9,0xdf,0xdd,0xd3,0xd1,0xd7,0xd5,0xcb,0xc9,0xcf,0xcd,0xc3,0xc1,0xc7,0xc5,
0xfb,0xf9,0xff,0xfd,0xf3,0xf1,0xf7,0xf5,0xeb,0xe9,0xef,0xed,0xe3,0xe1,0xe7,0xe5)
gal3=(
0x00,0x03,0x06,0x05,0x0c,0x0f,0x0a,0x09,0x18,0x1b,0x1e,0x1d,0x14,0x17,0x12,0x11,
0x30,0x33,0x36,0x35,0x3c,0x3f,0x3a,0x39,0x28,0x2b,0x2e,0x2d,0x24,0x27,0x22,0x21,
0x60,0x63,0x66,0x65,0x6c,0x6f,0x6a,0x69,0x78,0x7b,0x7e,0x7d,0x74,0x77,0x72,0x71,
0x50,0x53,0x56,0x55,0x5c,0x5f,0x5a,0x59,0x48,0x4b,0x4e,0x4d,0x44,0x47,0x42,0x41,
0xc0,0xc3,0xc6,0xc5,0xcc,0xcf,0xca,0xc9,0xd8,0xdb,0xde,0xdd,0xd4,0xd7,0xd2,0xd1,
0xf0,0xf3,0xf6,0xf5,0xfc,0xff,0xfa,0xf9,0xe8,0xeb,0xee,0xed,0xe4,0xe7,0xe2,0xe1,
0xa0,0xa3,0xa6,0xa5,0xac,0xaf,0xaa,0xa9,0xb8,0xbb,0xbe,0xbd,0xb4,0xb7,0xb2,0xb1,
0x90,0x93,0x96,0x95,0x9c,0x9f,0x9a,0x99,0x88,0x8b,0x8e,0x8d,0x84,0x87,0x82,0x81,
0x9b,0x98,0x9d,0x9e,0x97,0x94,0x91,0x92,0x83,0x80,0x85,0x86,0x8f,0x8c,0x89,0x8a,
0xab,0xa8,0xad,0xae,0xa7,0xa4,0xa1,0xa2,0xb3,0xb0,0xb5,0xb6,0xbf,0xbc,0xb9,0xba,
0xfb,0xf8,0xfd,0xfe,0xf7,0xf4,0xf1,0xf2,0xe3,0xe0,0xe5,0xe6,0xef,0xec,0xe9,0xea,
0xcb,0xc8,0xcd,0xce,0xc7,0xc4,0xc1,0xc2,0xd3,0xd0,0xd5,0xd6,0xdf,0xdc,0xd9,0xda,
0x5b,0x58,0x5d,0x5e,0x57,0x54,0x51,0x52,0x43,0x40,0x45,0x46,0x4f,0x4c,0x49,0x4a,
0x6b,0x68,0x6d,0x6e,0x67,0x64,0x61,0x62,0x73,0x70,0x75,0x76,0x7f,0x7c,0x79,0x7a,
0x3b,0x38,0x3d,0x3e,0x37,0x34,0x31,0x32,0x23,0x20,0x25,0x26,0x2f,0x2c,0x29,0x2a,
0x0b,0x08,0x0d,0x0e,0x07,0x04,0x01,0x02,0x13,0x10,0x15,0x16,0x1f,0x1c,0x19,0x1a)
gal9=(
0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f,0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77,
0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf,0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7,
0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04,0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c,
0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94,0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc,
0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49,0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01,
0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9,0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91,
0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72,0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a,
0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2,0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa,
0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3,0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b,
0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43,0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b,
0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8,0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0,
0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78,0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30,
0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5,0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed,
0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35,0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d,
0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e,0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6,
0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e,0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46)
gal11=(
0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31,0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69,
0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81,0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9,
0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a,0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12,
0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa,0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2,
0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7,0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f,
0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77,0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f,
0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc,0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4,
0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c,0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54,
0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6,0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e,
0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76,0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e,
0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd,0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5,
0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d,0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55,
0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30,0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68,
0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80,0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8,
0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b,0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13,
0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb,0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3)
gal13=(
0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23,0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b,
0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3,0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b,
0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98,0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0,
0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48,0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20,
0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e,0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26,
0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e,0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6,
0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5,0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d,
0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25,0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d,
0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9,0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91,
0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29,0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41,
0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42,0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a,
0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92,0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa,
0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94,0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc,
0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44,0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c,
0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f,0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47,
0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff,0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97)
gal14=(
0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a,0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a,
0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca,0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba,
0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1,0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81,
0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11,0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61,
0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87,0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7,
0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67,0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17,
0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c,0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c,
0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc,0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc,
0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b,0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b,
0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b,0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb,
0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0,0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0,
0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50,0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20,
0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6,0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6,
0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26,0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56,
0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d,0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d,
0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd,0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d)
# Ordered groups of the lookup tables above; their consumer is not visible in
# this module.  Presumably galI holds the inverse MixColumns coefficients
# (14, 11, 13, 9) and galNI the forward ones (2, 3, 1, 1) -- confirm against
# the cipher's _mix_columns implementation.
galI=gal14,gal11,gal13,gal9
galNI=gal2,gal3,gal1,gal1

54
tests/util/vendor/aespython/cbc_mode.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,54 @@
#!/usr/bin/env python
"""
CBC Mode of operation
Running this file as __main__ will result in a self-test of the algorithm.
Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"
from cipher_mode import CipherMode
from mode_test import GeneralTestEncryptionMode
class CBCMode(CipherMode):
    """Cipher Block Chaining: each ciphertext block becomes the IV of the next call."""

    name = "CBC"

    def __init__(self, block_cipher, block_size):
        CipherMode.__init__(self, block_cipher, block_size)

    def encrypt_block(self, plaintext):
        """XOR `plaintext` with the current IV, encipher it, and chain the result.

        Returns the ciphertext as a bytearray, which is also stored as the
        IV for the next block.
        """
        chain = self._iv
        mixed = []
        for plain_value, chain_value in zip(plaintext, chain):
            mixed.append(plain_value ^ chain_value)
        # A short plaintext is completed with the untouched tail of the IV.
        mixed = mixed + list(chain[len(plaintext)::])
        encrypted = bytearray(self._block_cipher.cipher_block(mixed))
        self._iv = encrypted
        return encrypted

    def decrypt_block(self, ciphertext):
        """Decipher `ciphertext`, XOR it with the current IV, and chain the input.

        Returns the plaintext as a bytearray; `ciphertext` itself becomes
        the IV for the next block.
        """
        deciphered = self._block_cipher.decipher_block(list(ciphertext))
        recovered = bytearray(
            chain_value ^ value for chain_value, value in zip(self._iv, deciphered)
        )
        self._iv = ciphertext
        return recovered
class TestEncryptionMode(GeneralTestEncryptionMode):
    """Known-answer self-test for CBCMode."""

    def test_mode(self):
        """Test CBC Mode Encrypt/Decrypt"""
        # Fall back to a local import when the aespython package is not
        # importable; only import failures are caught, not every exception.
        try:
            from aespython.test_keys import TestKeys
        except ImportError:
            from test_keys import TestKeys

        test_data = TestKeys()
        test_mode = CBCMode(self.get_keyed_cipher(test_data.test_mode_key), 16)
        self.run_cipher(test_mode, test_data.test_mode_iv, test_data.test_cbc_ciphertext, test_data.test_mode_plaintext)
# Run the CBC self-test when this file is executed directly.
if __name__ == "__main__":
import unittest
unittest.main()

53
tests/util/vendor/aespython/cfb_mode.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,53 @@
#!/usr/bin/env python
"""
CFB Mode of operation
Running this file as __main__ will result in a self-test of the algorithm.
Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"
from .cipher_mode import CipherMode
from .mode_test import GeneralTestEncryptionMode
class CFBMode(CipherMode):
    """Cipher Feedback: the enciphered IV is XORed with the data, and the
    ciphertext becomes the IV of the next call."""

    name = "CFB"

    def __init__(self, block_cipher, block_size):
        CipherMode.__init__(self, block_cipher, block_size)

    def encrypt_block(self, plaintext):
        """Return `plaintext` XORed with the enciphered IV, as a list.

        The returned ciphertext is stored as the IV for the next block.
        """
        keystream = self._block_cipher.cipher_block(self._iv)
        encrypted = []
        for plain_value, key_value in zip(plaintext, keystream):
            encrypted.append(plain_value ^ key_value)
        self._iv = encrypted
        return encrypted

    def decrypt_block(self, ciphertext):
        """Return `ciphertext` XORed with the enciphered IV, as a list.

        Decryption also uses the block cipher in the forward direction;
        `ciphertext` is stored as the IV for the next block.
        """
        keystream = self._block_cipher.cipher_block(self._iv)
        recovered = []
        for key_value, cipher_value in zip(keystream, ciphertext):
            recovered.append(key_value ^ cipher_value)
        self._iv = ciphertext
        return recovered
class TestEncryptionMode(GeneralTestEncryptionMode):
    """Known-answer self-test for CFBMode."""

    def test_mode(self):
        """Test CFB Mode Encrypt/Decrypt"""
        # Relative import inside the package; plain import otherwise.  The
        # failure type differs between interpreter versions, hence the tuple
        # (instead of a bare except).
        try:
            from .test_keys import TestKeys
        except (ImportError, ValueError, SystemError):
            from test_keys import TestKeys

        test_data = TestKeys()
        test_mode = CFBMode(self.get_keyed_cipher(test_data.test_mode_key), 16)
        self.run_cipher(test_mode, test_data.test_mode_iv, test_data.test_cfb_ciphertext, test_data.test_mode_plaintext)
# Run the CFB self-test when this file is executed directly.
if __name__ == "__main__":
import unittest
unittest.main()

37
tests/util/vendor/aespython/cipher_mode.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,37 @@
#!/usr/bin/env python
"""
Cipher Mode of operation
Running this file as __main__ will result in a self-test of the algorithm.
Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
from ...env.logs import Log
__author__ = "Adam Newman"
class CipherMode:
    """Abstract base for block-cipher modes of operation.

    Stores the block cipher, the block size, and the IV that concrete modes
    carry from one block to the next.  Subclasses must implement
    encrypt_block and decrypt_block.
    """

    name = "ABSTRACT"

    def __init__(self, block_cipher, block_size):
        """Keep the cipher and block size; start with an all-zero IV."""
        self._block_cipher = block_cipher
        self._block_size = block_size
        self._iv = [0] * block_size

    def set_iv(self, iv):
        """Replace the current IV; a wrong-sized `iv` is reported via Log.error."""
        if len(iv) != self._block_size:
            # NOTE(review): Log.error presumably raises; if it only logs,
            # the wrong-sized IV is still stored below -- confirm.
            Log.error("iv is wrong size")
        self._iv = iv

    def encrypt_block(self, plaintext):
        """Encrypt one block.  Must be overridden.

        Raises NotImplementedError.
        """
        # Raise an exception instance: `raise (Class, "msg")` raises a tuple,
        # which is a TypeError on Python 3 and drops the message on Python 2.
        raise NotImplementedError("Abstract function")

    def decrypt_block(self, ciphertext):
        """Decrypt one block.  Must be overridden.

        Raises NotImplementedError.
        """
        raise NotImplementedError("Abstract function")

144
tests/util/vendor/aespython/key_expander.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,144 @@
#!/usr/bin/env python
"""
AES Key Expansion.
Expands 128, 192, or 256 bit key for use with AES
Running this file as __main__ will result in a self-test of the algorithm.
Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"
#Normally use relative import. In test mode use local import.
# A relative import without a parent package fails with ValueError on
# Python 2, SystemError on Python 3.3-3.5 and ImportError on Python 3.6+,
# so all three must trigger the fallback.
try:
    from . import aes_tables
except (ImportError, ValueError, SystemError):
    import aes_tables
class KeyExpander:
    """Expand a 128, 192 or 256 bit AES key into its full key schedule."""

    # Key length in bits -> length of the expanded key in bytes.
    _expanded_key_length = {128 : 176, 192 : 208, 256 : 240}

    def __init__(self, key_length):
        """Record the key length in bits and derive the byte counts.

        Raises LookupError when `key_length` is not 128, 192 or 256.
        """
        self._key_length = key_length
        self._n = int(key_length / 8)
        if key_length not in self._expanded_key_length:
            raise LookupError('Invalid Key Size')
        self._b = self._expanded_key_length[key_length]

    def _core(self, key_array, iteration):
        """Rotate a 4-byte word left by one, substitute it through the sbox,
        and XOR its first byte with rcon[iteration]."""
        if len(key_array) != 4:
            raise RuntimeError('_core(): key segment size invalid')
        rotated = key_array[1:] + key_array[:1]
        output = [aes_tables.sbox[value] for value in rotated]
        output[0] ^= aes_tables.rcon[iteration]
        return output

    def _xor_list(self, list_1, list_2):
        """Return the element-wise XOR of two sequences (shortest length wins)."""
        return [left ^ right for left, right in zip(list_1, list_2)]

    def expand(self, key_array):
        """
        Expand the encryption key per AES key schedule specifications
        http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_description

        Returns a new list; `key_array` is copied, never modified.
        Raises RuntimeError when `key_array` does not hold key_length / 8 items.
        """
        if len(key_array) != self._n:
            raise RuntimeError('expand(): key size ' + str(len(key_array)) + ' is invalid')

        n = self._n
        target_length = self._b
        schedule = list(key_array)

        def append_word(word):
            # XOR `word` with the 4 bytes located n bytes before the end of
            # the schedule, then append the result.
            schedule.extend(self._xor_list(word, schedule[-n:-n + 4]))

        rcon_iteration = 1
        while len(schedule) < target_length:
            # Core step on the current 4-byte tail, one rcon value per pass.
            append_word(self._core(schedule[-4:], rcon_iteration))
            rcon_iteration += 1

            # Three plain words chained off the tail.
            for _ in range(3):
                append_word(schedule[-4:])

            # 256 bit keys only: one extra word whose tail goes through the sbox.
            if self._key_length == 256 and len(schedule) < target_length:
                append_word([aes_tables.sbox[value] for value in schedule[-4:]])

            # 192 bit keys need 2 more plain words, 256 bit keys need 3.
            if self._key_length != 128 and len(schedule) < target_length:
                for _ in range(2 if self._key_length == 192 else 3):
                    append_word(schedule[-4:])

        return schedule
import unittest
class TestKeyExpander(unittest.TestCase):
    """Known-answer self-test for KeyExpander."""

    def test_keys(self):
        """Test All Key Expansions"""
        # Relative import inside the package; plain import when run directly.
        # The failure type differs between interpreter versions, hence the
        # tuple (instead of a bare except).
        try:
            from . import test_keys
        except (ImportError, ValueError, SystemError):
            import test_keys

        test_data = test_keys.TestKeys()
        for key_size in [128, 192, 256]:
            test_expander = KeyExpander(key_size)
            test_expanded_key = test_expander.expand(test_data.test_key[key_size])
            # Direct comparison also catches a schedule of the wrong length
            # and shows the differing bytes on failure.
            self.assertEqual(list(test_expanded_key),
                             list(test_data.test_expanded_key_validated[key_size]),
                             msg='Key expansion ' + str(key_size) + ' bit')
# Run the key-expansion self-test when this file is executed directly.
if __name__ == "__main__":
unittest.main()

37
tests/util/vendor/aespython/mode_test.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,37 @@
"""
Cipher Mode of operation
Abstract encryption mode test harness.
"""
from .key_expander import KeyExpander
from .aes_cipher import AESCipher
import unittest
class GeneralTestEncryptionMode(unittest.TestCase):
    """Shared harness for the cipher-mode self-tests."""

    def get_keyed_cipher(self, key):
        """Return an AESCipher built from the 256 bit expansion of `key`."""
        test_expander = KeyExpander(256)
        test_expanded_key = test_expander.expand(key)
        return AESCipher(test_expanded_key)

    def run_cipher(self, cipher_mode, iv, ciphertext_list, plaintext_list):
        """Given an cipher mode, test key, and test iv, use known ciphertext, plaintext to test algorithm

        Every plaintext block is encrypted and compared with the matching
        known ciphertext block; then the IV is reset and the reverse
        direction is checked.  Fails the test on the first mismatch.
        """
        # assertEqual replaces the deprecated assertEquals alias (removed in
        # Python 3.12).  Comparing the sequences directly, instead of
        # counting 16 matching positions, also reports which bytes differ.
        cipher_mode.set_iv(iv)
        for k in range(len(ciphertext_list)):
            encrypted = cipher_mode.encrypt_block(plaintext_list[k])
            self.assertEqual(list(encrypted),
                             list(ciphertext_list[k]),
                             msg=cipher_mode.name + ' encrypt test block' + str(k))

        # Restart the chain so decryption sees the same IV sequence.
        cipher_mode.set_iv(iv)
        for k in range(len(plaintext_list)):
            decrypted = cipher_mode.decrypt_block(ciphertext_list[k])
            self.assertEqual(list(decrypted),
                             list(plaintext_list[k]),
                             msg=cipher_mode.name + ' decrypt test block' + str(k))

    def test_mode(self):
        """Abstract Test Harness for Encrypt/Decrypt"""
        pass

59
tests/util/vendor/aespython/ofb_mode.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,59 @@
#!/usr/bin/env python
"""
OFB Mode of operation
Running this file as __main__ will result in a self-test of the algorithm.
Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"
from .cipher_mode import CipherMode
from .mode_test import GeneralTestEncryptionMode
class OFBMode(CipherMode):
    """Output Feedback: the enciphered IV is the keystream and also the IV
    of the next call."""

    name = "OFB"

    def __init__(self, block_cipher, block_size):
        """Keep the cipher and block size; start with an all-zero IV."""
        self._block_cipher = block_cipher
        self._block_size = block_size
        self._iv = [0] * block_size

    def set_iv(self, iv):
        """Replace the IV; an `iv` of the wrong length is silently ignored."""
        if len(iv) != self._block_size:
            return
        self._iv = iv

    def encrypt_block(self, plaintext):
        """Return `plaintext` XORed with the enciphered IV, as a list."""
        keystream = self._block_cipher.cipher_block(self._iv)
        encrypted = []
        for plain_value, key_value in zip(plaintext, keystream):
            encrypted.append(plain_value ^ key_value)
        # The keystream (not the ciphertext) feeds the next block.
        self._iv = keystream
        return encrypted

    def decrypt_block(self, ciphertext):
        """Return `ciphertext` XORed with the enciphered IV, as a list."""
        keystream = self._block_cipher.cipher_block(self._iv)
        recovered = []
        for key_value, cipher_value in zip(keystream, ciphertext):
            recovered.append(key_value ^ cipher_value)
        self._iv = keystream
        return recovered
class TestEncryptionMode(GeneralTestEncryptionMode):
    """Known-answer self-test for OFBMode."""

    def test_mode(self):
        """Test OFB Mode Encrypt/Decrypt"""
        # Fall back to a local import when the aespython package is not
        # importable; only import failures are caught, not every exception.
        try:
            from aespython.test_keys import TestKeys
        except ImportError:
            from test_keys import TestKeys

        test_data = TestKeys()
        test_mode = OFBMode(self.get_keyed_cipher(test_data.test_mode_key), 16)
        self.run_cipher(test_mode, test_data.test_mode_iv, test_data.test_ofb_ciphertext, test_data.test_mode_plaintext)
# Run the OFB self-test when this file is executed directly.
if __name__ == "__main__":
import unittest
unittest.main()

119
tests/util/vendor/aespython/test_keys.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,119 @@
"""
Test keys and data for self-test operations.
Test data from:
NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"
class TestKeys:
"""Test data, keys, IVs, and output to use in self-tests"""
test_key = {
128 : [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f]
, 192 : [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17]
, 256 : [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f]
}
test_expanded_key_validated = {
128 : [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0xd6, 0xaa, 0x74, 0xfd, 0xd2, 0xaf, 0x72, 0xfa, 0xda, 0xa6, 0x78, 0xf1, 0xd6, 0xab, 0x76, 0xfe,
0xb6, 0x92, 0xcf, 0x0b, 0x64, 0x3d, 0xbd, 0xf1, 0xbe, 0x9b, 0xc5, 0x00, 0x68, 0x30, 0xb3, 0xfe,
0xb6, 0xff, 0x74, 0x4e, 0xd2, 0xc2, 0xc9, 0xbf, 0x6c, 0x59, 0x0c, 0xbf, 0x04, 0x69, 0xbf, 0x41,
0x47, 0xf7, 0xf7, 0xbc, 0x95, 0x35, 0x3e, 0x03, 0xf9, 0x6c, 0x32, 0xbc, 0xfd, 0x05, 0x8d, 0xfd,
0x3c, 0xaa, 0xa3, 0xe8, 0xa9, 0x9f, 0x9d, 0xeb, 0x50, 0xf3, 0xaf, 0x57, 0xad, 0xf6, 0x22, 0xaa,
0x5e, 0x39, 0x0f, 0x7d, 0xf7, 0xa6, 0x92, 0x96, 0xa7, 0x55, 0x3d, 0xc1, 0x0a, 0xa3, 0x1f, 0x6b,
0x14, 0xf9, 0x70, 0x1a, 0xe3, 0x5f, 0xe2, 0x8c, 0x44, 0x0a, 0xdf, 0x4d, 0x4e, 0xa9, 0xc0, 0x26,
0x47, 0x43, 0x87, 0x35, 0xa4, 0x1c, 0x65, 0xb9, 0xe0, 0x16, 0xba, 0xf4, 0xae, 0xbf, 0x7a, 0xd2,
0x54, 0x99, 0x32, 0xd1, 0xf0, 0x85, 0x57, 0x68, 0x10, 0x93, 0xed, 0x9c, 0xbe, 0x2c, 0x97, 0x4e,
0x13, 0x11, 0x1d, 0x7f, 0xe3, 0x94, 0x4a, 0x17, 0xf3, 0x07, 0xa7, 0x8b, 0x4d, 0x2b, 0x30, 0xc5]
, 192 : [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x58, 0x46, 0xf2, 0xf9, 0x5c, 0x43, 0xf4, 0xfe,
0x54, 0x4a, 0xfe, 0xf5, 0x58, 0x47, 0xf0, 0xfa, 0x48, 0x56, 0xe2, 0xe9, 0x5c, 0x43, 0xf4, 0xfe,
0x40, 0xf9, 0x49, 0xb3, 0x1c, 0xba, 0xbd, 0x4d, 0x48, 0xf0, 0x43, 0xb8, 0x10, 0xb7, 0xb3, 0x42,
0x58, 0xe1, 0x51, 0xab, 0x04, 0xa2, 0xa5, 0x55, 0x7e, 0xff, 0xb5, 0x41, 0x62, 0x45, 0x08, 0x0c,
0x2a, 0xb5, 0x4b, 0xb4, 0x3a, 0x02, 0xf8, 0xf6, 0x62, 0xe3, 0xa9, 0x5d, 0x66, 0x41, 0x0c, 0x08,
0xf5, 0x01, 0x85, 0x72, 0x97, 0x44, 0x8d, 0x7e, 0xbd, 0xf1, 0xc6, 0xca, 0x87, 0xf3, 0x3e, 0x3c,
0xe5, 0x10, 0x97, 0x61, 0x83, 0x51, 0x9b, 0x69, 0x34, 0x15, 0x7c, 0x9e, 0xa3, 0x51, 0xf1, 0xe0,
0x1e, 0xa0, 0x37, 0x2a, 0x99, 0x53, 0x09, 0x16, 0x7c, 0x43, 0x9e, 0x77, 0xff, 0x12, 0x05, 0x1e,
0xdd, 0x7e, 0x0e, 0x88, 0x7e, 0x2f, 0xff, 0x68, 0x60, 0x8f, 0xc8, 0x42, 0xf9, 0xdc, 0xc1, 0x54,
0x85, 0x9f, 0x5f, 0x23, 0x7a, 0x8d, 0x5a, 0x3d, 0xc0, 0xc0, 0x29, 0x52, 0xbe, 0xef, 0xd6, 0x3a,
0xde, 0x60, 0x1e, 0x78, 0x27, 0xbc, 0xdf, 0x2c, 0xa2, 0x23, 0x80, 0x0f, 0xd8, 0xae, 0xda, 0x32,
0xa4, 0x97, 0x0a, 0x33, 0x1a, 0x78, 0xdc, 0x09, 0xc4, 0x18, 0xc2, 0x71, 0xe3, 0xa4, 0x1d, 0x5d]
, 256 : [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0xa5, 0x73, 0xc2, 0x9f, 0xa1, 0x76, 0xc4, 0x98, 0xa9, 0x7f, 0xce, 0x93, 0xa5, 0x72, 0xc0, 0x9c,
0x16, 0x51, 0xa8, 0xcd, 0x02, 0x44, 0xbe, 0xda, 0x1a, 0x5d, 0xa4, 0xc1, 0x06, 0x40, 0xba, 0xde,
0xae, 0x87, 0xdf, 0xf0, 0x0f, 0xf1, 0x1b, 0x68, 0xa6, 0x8e, 0xd5, 0xfb, 0x03, 0xfc, 0x15, 0x67,
0x6d, 0xe1, 0xf1, 0x48, 0x6f, 0xa5, 0x4f, 0x92, 0x75, 0xf8, 0xeb, 0x53, 0x73, 0xb8, 0x51, 0x8d,
0xc6, 0x56, 0x82, 0x7f, 0xc9, 0xa7, 0x99, 0x17, 0x6f, 0x29, 0x4c, 0xec, 0x6c, 0xd5, 0x59, 0x8b,
0x3d, 0xe2, 0x3a, 0x75, 0x52, 0x47, 0x75, 0xe7, 0x27, 0xbf, 0x9e, 0xb4, 0x54, 0x07, 0xcf, 0x39,
0x0b, 0xdc, 0x90, 0x5f, 0xc2, 0x7b, 0x09, 0x48, 0xad, 0x52, 0x45, 0xa4, 0xc1, 0x87, 0x1c, 0x2f,
0x45, 0xf5, 0xa6, 0x60, 0x17, 0xb2, 0xd3, 0x87, 0x30, 0x0d, 0x4d, 0x33, 0x64, 0x0a, 0x82, 0x0a,
0x7c, 0xcf, 0xf7, 0x1c, 0xbe, 0xb4, 0xfe, 0x54, 0x13, 0xe6, 0xbb, 0xf0, 0xd2, 0x61, 0xa7, 0xdf,
0xf0, 0x1a, 0xfa, 0xfe, 0xe7, 0xa8, 0x29, 0x79, 0xd7, 0xa5, 0x64, 0x4a, 0xb3, 0xaf, 0xe6, 0x40,
0x25, 0x41, 0xfe, 0x71, 0x9b, 0xf5, 0x00, 0x25, 0x88, 0x13, 0xbb, 0xd5, 0x5a, 0x72, 0x1c, 0x0a,
0x4e, 0x5a, 0x66, 0x99, 0xa9, 0xf2, 0x4f, 0xe0, 0x7e, 0x57, 0x2b, 0xaa, 0xcd, 0xf8, 0xcd, 0xea,
0x24, 0xfc, 0x79, 0xcc, 0xbf, 0x09, 0x79, 0xe9, 0x37, 0x1a, 0xc2, 0x3c, 0x6d, 0x68, 0xde, 0x36]
}
test_block_ciphertext_validated = {
128 : [
0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30, 0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a]
, 192 : [
0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0, 0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91]
, 256 : [
0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf, 0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89]
}
test_block_plaintext = [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff]
#After initial validation, these deviated from test in SP 800-38A to use same key, iv, and plaintext on tests.
#Still valid, just easier to test with.
test_mode_key= [
0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4]
test_mode_iv = [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f]
test_mode_plaintext = [
[0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a],
[0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51],
[0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef],
[0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10]]
test_cbc_ciphertext = [
[0xf5, 0x8c, 0x4c, 0x04, 0xd6, 0xe5, 0xf1, 0xba, 0x77, 0x9e, 0xab, 0xfb, 0x5f, 0x7b, 0xfb, 0xd6],
[0x9c, 0xfc, 0x4e, 0x96, 0x7e, 0xdb, 0x80, 0x8d, 0x67, 0x9f, 0x77, 0x7b, 0xc6, 0x70, 0x2c, 0x7d],
[0x39, 0xf2, 0x33, 0x69, 0xa9, 0xd9, 0xba, 0xcf, 0xa5, 0x30, 0xe2, 0x63, 0x04, 0x23, 0x14, 0x61],
[0xb2, 0xeb, 0x05, 0xe2, 0xc3, 0x9b, 0xe9, 0xfc, 0xda, 0x6c, 0x19, 0x07, 0x8c, 0x6a, 0x9d, 0x1b]]
test_cfb_ciphertext = [
[0xdc, 0x7e, 0x84, 0xbf, 0xda, 0x79, 0x16, 0x4b, 0x7e, 0xcd, 0x84, 0x86, 0x98, 0x5d, 0x38, 0x60],
[0x39, 0xff, 0xed, 0x14, 0x3b, 0x28, 0xb1, 0xc8, 0x32, 0x11, 0x3c, 0x63, 0x31, 0xe5, 0x40, 0x7b],
[0xdf, 0x10, 0x13, 0x24, 0x15, 0xe5, 0x4b, 0x92, 0xa1, 0x3e, 0xd0, 0xa8, 0x26, 0x7a, 0xe2, 0xf9],
[0x75, 0xa3, 0x85, 0x74, 0x1a, 0xb9, 0xce, 0xf8, 0x20, 0x31, 0x62, 0x3d, 0x55, 0xb1, 0xe4, 0x71]]
test_ofb_ciphertext = [
[0xdc, 0x7e, 0x84, 0xbf, 0xda, 0x79, 0x16, 0x4b, 0x7e, 0xcd, 0x84, 0x86, 0x98, 0x5d, 0x38, 0x60],
[0x4f, 0xeb, 0xdc, 0x67, 0x40, 0xd2, 0x0b, 0x3a, 0xc8, 0x8f, 0x6a, 0xd8, 0x2a, 0x4f, 0xb0, 0x8d],
[0x71, 0xab, 0x47, 0xa0, 0x86, 0xe8, 0x6e, 0xed, 0xf3, 0x9d, 0x1c, 0x5b, 0xba, 0x97, 0xc4, 0x08],
[0x01, 0x26, 0x14, 0x1d, 0x67, 0xf3, 0x7b, 0xe8, 0x53, 0x8f, 0x5a, 0x8b, 0xe7, 0x40, 0xe4, 0x84]]
def hex_output(self, list):
    """Return the items of `list` as a bracketed, comma-separated hex string.

    Debugging output helper, e.g. [1, 255] -> '[0x1,0xff]'.  An empty
    sequence gives '[]' rather than raising IndexError.
    """
    # NOTE: the parameter name shadows the builtin; it is kept so existing
    # keyword callers keep working.
    return '[' + ','.join(hex(i) for i in list) + ']'

1
tests/util/vendor/strangman/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
Snagged from Gary Strangman's Python modules page: http://www.nmr.mgh.harvard.edu/Neural_Systems_Group/gary/python.html

2
tests/util/vendor/strangman/__init__.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,2 @@
from . import stats

626
tests/util/vendor/strangman/glplot.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,626 @@
##
## glplot.py ... combines OpenGL and wxPython to produce quick-and-dirty, zoomable line-plots
##
## Copyright (c) Gary Strangman, All Rights Reserved
## This software is provided AS-IS. Improvements are welcome. strang@nmr.mgh.harvard.edu
##
## NOTE: left button and drag creates a zoom box, right button returns to full-plot view
##
## Requires PyOpenGL, Numeric, and wxPython, and Python 2.2+
## Tested on Linux and Windoze platforms. Does what I need it to do on both.
##
# Optional dependency: plotting works without it.  Only a failed import is
# tolerated; a bare except would also swallow KeyboardInterrupt/SystemExit.
try:
    import im # im module only required to save the generated bitmaps
except ImportError:
    pass
import glob, os, sys, string
import Numeric as N
from wxPython.wx import *
from OpenGL.GL import *
from OpenGL.GLU import *
from OpenGL import GLUT
from wxPython.wx import *
from wxPython.glcanvas import *
from Numeric import *
import math, os, sys
# RGB triples (each component 0..1).  When more than one series is plotted,
# series i is drawn with glplotcolors[i % len(glplotcolors)].
glplotcolors = [
    (0.0, 0.0, 1.0),  # blue
    (0.0, 1.0, 0.0),  # green
    (1.0, 0.0, 0.0),  # red
    (0.0, 1.0, 1.0),  # cyan
    (1.0, 0.0, 1.0),  # magenta
    (1.0, 1.0, 0.0),  # yellow
]
#---------------------------------------------------------------------------------------
# Thin wxGLCanvas subclass that binds the size, paint and erase-background
# events to handlers and wraps drawing in wxRedraw().
# wxRedraw() calls self.redraw(), which this class does not define itself.
class RawOpengl(wxGLCanvas):
def __init__(self, parent,*args,**kw):
# Only parent and the keyword arguments are passed on; *args is accepted but unused.
apply(wxGLCanvas.__init__,(self,parent),kw)
EVT_SIZE(self,self.wxSize)
EVT_PAINT(self,self.wxPaint)
EVT_ERASE_BACKGROUND(self, self.wxEraseBackground)
def wxSize(self, event):
### Size callback
size = self.GetClientSize()
if self.GetContext():
self.SetCurrent()
glViewport(0, 0, size.width, size.height)
def wxEraseBackground(self, event):
pass # Do nothing, to avoid flashing.
def wxPaint(self,*dummy):
# A wxPaintDC is created (and not used further) before redrawing.
dc = wxPaintDC(self)
self.wxRedraw(None)
def wxRedraw(self, *dummy):
### Capture rendering context
dc = wxClientDC(self)
# dc = wxPaintDC(self)
self.SetCurrent()
# Remember the current matrix mode; it is restored by the last line of this method.
_mode = glGetDouble(GL_MATRIX_MODE)
glMatrixMode(GL_PROJECTION)
glPushMatrix()
self.redraw()
glFlush()
glPopMatrix()
### Swap buffers
self.SwapBuffers()
glMatrixMode(_mode)
def wxExpose(self, *dummy):
self.wxRedraw()
#---------------------------------------------------------------------------------------
class OpenglMultiLinePlot(RawOpengl):
"""
A class for drawing line plots on an openGL canvas.
"""
# Build the plot canvas: store the data (if any), initialise the drawing
# state and bind the mouse and keyboard handlers.
# xs, ys, errors: optional plot data; when ys is None, self.ys and self.xs start as None.
# autospin_allowed is accepted but not used in this method.
def __init__(self, parent=None, autospin_allowed=1, xs=None, ys=None, errors=None, **kw):
apply(RawOpengl.__init__, (self, parent), kw)
self.parent = parent
if ys is None:
self.ys = None
self.xs = None
else: # len(ys.shape) == 1:
# ys is stored first: set_xs() returns without storing anything while self.ys is None.
self.set_ys(ys)
self.set_xs(xs)
self.errors = errors
self.arrow = 0
self.font = GLUT.GLUT_BITMAP_HELVETICA_12
# self.font = WGL.
# self.font = GLTTwxFont.GLTTFont('arialbd',9)
self.parent = parent
self.drawcount = 0
# RawOpengl.wxRedraw() calls self.redraw(); route it to paintit().
self.redraw = self.paintit
self.xscale = 1
self.yscale = 1
self.lineweight = 1.0
self.bkgdcolor = [0., 0., 0., 0.]
self.settingbackground = 0
self.xminusflag = 0
self.yminusflag = 0
# Selection box in plot coordinates; None when no selection is in progress.
self.box = None
# NOTE(review): when ys is None, self.xs and self.ys are None at this point --
# confirm that min()/max() over ravel(None) behaves as intended.
self.dataxmin = min(ravel(self.xs))
self.dataymin = min(ravel(self.ys))
self.dataxmax = max(ravel(self.xs))
self.dataymax = max(ravel(self.ys))
# The visible (plot) range starts out equal to the full data range.
self.plotxmin = self.dataxmin
self.plotymin = self.dataymin
self.plotxmax = self.dataxmax
self.plotymax = self.dataymax
EVT_MOUSE_EVENTS(self, self.OnMouseEvent)
EVT_CHAR(self,self.OnChar)
# def wxPaint(self,*dummy):
# dc = wxPaintDC(self)
# self.paintit()
def OnChar(self, event):
    """Keyboard handler: 'L' (either case) dumps the canvas pixels to a file.

    A save dialog asks for the file name.  The file receives the client size
    (an ``Int`` array converted with ``tostring()``) followed by the RGB
    bytes returned by ``glReadPixels``.  Key codes of 256 and above, and
    every other key, are ignored.  Nothing is written when the dialog is
    dismissed without confirming.
    """
    # print event.KeyCode()
    code = event.KeyCode()
    if code >= 256:
        return
    if chr(code).upper() != 'L':
        return
    popup = wxFileDialog(NULL, "Choose LOG filename ...", "",
                         "", "*", wxSAVE, wxPoint(100,100))
    try:
        # Honour Cancel instead of trying to open whatever name is left in
        # the dialog (possibly an empty one).
        if popup.ShowModal() != wxID_OK:
            return
        # GetPath() includes the directory chosen in the dialog;
        # GetFilename() would drop it and write into the working directory.
        filename = popup.GetPath()
    finally:
        popup.Destroy()
    # @@@need to make "enter" default to Save, somehow
    pixels = glReadPixels(0,0,self.GetSize().x,self.GetSize().y,GL_RGB,GL_UNSIGNED_BYTE)
    size = self.GetClientSizeTuple()
    payload = array(size,Int).tostring() + pixels
    f = open(filename,'wb')
    try:
        f.write(payload)
    finally:
        # Close the handle even if the write fails.
        f.close()
def OnMouseEvent(self,event):
    """Mouse handler implementing drag-to-zoom and right-click reset.

    * Left button down: start a selection box at the click position.
    * Dragging with the left button held: extend the box and repaint.
    * Left button up: if a box was dragged out, zoom the plot range to it;
      then clear the box and repaint.
    * Right button up: restore the plot range to the full data range.

    ``self.box`` holds plot coordinates: ``[x, y]`` right after the click,
    ``[x0, y0, x1, y1]`` while dragging, ``None`` otherwise.
    """
    size = self.GetSize()
    # determine where (in proportions) on screen the click happened
    xr = float(event.GetX())/size.x  # GetX=0 at left
    yr = float(event.GetY())/size.y  # GetY=0 at top
    # scale this location to where WITHIN THE PLOT the click happened (in proportions)
    # ... with 0,0 at lower left of PLOT area
    xrs = (xr-(1-self.xscale)/2.)/float(self.xscale)    # scale to the plot area
    yrs = 1-(yr-(1-self.yscale)/2.)/float(self.yscale)  # invert Y and scale to plot area
    if event.LeftDown():
        self.xminusflag = 0  # was selection box dragged LEFT?
        self.yminusflag = 0  # was selection box dragged UP?
        self.box = [(xrs*(self.plotxmax-self.plotxmin)+self.plotxmin),
                    (yrs*(self.plotymax-self.plotymin)+self.plotymin)]
        self.xstart = xr
        self.ystart = yr
    elif self.box and event.LeftIsDown() and not event.LeftDown():
        # compute position of other box-corner within plot
        nxrs = (xrs*(self.plotxmax-self.plotxmin)+self.plotxmin)
        nyrs = (yrs*(self.plotymax-self.plotymin)+self.plotymin)
        if nxrs < self.box[0]:
            self.xminusflag = 1
        else:
            self.xminusflag = 0
        if nyrs < self.box[1]:
            self.yminusflag = 1
        else:
            self.yminusflag = 0
        if self.box[0] != nxrs or self.box[1] != nyrs:
            self.box = [self.box[0], self.box[1], nxrs, nyrs]
        else:  # may need to convert a 4-element box to a 2-element box
            self.box = [nxrs, nyrs]
        self.xend = xr
        self.yend = yr
        self.paintit()
    elif event.LeftUp():
        # self.box is None when the button is released without a selection
        # in progress (e.g. the press happened outside this window), so it
        # must be checked before taking its length.
        if self.box and len(self.box)>2:
            # if dragged up or left, exchange value-pairs
            if self.box[0] > self.box[2]:
                self.box[0],self.box[2] = self.box[2],self.box[0]
            if self.box[1] > self.box[3]:
                self.box[1],self.box[3] = self.box[3],self.box[1]
            self.plotxmin = self.box[0]
            self.plotymin = self.box[1]
            self.plotxmax = self.box[2]
            self.plotymax = self.box[3]
            self.xminusflag = 0
            self.yminusflag = 0
        self.box = None
        self.paintit()  # can't use wxRedraw for some reason
    if event.RightUp():
        self.plotxmin = self.dataxmin
        self.plotymin = self.dataymin
        self.plotxmax = self.dataxmax
        self.plotymax = self.dataymax
        self.box = None
        self.paintit()  # can't use wxRedraw for some reason
def OnSize(self, event):
    """Resize callback: match the GL viewport to the current client area.

    Does nothing while the canvas has no GL context.
    """
    size = self.GetClientSize()
    # The previous test compared the context with the string 'NULL', which
    # is true for any context object and for None alike.  Test truthiness
    # instead, as RawOpengl.wxSize does.
    if self.GetContext():
        self.SetCurrent()
        glViewport(0, 0, size.width, size.height)
def changelineweight(self,step):
    """Add ``step`` to the plotting line weight and repaint.

    A result of zero or less is replaced by 0.1.
    """
    weight = self.lineweight + step
    if weight <= 0:
        weight = 0.1
    self.lineweight = weight
    self.paintit()
def save_colorpixelmap(self):
    """Read the canvas back as RGB bytes and pass it to ``im.ashow``.

    The interleaved bytes are split into three colour planes, each shaped
    to the (swapped) client size and flipped top-to-bottom.
    """
    pixels = glReadPixels(0, 0, self.GetSize().x, self.GetSize().y,
                          GL_RGB, GL_UNSIGNED_BYTE)
    size = list(self.GetClientSizeTuple())
    a = fromstring(pixels, Int8)  # convert pixels to array
    print("%s %s" % (a.shape, size))
    size.reverse()  # swap x,y dimensions for proper unraveling
    planes = []
    for channel in (0, 1, 2):  # red, green, blue
        plane = a[channel::3] + 0
        plane.shape = size
        planes.append(plane[::-1, :])  # up-down flip the image
    carray = array(planes)
    print("%s %s %s %s %s" % (carray.shape, type(carray), carray.typecode(),
                              min(ravel(carray)), max(ravel(carray))))
    im.ashow(carray)
def save_graypixelmap(self):
    """Read the canvas back as float luminance and pass it to ``im.ashow``.

    The values are reshaped to the (swapped) client size, multiplied by 255
    and flipped top-to-bottom before display.
    """
    pixels = glReadPixels(0, 0, self.GetSize().x, self.GetSize().y,
                          GL_LUMINANCE, GL_FLOAT)
    size = list(self.GetClientSizeTuple())
    a = fromstring(pixels, Float32)  # convert pixels to array
    print("%s %s" % (a.shape, size))
    size.reverse()  # swap x,y dimensions for proper unraveling
    carray = reshape(a, size) * 255  # must be a luminance map
    print("%s %s %s %s %s" % (carray.shape, type(carray), carray.typecode(),
                              min(ravel(carray)), max(ravel(carray))))
    flipped = carray[::-1, :]
    im.ashow(flipped)
def setbackground(self,color):
    """Clear the canvas to ``color``, remember it, and repaint.

    ``color`` is an RGB or RGBA sequence; a 3-component colour gets an alpha
    of 0. appended.

    paintit() calls setbackground() itself, so ``self.settingbackground``
    acts as a re-entrancy guard: while it is set, calls return immediately.
    """
    if self.settingbackground:
        return
    if len(color) == 3:
        color = list(color) + [0.]
    glClearColor(*color)
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
    self.bkgdcolor = color
    self.settingbackground = 1
    try:
        self.paintit()
    finally:
        # Always release the guard; otherwise a single failed repaint would
        # turn every later setbackground() call into a no-op.
        self.settingbackground = 0
def set_xs(self, xs=None):
    """Store the x values and derive the x offset, x scale and x limits.

    Does nothing while no y data is set (``self.ys is None``).  When ``xs``
    is None, ``arange(self.ys.shape[0])`` is used.  Both the data range and
    the visible plot range are reset to the extent of the stored x values.
    """
    if self.ys is None:
        return
    if xs is None:
        xs = arange(self.ys.shape[0])
    self.xs = xs
    self.x_offset = -xs[0]
    try:
        self.x_scale = 1.0/(max(xs)-min(xs))
    except ZeroDivisionError:
        # All x values are equal (e.g. a single point): fall back to a unit
        # scale, the same way set_ys() does for the y values.
        self.x_scale = 1.0
    self.dataxmin = min(ravel(self.xs))
    self.dataxmax = max(ravel(self.xs))
    self.plotxmin = self.dataxmin
    self.plotxmax = self.dataxmax
def transform(self, ys):
    """Return ``add.reduce(ys)``; used to bring arrays of rank above 2 down a rank."""
    # should convert to a rank-2 array
    reduced = add.reduce(ys)
    return reduced
def set_ys(self, ys):
    """Store the y values and derive the y offset, y scale and y limits.

    ``None`` clears the stored data.  Arrays of rank above 2 are passed
    through ``self.transform`` until at most two dimensions remain.  The
    y scale falls back to 1.0 when computing it raises ZeroDivisionError.
    """
    if ys is None:
        self.ys = None
        return
    reduced = ys
    while len(reduced.shape) > 2:
        reduced = self.transform(reduced)
    self.ys = reduced
    self.y_offset = -reduced[0]
    try:
        spread = max(reduced) - min(reduced)
        self.y_scale = 1.0/spread
    except ZeroDivisionError:
        self.y_scale = 1.0
    lowest = min(ravel(self.ys))
    highest = max(ravel(self.ys))
    self.dataymin = lowest
    self.dataymax = highest
    # The visible range is reset to the full data range.
    self.plotymin = lowest
    self.plotymax = highest
def set_errors(self, errors):
    """Store the error-bar values and widen the y limits to include them.

    ``None`` clears the stored errors.  Arrays of rank above 2 are passed
    through ``self.transform`` until at most two dimensions remain.  The
    data and plot y ranges become the extent of ``ys -/+ abs(errors)``.
    """
    if errors is None:
        self.errors = None
        return
    reduced = errors
    while len(reduced.shape) > 2:
        reduced = self.transform(reduced)
    self.errors = reduced
    lower_edge = self.ys - abs(self.errors)
    upper_edge = self.ys + abs(self.errors)
    self.dataymin = min(ravel(lower_edge))
    self.dataymax = max(ravel(upper_edge))
    self.plotymin = self.dataymin
    self.plotymax = self.dataymax
# Redraw the whole plot: background, optional error bars, the data series,
# the axis lines, the min/max axis labels, a bounding box and (while a
# selection is being dragged) the red selection box; ends by swapping buffers.
def paintit(self):#, event):
### PREPARE FOR DRAWING AND CLEAR WINDOW
# setbackground() calls paintit() again; its settingbackground flag stops the recursion.
self.setbackground(self.bkgdcolor)
if self.ys is None:
return
### SET UP FOR REDRAWING
if not self.xs:
self.set_xs()
size = self.GetClientSize()
w,h = size.x, size.y
# NOTE(review): WZ and HZ are not used anywhere else in this method.
WZ = float(w) / len(self.xs)
HZ = float(h) / len(self.ys)
glLoadIdentity()
glEnable(GL_LINE_SMOOTH)
glEnable(GL_BLEND)
glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
glHint(GL_POINT_SMOOTH_HINT, GL_NICEST)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
# IMPORTANT COORDINATE TRANSFORMATIONS
# OnMouseEvent() reads xscale/yscale to map mouse positions into the plot area.
self.xscale = 0.84
self.yscale = 0.8
glScale(self.xscale, self.yscale, 1.0) # scale everything hereafter in this matrix
glOrtho(self.plotxmin, self.plotxmax,
self.plotymin, self.plotymax,
0, 1)
# Make sure both are 2D, so plot code can be general for multi and single lines
if len(self.ys.shape) == 1:
self.ys = self.ys[:,NewAxis]
if self.errors:
self.errors.shape = (len(self.errors),1)
### PLOT ERRORBARS (SAME COLOR AS ASSOCIATED TIMESERIES)
if hasattr(self, 'errors') and self.errors:
# loop through all timeseries'
for i in range(self.errors.shape[1]):
if self.errors.shape[1] > 1:
colortrio = glplotcolors[i%len(glplotcolors)]
apply(glColor3f,colortrio)
else:
glColor3f(1.,1.,0.)
glLineWidth(1.0)
lower = self.ys[:,i] - self.errors[:,i]
upper = self.ys[:,i] + self.errors[:,i]
glBegin(GL_LINES)
for x,yl, yu in transpose(array([self.xs, lower, upper])):
# Only points inside the visible x range are drawn.
if x>=self.plotxmin and x<=self.plotxmax:
glVertex2f(x,yl)
glVertex2f(x,yu)
glEnd()
### PLOT TIMESERIES (after/ON-TOP-OF ERRORBARS)
# loop through all timeseries'
for i in range(self.ys.shape[1]):
glLineWidth(self.lineweight)
if self.ys.shape[1] > 1:
colortrio = glplotcolors[i%len(glplotcolors)]
apply(glColor3f,colortrio)
else:
glColor3f(1.,1.,1.)
d = array((self.xs+0.0, self.ys[:,i]))
t = transpose(d)
glBegin(GL_LINE_STRIP)
for vert in t:
if vert[0]>=self.plotxmin and vert[0]<=self.plotxmax:
glVertex(vert[0],vert[1])
glEnd()
### PLOT X/Y-AXIS LINES (white)
glColor3f(1.,1.,1.)
glLineWidth(1.5)
glBegin(GL_LINES)
glVertex2i(self.plotxmin, 0)
glVertex2i(self.plotxmax, 0)
glVertex2i(0, self.plotymin)
glVertex2i(0, self.plotymax)
glEnd()
###
### TEXT PLOTTING CODE ... USED TO USE PyGLTT; NOW USES GLUT (until GLTT/FTGL works again)
###
self.textcolor = (1,1,1)
# Pick round numbers to be displayed
xrange_sigfig = log10(self.plotxmax-self.plotxmin)
yrange_sigfig = log10(self.plotymax-self.plotymin)
# print self.plotymax, self.plotymin, yrange_sigfig
if xrange_sigfig<=1:
xrounddigits = int(xrange_sigfig)+3
else:
xrounddigits = 0
if yrange_sigfig<=1:
yrounddigits = int(yrange_sigfig)+3
else:
yrounddigits = 0
# print self.plotymax, self.plotymin, yrange_sigfig
# And properly format the numeric text strings to be displayed
if xrounddigits:
xminstr = str(round(self.plotxmin,xrounddigits))
xmaxstr = str(round(self.plotxmax,xrounddigits))
else:
xminstr = str(int(round(self.plotxmin,xrounddigits)))
xmaxstr = str(int(round(self.plotxmax,xrounddigits)))
if yrounddigits:
yminstr = str(round(self.plotymin,yrounddigits))
ymaxstr = str(round(self.plotymax,yrounddigits))
else:
yminstr = str(int(round(self.plotymin,yrounddigits)))
ymaxstr = str(int(round(self.plotymax,yrounddigits)))
# Figure out where to place the numerical labels
# NOTE: Though we are using an Identity matrix, bitmap font locations apparently
# want to be localized in pixel-coordinates (hence all the GetSize() calls)
glPushMatrix()
glLoadIdentity()
xaxis_yoffset = -0.93*self.GetSize().y
yaxis_xoffset = -0.94*self.GetSize().x
xaxis_xmin = (-self.xscale-0.01)*self.GetSize().x
xaxis_xmax = (self.xscale-0.01)*self.GetSize().x
yaxis_ymin = -0.86*self.GetSize().y
yaxis_ymax = 0.78*self.GetSize().y
# print
# print self.GetSize(), self.GetClientSize()
# print "X-axis min: ",xaxis_xmin, xaxis_yoffset, ' / ', xminstr
# print "X-axis max: ",xaxis_xmax, xaxis_yoffset, ' / ', xmaxstr
# print "Y-axis min: ",yaxis_xoffset, yaxis_ymin, ' / ', yminstr
# print "Y-axis max: ",yaxis_xoffset, yaxis_ymax, ' / ', ymaxstr
### y-axis maximum
# draw_text() takes a canvas argument in addition to self; self is passed for it.
self.draw_text(self,
yaxis_xoffset, #self.GetSize().x*xoffset,
yaxis_ymax, #self.GetSize().y*ymaxoffset,
ymaxstr,None,None)
### y-axis minimum
self.draw_text(self,
yaxis_xoffset, #self.GetSize().x*xoffset,
yaxis_ymin, #self.GetSize().y*yminoffset,
yminstr,None,None)
# GLTTwxFont.ALIGN_RIGHT, GLTTwxFont.VALIGN_BOTTOM)
### x-axis maximum
self.draw_text(self,
xaxis_xmax, #self.GetSize().x*xoffset,
xaxis_yoffset, #self.GetSize().y*ymaxoffset,
xmaxstr,None,None)
### x-axis minimum
self.draw_text(self,
xaxis_xmin, #self.GetSize().x*xoffset,
xaxis_yoffset, #self.GetSize().y*yminoffset,
xminstr,None,None)
# GLTTwxFont.ALIGN_RIGHT, GLTTwxFont.VALIGN_BOTTOM)
### arrow value
# self.draw_text(self,
# xarrowoffset,
# self.GetSize().y*ymaxoffset,
# ' '+str(round(self.ys[self.arrow],1)), 0, 0) #,
# GLTTwxFont.ALIGN_LEFT, GLTTwxFont.VALIGN_BOTTOM)
### arrow timepoint
# self.draw_text(self,
# xarrowoffset,
# self.GetSize().y*yminoffset,
# ' '+str(self.arrow),None,None)
# GLTTwxFont.ALIGN_LEFT, GLTTwxFont.VALIGN_BOTTOM)
# Finally, draw a bounding-box (bottom/top left/right)
# NOTE: No need to use GetSize() here; we have an Identity matrix and are
# drawing normal (non-bitmap-text) stuff
BL = [-self.xscale,-self.yscale]
TL = [-self.xscale, self.yscale]
TR = [self.xscale, self.yscale]
BR = [self.xscale, -self.yscale]
#print BL, TL, TR, BR
glPointSize(1.0)
glColor3f(0.3,0.3,0.3)
glBegin(GL_LINE_STRIP)
glVertex2f(BL[0],BL[1])
glVertex2f(TL[0],TL[1])
glVertex2f(TR[0],TR[1])
glVertex2f(BR[0],BR[1])
glVertex2f(BL[0],BL[1])
glEnd()
glPopMatrix()
### LAST, BUT NOT LEAST, DRAW SELECTION-BOX ... (RED)
# self.box has four elements only while a selection is being dragged (see OnMouseEvent).
if self.box and len(self.box)==4:
glPointSize(2.0)
glColor3f(1.,0.,0.)
glBegin(GL_LINE_STRIP)
glVertex2f(self.box[0], self.box[1])
glVertex2f(self.box[2], self.box[1])
glVertex2f(self.box[2], self.box[3])
glVertex2f(self.box[0], self.box[3])
glVertex2f(self.box[0], self.box[1])
glEnd()
# FINALLY, CLIP VIEW TO SPECIFIED SUB-PORTION OF WINDOW
# glEnable(GL_CLIP_PLANE1)
# glEnable(GL_CLIP_PLANE2)
# glEnable(GL_CLIP_PLANE3)
# glEnable(GL_CLIP_PLANE4)
# glClipPlane(GL_CLIP_PLANE1, [0., 1., 0., -self.plotymin]) # clips off the bottom
# glClipPlane(GL_CLIP_PLANE2, [0., -1., 0., self.plotymax]) # clips off the top
# glClipPlane(GL_CLIP_PLANE3, [1., 0., 0., -self.plotxmin]) # clips off the left
# glClipPlane(GL_CLIP_PLANE4, [-1., 0., 0., self.plotxmax]) # clips off the right
self.SwapBuffers() # NECESSARY, or screen doesn't redraw
def draw_text(self, canvas, x,y,text,align,valign):
    """Draw ``text`` in ``self.textcolor`` using the GLUT bitmap font ``self.font``.

    The raster position is (x, y) divided by the client width and height.
    ``canvas``, ``align`` and ``valign`` are accepted but not used.
    """
    glColor3f(*self.textcolor)
    client = self.GetClientSize()
    width, height = float(client.x), float(client.y)
    glRasterPos2f(x/width, y/height)
    for character in text:
        # print x,y,self.font,char
        GLUT.glutBitmapCharacter(self.font, ord(character))  #text[0]) #self.font,text)
    # self.font.write_string(canvas, x, y, text, align, valign)
def getpropX(self, x):
    """Return ``x`` as a proportion of the middle 80% of the client width.

    The result is 0 at 10% of the width and 1 at 90% of it.
    """
    width = self.GetClientSize().x
    left_margin = width*.1
    return (x - left_margin) / (width*.8)
# Close this window; the event argument is not used.
def TimeToQuit(self, event):
### REMAKE LINEPLOT WHEN SELF.BOX IS RE-CREATED
# NOTE(review): the comment above does not describe what this method does.
# `true` is not defined in this file; presumably it comes from the wxPython star import -- TODO confirm.
self.Close(true)
def glplot(yvals=None,xvals=None,errors=None):
    """
    Create a plot using a wxGLCanvas.

    Usage: glplot(
        yvals=None,   y-axis data
        xvals=None,   x-axis data; when omitted, N.arange(yvals.shape[0]) is used
        errors=None,  y-axis errorbar data
    )

    Returns immediately when neither xvals nor yvals holds any data.
    Otherwise builds the frame, menus and plot canvas, loads the data into
    the canvas and runs the wx main loop.
    """
    def _missing(values):
        # "No data" means None or zero length.  Truth-testing an array
        # instead would depend on the values it contains.
        return values is None or len(values) == 0

    if _missing(xvals) and _missing(yvals):
        return
    if _missing(xvals):
        xvals = N.arange(yvals.shape[0])

    class MyApp(wxApp):
        def OnInit(self): #,x=None,y=None,errors=None):
            windowXpixels = 8   # 8 pixels of frame OUTSIDE the canvas
            windowYpixels = 27  # 27 pixels of frame plus title-bar OUTSIDE the canvas
            self.frame = wxFrame(NULL, -1, "wxPython Context",
                                 wxPoint(0,0),
                                 wxSize(1200+windowXpixels,400+windowYpixels))
            self.mainmenu = wxMenuBar()

            # File menu
            filemenu = wxMenu()
            cimgID = wxNewId()
            gimgID = wxNewId()
            exitID = wxNewId()
            filemenu.Append(cimgID, 'Save C&olor\tAlt-C', 'Save color pixelmap using IC.exe')
            filemenu.Append(gimgID, 'Save G&ray\tAlt-G', 'Save gray pixelmap using IC.exe')
            filemenu.Append(exitID, 'E&xit\tAlt-X', 'Quit')
            EVT_MENU(self, cimgID, self.OnCImgSave)
            EVT_MENU(self, gimgID, self.OnGImgSave)
            EVT_MENU(self, exitID, self.OnFileExit)
            self.mainmenu.Append(filemenu, '&File')

            # Edit menu
            propmenu = wxMenu()
            fontID = wxNewId()
            lineweightupID = wxNewId()
            lineweightdnID = wxNewId()
            bkgdID = wxNewId()
            propmenu.Append(fontID, 'F&onts\tAlt-F', 'Change font for all text items')
            propmenu.Append(lineweightupID, 'I&ncrease lineweight\tAlt-I', 'Increase plotting line weight')
            propmenu.Append(lineweightdnID, 'D&ecrease lineweight\tAlt-D', 'Decrease plotting line weight')
            propmenu.Append(bkgdID, 'B&ackground color\tAlt-B', 'Change plot background color')
            EVT_MENU(self, fontID, self.OnFont)
            EVT_MENU(self, lineweightupID, self.OnLineweightup)
            EVT_MENU(self, lineweightdnID, self.OnLineweightdn)
            EVT_MENU(self, bkgdID, self.OnBkgd)
            self.mainmenu.Append(propmenu, '&Edit')
            self.frame.SetMenuBar(self.mainmenu)

            # Now, create the line-plot part
            self.win = OpenglMultiLinePlot(self.frame,autospin_allowed=0)
            self.frame.Show(TRUE)
            self.SetTopWindow(self.frame)
            return TRUE

        def OnCImgSave(self,event):
            self.win.save_colorpixelmap()

        def OnGImgSave(self,event):
            self.win.save_graypixelmap()

        def OnFileExit(self,event):
            sys.exit()

        def OnFont(self,event):
            data = wxFontData()
            dlg = wxFontDialog(self.frame, data)
            if dlg.ShowModal() == wxID_OK:
                data = dlg.GetFontData()
                font = data.GetChosenFont()
                # Same output as the former multi-item print statement
                # (items separated by single spaces).
                print("%s %s %s %s %s %s" % ('You selected: ', font.GetFaceName(), ', ',
                                             str(font.GetPointSize()), ', color ',
                                             data.GetColour().Get()))
                self.win.fontname = font.GetFaceName()
                self.win.fontstype = font.GetStyle()
                self.win.fontsize = font.GetPointSize()
                self.win.fontcolor = data.GetColour().Get()
            # Release the dialog whether or not a font was chosen.
            dlg.Destroy()

        def OnLineweightup(self,event):
            self.win.changelineweight(+1)

        def OnLineweightdn(self,event):
            self.win.changelineweight(-1)

        def OnBkgd(self,event):
            data = wxColourData()
            dlg = wxColourDialog(self.frame, data)
            rgb = None
            if dlg.ShowModal() == wxID_OK:
                wxcolor = dlg.GetColourData().GetColour()
                # Read the components while the dialog still exists.
                rgb = [wxcolor.Red(), wxcolor.Green(), wxcolor.Blue()]
            dlg.Destroy()
            if rgb is None:
                # Cancelled: keep the current background.
                return
            newcolor = N.array(rgb) / 255.
            self.win.setbackground(newcolor)

    app = MyApp(0)
    # The y data must be stored first: set_xs() returns without storing
    # anything while the canvas has no y data, which would silently discard
    # the supplied x values.
    app.win.set_ys(yvals)
    app.win.set_xs(xvals)
    app.win.set_errors(errors)
    app.MainLoop()

1168
tests/util/vendor/strangman/io.py поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

1068
tests/util/vendor/strangman/pstat.py поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

4530
tests/util/vendor/strangman/stats.py поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

199
tests/util/vendor/strangman/statstest.py поставляемый Normal file
Просмотреть файл

@ -0,0 +1,199 @@
import stats, os, pstat
reload(stats)
try:
import numpy as N
except ImportError:
pass
# Fixtures: l is a list of the integers 1..20, lf the same list with one
# element replaced by a float, a/af the array versions of l/lf, and ll/aa
# five copies of l as a nested list / array.
# NOTE(review): N is only bound if the numpy import at the top of this script succeeded.
l = range(1,21)
lf = range(1,21)
lf[2] = 3.0
a = N.array(l)
af = N.array(lf)
ll = [l]*5
aa = N.array(ll)
# Each section below prints the results of the stats functions for the list
# and array fixtures so they can be compared by eye; nothing is asserted.
print '\nCENTRAL TENDENCY'
print 'geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af)
print 'harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af)
print 'mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af)
print 'median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af)
print 'medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af)
print 'mode:',stats.mode(l),stats.mode(a)
print '\nMOMENTS'
print 'moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af)
print 'variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af)
print 'skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af)
print 'kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af)
print 'tmean:',stats.tmean(a,(5,17)),stats.tmean(af,(5,17))
print 'tvar:',stats.tvar(a,(5,17)),stats.tvar(af,(5,17))
print 'tstdev:',stats.tstdev(a,(5,17)),stats.tstdev(af,(5,17))
print 'tsem:',stats.tsem(a,(5,17)),stats.tsem(af,(5,17))
print 'describe:'
print stats.describe(l)
print stats.describe(lf)
print stats.describe(a)
print stats.describe(af)
print '\nFREQUENCY'
# The 'freqtable:' heading is printed without any call following it.
print 'freqtable:'
print 'itemfreq:'
print stats.itemfreq(l)
print stats.itemfreq(a)
print 'scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40)
print 'percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12)
print 'histogram:',stats.histogram(l),stats.histogram(a)
print 'cumfreq:'
print stats.cumfreq(l)
print stats.cumfreq(lf)
print stats.cumfreq(a)
print stats.cumfreq(af)
print 'relfreq:'
print stats.relfreq(l)
print stats.relfreq(lf)
print stats.relfreq(a)
print stats.relfreq(af)
# Variation and trimming sections: print list and array results side by side.
print '\nVARIATION'
print 'obrientransform:'
# The list/array fixtures are rebuilt before use.
l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)
print stats.obrientransform(l,l,l,l,l)
print stats.obrientransform(a,a,a,a,a)
print 'samplevar:',stats.samplevar(l),stats.samplevar(a)
print 'samplestdev:',stats.samplestdev(l),stats.samplestdev(a)
print 'var:',stats.var(l),stats.var(a)
print 'stdev:',stats.stdev(l),stats.stdev(a)
print 'sterr:',stats.sterr(l),stats.sterr(a)
print 'sem:',stats.sem(l),stats.sem(a)
print 'z:',stats.z(l,4),stats.z(a,4)
print 'zs:'
print stats.zs(l)
print stats.zs(a)
print '\nTRIMMING'
print 'trimboth:'
print stats.trimboth(l,.2)
print stats.trimboth(lf,.2)
print stats.trimboth(a,.2)
print stats.trimboth(af,.2)
print 'trim1:'
print stats.trim1(l,.2)
print stats.trim1(lf,.2)
print stats.trim1(a,.2)
print stats.trim1(af,.2)
# Correlation and inferential sections: each test is run once on the list
# pair (l, m) and once on the array pair (a, b), and the results printed.
print '\nCORRELATION'
#execfile('testpairedstats.py')
l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)
# m is the integers 4..23 with one element replaced by 34; b is its array version.
m = range(4,24)
m[10] = 34
b = N.array(m)
# pb/apb: nine 0s followed by eleven 1s, used as the first argument of pointbiserialr.
pb = [0]*9 + [1]*11
apb = N.array(pb)
print 'paired:'
#stats.paired(l,m)
#stats.paired(a,b)
print
print
print 'pearsonr:'
print stats.pearsonr(l,m)
print stats.pearsonr(a,b)
print 'spearmanr:'
print stats.spearmanr(l,m)
print stats.spearmanr(a,b)
print 'pointbiserialr:'
print stats.pointbiserialr(pb,l)
print stats.pointbiserialr(apb,a)
print 'kendalltau:'
print stats.kendalltau(l,m)
print stats.kendalltau(a,b)
print 'linregress:'
print stats.linregress(l,m)
print stats.linregress(a,b)
print '\nINFERENTIAL'
print 'ttest_1samp:'
print stats.ttest_1samp(l,12)
print stats.ttest_1samp(a,12)
print 'ttest_ind:'
print stats.ttest_ind(l,m)
print stats.ttest_ind(a,b)
print 'ttest_rel:'
print stats.ttest_rel(l,m)
print stats.ttest_rel(a,b)
print 'chisquare:'
print stats.chisquare(l)
print stats.chisquare(a)
print 'ks_2samp:'
print stats.ks_2samp(l,m)
print stats.ks_2samp(a,b)
print 'mannwhitneyu:'
print stats.mannwhitneyu(l,m)
print stats.mannwhitneyu(a,b)
print 'ranksums:'
print stats.ranksums(l,m)
print stats.ranksums(a,b)
print 'wilcoxont:'
print stats.wilcoxont(l,m)
print stats.wilcoxont(a,b)
print 'kruskalwallish:'
print stats.kruskalwallish(l,m,l)
print len(l), len(m)
print stats.kruskalwallish(a,b,a)
print 'friedmanchisquare:'
print stats.friedmanchisquare(l,m,l)
print stats.friedmanchisquare(a,b,a)
# Fixtures are rebuilt again before the F_oneway calls.
l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)
m = range(4,24)
m[10] = 34
b = N.array(m)
print '\n\nF_oneway:'
print stats.F_oneway(l,m)
print stats.F_oneway(a,b)
#print 'F_value:',stats.F_value(l),stats.F_value(a)
# Support-function section, followed by the ANOVA checks kept in a separate script.
print '\nSUPPORT'
print 'sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af)
print 'cumsum:'
print stats.cumsum(l)
print stats.cumsum(lf)
print stats.cumsum(a)
print stats.cumsum(af)
print 'ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af)
print 'summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b)
# The label says 'sumsquared' but the function called is stats.square_of_sums.
print 'sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af)
print 'sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b)
print 'shellsort:'
print stats.shellsort(m)
print stats.shellsort(b)
print 'rankdata:'
print stats.rankdata(m)
print stats.rankdata(b)
print '\nANOVAs'
# Runs testanova.py in this script's namespace; the relative path means it
# is looked up in the current working directory.
execfile('testanova.py')