Mirror of https://github.com/mozilla/esFrontLine.git

update lib and tests

This commit is contained in:
Parent: 748e5bc164
Commit: 142becee34
@@ -0,0 +1 @@
@@ -10,7 +10,7 @@
        "threaded":true,
        "processes":1
    },
    "whitelist":["bugs"],
    "whitelist":["public_bugs"],
    "debug":{
        "log":[{
            "filename": "./tests/results/logs/test_slow_server.log",
@@ -1,15 +1,12 @@
{
    "elasticsearch":[{
        "host":"http://elasticsearch4.metrics.scl3.mozilla.com",
        "host":"http://elasticsearch1.bugs.scl3.mozilla.com",
        "port":9200
    },{
        "host":"http://elasticsearch5.metrics.scl3.mozilla.com",
        "host":"http://elasticsearch2.bugs.scl3.mozilla.com",
        "port":9200
    },{
        "host":"http://elasticsearch7.metrics.scl3.mozilla.com",
        "port":9200
    },{
        "host":"http://elasticsearch8.metrics.scl3.mozilla.com",
        "host":"http://elasticsearch3.bugs.scl3.mozilla.com",
        "port":9200
    }],
    "flask":{
@@ -19,7 +16,7 @@
        "threaded":true,
        "processes":1
    },
    "whitelist":["bugs", "org_chart", "bug_summary", "reviews"],
    "whitelist":["public_bugs"],
    "debug":{
        "log":[{
            "filename": "./tests/results/logs/app.log",
@@ -1,5 +1,5 @@
from tests_by_bug_id import all_tests
from util.logs import Log
from util.env.logs import Log

url = "http://klahnakoski-es.corp.tor1.mozilla.com:9201"
@@ -18,13 +18,15 @@ import requests
from werkzeug.exceptions import abort
from esFrontLine.app import stream
from util.cnv import CNV
from util.logs import Log
from util.env.logs import Log
from util.strings import expand_template
from util.threads import Thread, Signal
from util.thread.threads import Thread, Signal

app = Flask(__name__)

PATH = '/bugs/_mapping'
WHITELISTED = "public_bugs"  # ENSURE THIS IS IN THE slow_server_settings.json WHITELIST

PATH = '/'+WHITELISTED+'/_mapping'
SLOW_PORT = 9299
PROXY_PORT = 9298
RATE = 4.0  # per second
@@ -37,15 +39,15 @@ server_is_ready = Signal()
def serve_slowly(path):
    def octoberfest():
        for bb in range(99, 2, -1):
            yield ("0"*65535)+"\n"  #BUG ENOUGH TO FILL THE INCOMING BUFFER
            yield ("0"*65535)+"\n"  # ENOUGH TO FILL THE INCOMING BUFFER
            Thread.sleep(1.0/RATE)
            yield CNV.unicode2utf8(expand_template("{{num}} bottles of beer on the wall! {{num}} bottles of beer! Take one down, pass it around! {{less}} bottles of beer on he wall!\n", {
                "num": bb,
                "less": bb - 1
            }))
        yield ("0"*65535)+"\n"  #BUG ENOUGH TO FILL THE INCOMING BUFFER
        yield ("0"*65535)+"\n"  # ENOUGH TO FILL THE INCOMING BUFFER
        yield CNV.unicode2utf8(u"2 bottles of beer on the wall! 2 bottles of beer! Take one down, pass it around! 1 bottle of beer on he wall!\n")
        yield ("0"*65535)+"\n"  #BUG ENOUGH TO FILL THE INCOMING BUFFER
        yield ("0"*65535)+"\n"  # ENOUGH TO FILL THE INCOMING BUFFER
        yield CNV.unicode2utf8(u"1 bottle of beer on the wall! 1 bottle of beer! Take one down, pass it around! 0 bottles of beer on he wall.\n")

    try:
@@ -101,13 +103,13 @@ def run_proxy(please_stop):
        proc.send_signal(signal.CTRL_C_EVENT)


def test_slow_server():
def test_slow_streaming():
    """
    TEST THAT THE app ACTUALLY STREAMS.  WE SHOULD GET A RESPONSE BEFORE THE SERVER
    FINISHES DELIVERING
    """
    slow_server_thread = Thread.run("run slow server", run_slow_server)
    proxy_thread = Thread.run("run slow server", run_proxy)
    proxy_thread = Thread.run("run proxy", run_proxy)

    try:
        proxy_is_ready.wait_for_go()
@@ -118,7 +120,7 @@ def test_slow_server():
        for i, data in enumerate(stream(response.raw)):
            Log.note("CLIENT GOT RESPONSE:\n{{data|indent}}", {"data": data})
            end = time.clock()
            if i == 0 and end - start > 5:  # IF WE GET DATA BEFORE 5sec, THEN WE KNOW WE ARE STREAMING
            if i == 0 and end - start > 10:  # IF WE GET DATA BEFORE 10sec, THEN WE KNOW WE ARE STREAMING
                Log.error("should have something by now")
        if response.status_code != 200:
            Log.error("Expecting a positive response")
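A sketch (not part of the commit) of how the streamed proxy response above is consumed; stream() and PATH come from this file, the URL and handle() are illustrative, and stream=True keeps response.raw unread until iterated:

    import requests
    from esFrontLine.app import stream

    response = requests.get("http://localhost:9298" + PATH, stream=True)
    for chunk in stream(response.raw):
        handle(chunk)   # hypothetical consumer; chunks arrive before the body completes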
@@ -2,10 +2,12 @@ from _subprocess import CREATE_NEW_PROCESS_GROUP
import subprocess
import requests
import signal
import test_slow_server
from test_slow_server import test_slow_server
from util.logs import Log
from util.threads import Thread, Signal
from util.env.logs import Log
from util.thread.threads import Signal, Thread
from test_slow_server import test_slow_streaming

WHITELISTED = "public_bugs"  # ENSURE THIS IS IN THE test_settings.json WHITELIST
NOT_WHITELISTED = "bug_hierarchy"


def test_943465(url):
@@ -21,14 +23,14 @@ def test_943465(url):

def test_943472(url):
    #https://bugzilla.mozilla.org/show_bug.cgi?id=943472
    response = request("GET", url + "/bugs/_stats/")
    response = request("GET", url + "/" + WHITELISTED + "/_stats/")
    if response.status_code != 400:
        Log.error("should not allow")


def test_943478(url):
    #https://bugzilla.mozilla.org/show_bug.cgi?id=943478
    response = request("POST", url + "/telemetry_agg_valid_201302/_search", data="""
    response = request("POST", url + "/" + NOT_WHITELISTED + "/_search", data="""
    {
        "query":{"filtered":{
            "query":{"match_all":{}}
@@ -42,7 +44,7 @@ def test_943478(url):
        Log.error("should not allow")

    # VERIFY ALLOWED INDEX GETS THROUGH
    response = request("POST", url + "/bugs/_search", data="""
    response = request("POST", url + "/" + WHITELISTED + "/_search", data="""
    {
        "query":{"filtered":{
            "query":{"match_all":{}},
@@ -60,11 +62,25 @@ def test_943478(url):
def test_allow_3path_mapping(url):
    #WE SHOULD ALLOW _mapping WITH INDEX AND TYPE IN PATH
    #http://klahnakoski-es.corp.tor1.mozilla.com:9204/bugs/bug_version/_mapping
    response = request("GET", url + "/bugs/bug_version/_mapping")
    response = request("GET", url + "/" + WHITELISTED + "/bug_version/_mapping")
    if response.status_code != 200:
        Log.error("should be allowed")


def test_allow_head_request(url):
    #WE SHOULD ALLOW HEAD REQUESTS TO /
    response = request("HEAD", url + "/")
    if response.status_code != 200:
        Log.error("should be allowed")

    response = request("HEAD", url)
    if response.status_code != 200:
        Log.error("should be allowed")

    # EVEN HEAD REQUESTS TO WHITELISTED INDEXES WILL BE DENIED
    response = request("HEAD", url + "/" + WHITELISTED + "/bug_version/_mapping")
    if response.status_code == 200:
        Log.error("should NOT be allowed")


def request(type, url, data=None, **kwargs):
@@ -107,11 +123,14 @@ def run_app(please_stop):


def all_tests(url):
    # test_943465(url)
    # test_943472(url)
    # test_943478(url)
    # test_allow_3path_mapping(url)
    test_slow_server()
    test_allow_head_request(url)
    test_943465(url)
    test_943472(url)
    test_943478(url)
    test_allow_3path_mapping(url)

    test_slow_streaming()

    Log.note("ALL TESTS PASS")
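Sketch of the whitelist behavior this suite pins down, using the request() helper defined above (index constants are this suite's own; the 200/400 expectations follow test_allow_3path_mapping and test_943478):

    response = request("GET", url + "/" + WHITELISTED + "/bug_version/_mapping")
    assert response.status_code == 200    # whitelisted index and type are allowed

    response = request("POST", url + "/" + NOT_WHITELISTED + "/_search", data="{}")
    assert response.status_code == 400    # non-whitelisted index is refused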
@@ -0,0 +1 @@
from .maths import stats
@@ -1,6 +1,5 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
@@ -8,17 +7,27 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals

import StringIO
import base64
import datetime
import json
import re
import time
from .multiset import Multiset
from .jsons import json_decoder, json_encoder
from .logs import Log
import struct
from .strings import expand_template, indent
from .struct import StructList, Null
from urllib import urlencode

from . import struct
from . import jsons
from .jsons import json_encoder
from .collections.multiset import Multiset
from .env.profiles import Profiler
from .env.logs import Log
from .strings import expand_template
from .structs.wraps import wrap, wrap_dot


json_decoder = json.JSONDecoder().decode


class CNV:
@@ -29,71 +38,97 @@ class CNV:
    @staticmethod
    def object2JSON(obj, pretty=False):
        try:
            return json_encoder.encode(obj, pretty=pretty)
            json = json_encoder(obj, pretty=pretty)
            if json == None:
                Log.note(str(type(obj)) + " is not valid{{type}}JSON", {"type": " (pretty) " if pretty else " "})
                Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
            return json
        except Exception, e:
            Log.error("Can not encode into JSON: {{value}}", {"value": repr(obj)}, e)

    @staticmethod
    def JSON2object(json_string, params=None, flexible=False):
        try:
            #REMOVE """COMMENTS""", #COMMENTS, //COMMENTS, AND \n \r
            if flexible: json_string = re.sub(r"\"\"\".*?\"\"\"|\s+//.*\n|#.*?\n|\n|\r", r" ",
                json_string)  #DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
    def JSON2object(json_string, params=None, flexible=False, paths=False):
        with Profiler("JSON2Object"):
            try:
                #REMOVE """COMMENTS""", #COMMENTS, //COMMENTS, AND \n \r
                if flexible:
                    #DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
                    json_string = re.sub(r"\"\"\".*?\"\"\"|[ \t]+//.*\n|^//.*\n|#.*?\n", r"\n", json_string)
                    json_string = re.sub(r"\n//.*\n", r"\n\n", json_string)
            if params:
                params = dict([(k, CNV.value2quote(v)) for k, v in params.items()])
                json_string = expand_template(json_string, params)
                if isinstance(json_string, str):
                    Log.error("only unicode json accepted")

                if params:
                    params = dict([(k, CNV.value2quote(v)) for k, v in params.items()])
                    json_string = expand_template(json_string, params)

                value = wrap(json_decoder(json_string))

            obj = json_decoder.decode(json_string)
            if isinstance(obj, list): return StructList(obj)
            return struct.wrap(obj)
        except Exception, e:
            Log.error("Can not decode JSON:\n\t" + json_string, e)
                if paths:
                    value = wrap_dot(value)

                return value

            except Exception, e:
                Log.error("Can not decode JSON:\n\t" + str(json_string), e)

    @staticmethod
    def string2datetime(value, format):
        ## http://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        if value == None:
            return None
        try:
            return datetime.datetime.strptime(value, format)
        except Exception, e:
            Log.error("Can not format {{value}} with {{format}}", {"value": value, "format": format}, e)


    @staticmethod
    def datetime2string(value, format):
    def datetime2string(value, format="%Y-%m-%d %H:%M:%S"):
        try:
            return value.strftime(format)
        except Exception, e:
            Log.error("Can not format {{value}} with {{format}}", {"value": value, "format": format}, e)


    @staticmethod
    def datetime2unix(d):
        if d == None:
            return None
        return long(time.mktime(d.timetuple()))


    @staticmethod
    def datetime2milli(d):
        try:
            epoch = datetime.datetime(1970, 1, 1)
            if d == None:
                return None
            elif isinstance(d, datetime.datetime):
                epoch = datetime.datetime(1970, 1, 1)
            elif isinstance(d, datetime.date):
                epoch = datetime.date(1970, 1, 1)
            else:
                Log.error("Can not convert {{value}} of type {{type}}", {"value": d, "type": d.__class__})

            diff = d - epoch
            return (diff.days * 86400000) + \
                   (diff.seconds * 1000) + \
                   (diff.microseconds / 1000)  # 86400000=24*3600*1000
            return long(diff.total_seconds()) * 1000L + long(diff.microseconds / 1000)
        except Exception, e:
            Log.error("Can not convert {{value}}", {"value": d})
            Log.error("Can not convert {{value}}", {"value": d}, e)

    @staticmethod
    def timedelta2milli(v):
        return v.total_seconds()

    @staticmethod
    def unix2datetime(u):
        return datetime.datetime.utcfromtimestamp(u)
        try:
            if u == None:
                return None
            if u == 9999999999:  # PYPY BUG https://bugs.pypy.org/issue1697
                return datetime.datetime(2286, 11, 20, 17, 46, 39)
            return datetime.datetime.utcfromtimestamp(u)
        except Exception, e:
            Log.error("Can not convert {{value}} to datetime", {"value": u}, e)

    @staticmethod
    def milli2datetime(u):
        return datetime.datetime.utcfromtimestamp(u / 1000)

        return CNV.unix2datetime(u / 1000.0)

    @staticmethod
    def dict2Multiset(dic):
@@ -113,14 +148,25 @@ class CNV:
            return None
        return dict(value.dic)


    @staticmethod
    def table2list(
        column_names, #tuple of columns names
        rows          #list of tuples
    ):
        return StructList([dict(zip(column_names, r)) for r in rows])
        return wrap([dict(zip(column_names, r)) for r in rows])

    @staticmethod
    def list2tab(rows):
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

        output = []
        for r in rows:
            output.append("\t".join(CNV.object2JSON(r[k]) for k in keys))

        return "\t".join(keys) + "\n" + "\n".join(output)

    #PROPER NULL HANDLING
    @staticmethod
@@ -140,8 +186,18 @@ class CNV:

    @staticmethod
    def string2quote(value):
        # return repr(value)
        return "\"" + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\""
        return jsons.quote(value)

    @staticmethod
    def value2url(value):
        return urlencode(value)

    @staticmethod
    def quote2string(value):
        if value[0] == "\"" and value[-1] == "\"":
            value = value[1:-1]

        return value.replace("\\\\", "\\").replace("\\\"", "\"").replace("\\'", "'").replace("\\\n", "\n").replace("\\\t", "\t")

    #RETURN PYTHON CODE FOR THE SAME
    @staticmethod
@@ -170,11 +226,26 @@ class CNV:
    def latin12hex(value):
        return value.encode("hex")


    @staticmethod
    def int2hex(value, size):
        return (("0" * size) + hex(value)[2:])[-size:]

    @staticmethod
    def hex2bytearray(value):
        return bytearray(value.decode("hex"))

    @staticmethod
    def bytearray2hex(value):
        return value.decode("latin1").encode("hex")

    @staticmethod
    def base642bytearray(value):
        return bytearray(base64.b64decode(value))

    @staticmethod
    def bytearray2base64(value):
        return base64.b64encode(value)

    @staticmethod
    def value2intlist(value):
        if value == None:
@@ -187,7 +258,6 @@ class CNV:
        else:
            return [int(value)]


    @staticmethod
    def value2int(value):
        if value == None:
@@ -195,7 +265,6 @@ class CNV:
        else:
            return int(value)


    @staticmethod
    def value2number(v):
        try:
@@ -219,79 +288,126 @@ class CNV:

    @staticmethod
    def latin12unicode(value):
        return unicode(value.decode('iso-8859-1'))
        if isinstance(value, unicode):
            Log.error("can not convert unicode from latin1")
        try:
            return unicode(value.decode('iso-8859-1'))
        except Exception, e:
            Log.error("Can not convert {{value|quote}} to unicode", {"value": value})

    @staticmethod
    def esfilter2where(esfilter):
        """
        CONVERT esfilter TO FUNCTION THAT WILL PERFORM THE FILTER
        WILL ADD row, rownum, AND rows AS CONTEXT VARIABLES FOR {"script":} IF NEEDED
        """
        output = None
        condition = CNV._esfilter2where(esfilter)
        exec \
            "def result(row, rownum, rows):\n" + \
            "    if " + condition + ":\n" + \
            "        return True\n" + \
            "    return False" + \
            "output = result"

        def output(row, rownum=None, rows=None):
            return _filter(esfilter, row, rownum, rows)

        return output

    @staticmethod
    def _esfilter2where(esfilter):
        def isolate(separator, list):
            if len(list) > 1:
                return u"(\n" + indent((" " + separator + " \\\n").join(list)) + u"\n)"
            else:
                return list[0]
    def pipe2value(value):
        type = value[0]
        if type == '0':
            return None
        if type == 'n':
            return CNV.value2number(value[1::])

        esfilter = struct.wrap(esfilter)
        if type != 's' and type != 'a':
            Log.error("unknown pipe type ({{type}}) in {{value}}", {"type": type, "value": value})

        if esfilter[u"and"]:
            return isolate(u"and", [CNV._esfilter2where(a) for a in esfilter[u"and"]])
        elif esfilter[u"or"]:
            return isolate(u"or", [CNV._esfilter2where(a) for a in esfilter[u"or"]])
        elif esfilter[u"not"]:
            return u"not (" + CNV._esfilter2where(esfilter[u"not"]) + u")"
        elif esfilter.term:
            return isolate(u"and", [u"row." + col + u" == " + CNV.value2quote(val) for col, val in esfilter.term.items()])
        elif esfilter.terms:
            def single(col, vals):
                has_null = False
                for val in vals:
                    if val == None:
                        has_null = True
                        break

                if has_null:
                    return u"(row." + col + u" == None or row." + col + u" in " + CNV.value2quote(v for v in vals if v != None)
                else:
                    return u"row." + col + u" in " + CNV.value2quote(vals)

            return isolate(u"and", [single(col, vals) for col, vals in esfilter.terms])
        elif esfilter.script:
            return u"(" + esfilter.script + u")"
        elif esfilter.range:
            name2sign = {
                u"gt": u">",
                u"gte": u">=",
                u"lte": u"<=",
                u"lt": u"<"
            }

            def single(col, ranges):
                return u" and ".join(u"row." + col + name2sign[sign] + CNV.value2quote(value) for sign, value in ranges.items())

            output = isolate(u"and", [single(col, ranges) for col, ranges in esfilter.range.items()])
        # EXPECTING MOST STRINGS TO NOT HAVE ESCAPED CHARS
        output = unPipe(value)
        if type == 's':
            return output
        elif esfilter.missing:
            if isinstance(esfilter.missing, basestring):
                return esfilter.missing + u" == None"
            else:
                return esfilter.missing.field + u" == None"
        elif esfilter.exists:
            if isinstance(esfilter.exists, basestring):
                return esfilter.exists + u" != None"
            else:
                return esfilter.exists.field + u" != None"

        return [CNV.pipe2value(v) for v in output.split("|")]


def unPipe(value):
    s = value.find("\\", 1)
    if s < 0:
        return value[1::]

    result = ""
    e = 1
    while True:
        c = value[s + 1]
        if c == 'p':
            result = result + value[e:s] + '|'
            s += 2
            e = s
        elif c == '\\':
            result = result + value[e:s] + '\\'
            s += 2
            e = s
        else:
            Log.error(u"Can not convert esfilter to SQL: {{esfilter}}", {u"esfilter": esfilter})
            s += 1

        s = value.find("\\", s)
        if s < 0:
            break
    return result + value[e::]


def _filter(esfilter, row, rownum, rows):
    esfilter = wrap(esfilter)

    if esfilter[u"and"]:
        for a in esfilter[u"and"]:
            if not _filter(a, row, rownum, rows):
                return False
        return True
    elif esfilter[u"or"]:
        for a in esfilter[u"and"]:
            if _filter(a, row, rownum, rows):
                return True
        return False
    elif esfilter[u"not"]:
        return not _filter(esfilter[u"not"], row, rownum, rows)
    elif esfilter.term:
        for col, val in esfilter.term.items():
            if row[col] != val:
                return False
        return True
    elif esfilter.terms:
        for col, vals in esfilter.terms.items():
            if not row[col] in vals:
                return False
        return True
    elif esfilter.range:
        for col, ranges in esfilter.range.items():
            for sign, val in ranges.items():
                if sign in ("gt", ">") and row[col] <= val:
                    return False
                if sign == "gte" and row[col] < val:
                    return False
                if sign == "lte" and row[col] > val:
                    return False
                if sign == "lt" and row[col] >= val:
                    return False
        return True
    elif esfilter.missing:
        if isinstance(esfilter.missing, basestring):
            field = esfilter.missing
        else:
            field = esfilter.missing.field

        if row[field] == None:
            return True
        return False

    elif esfilter.exists:
        if isinstance(esfilter.missing, basestring):
            field = esfilter.missing
        else:
            field = esfilter.missing.field

        if row[field] != None:
            return True
        return False
    else:
        Log.error(u"Can not convert esfilter to SQL: {{esfilter}}", {u"esfilter": esfilter})
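Illustrative use of the rewritten esfilter2where above (a sketch, not part of the commit): the returned predicate runs _filter against a plain dict row.

    pred = CNV.esfilter2where({"and": [
        {"term": {"status": "open"}},
        {"range": {"age": {"gte": 18}}}
    ]})
    assert pred({"status": "open", "age": 21}) == True    # both clauses hold
    assert pred({"status": "open", "age": 7}) == False    # range clause fails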
@@ -0,0 +1,143 @@
import types
import math
from ..collections.multiset import Multiset
from ..struct import Null

__author__ = 'klahnakoski'


def reverse(values):
    """
    REVERSE - WITH NO SIDE EFFECTS!
    """
    output = list(values)
    output.reverse()
    return output


def MIN(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    output = Null
    for v in values:
        if v == None:
            continue
        if isinstance(v, float) and math.isnan(v):
            continue
        if output == None:
            output = v
            continue
        output = min(output, v)
    return output


def MAX(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    output = Null
    for v in values:
        if v == None:
            continue
        if isinstance(v, float) and math.isnan(v):
            continue
        if output == None:
            output = v
            continue
        output = max(output, v)
    return output


def PRODUCT(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    output = Null
    for v in values:
        if v == None:
            continue
        if isinstance(v, float) and math.isnan(v):
            continue
        if output == None:
            output = v
            continue
        output *= v
    return output


def SUM(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    output = Null
    for v in values:
        if v == None:
            continue
        if isinstance(v, float) and math.isnan(v):
            continue
        if output == None:
            output = v
            continue
        output += v
    return output


def COUNT(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    output = 0
    for v in values:
        if v != None:
            output += 1
    return output


def SUM(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    output = Null
    for v in values:
        if v == None:
            continue
        if isinstance(v, float) and math.isnan(v):
            continue
        if output == None:
            output = v
            continue
        output += v
    return output


def AND(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    for v in values:
        if v == None:
            continue
        if not v:
            return False
    return True


def OR(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]
    for v in values:
        if v == None:
            continue
        if v:
            return True
    return False


def UNION(*values):
    if isinstance(values, tuple) and len(values) == 1 and isinstance(values[0], (list, set, tuple, Multiset, types.GeneratorType)):
        values = values[0]

    output = set()
    for v in values:
        if values == None:
            continue
        if isinstance(v, (list, set)):
            output.update(v)
            continue
        else:
            output.add(v)
    return output
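Quick sketch of the None/NaN-skipping semantics defined above (the util.collections import path is assumed from how the library is referenced elsewhere in this commit):

    from util.collections import MIN, MAX, SUM

    assert MIN([3, None, float("nan"), 1]) == 1   # None and NaN are ignored
    assert MAX(3, None, 7) == 7                   # also accepts plain varargs
    assert SUM([1, None, 2]) == 3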
@@ -0,0 +1,276 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from ..collections import PRODUCT, reverse, MAX, MIN
from ..cnv import CNV
from ..env.logs import Log
from ..struct import Null, Struct
from ..structs.wraps import wrap


class Matrix(object):
    """
    SIMPLE n-DIMENSIONAL ARRAY OF OBJECTS
    """
    ZERO = None

    def __init__(self, *dims, **kwargs):
        kwargs = wrap(kwargs)
        list = kwargs.list
        if list:
            self.num = 1
            self.dims = (len(list), )
            self.cube = list
            return

        value = kwargs.value
        if value != None:
            self.num = 0
            self.dims = tuple()
            self.cube = value
            return

        self.num = len(dims)
        self.dims = tuple(dims)
        if self.num == 0:
            self.cube = Null
        else:
            self.cube = _null(*dims)

    @staticmethod
    def wrap(array):
        output = Matrix()
        output.num = 1
        output.dims = (len(array), )
        output.cube = array
        return output

    def __getitem__(self, index):
        if isinstance(index, list):
            m = self.cube
            for k in index:
                m = m[k]
            return m
        if isinstance(index, slice):
            pass

    def __setitem__(self, key, value):
        try:
            if len(key) != self.num:
                Log.error("Expecting coordinates to match the number of dimensions")
            last = self.num - 1
            m = self.cube
            for k in key[0:last:]:
                m = m[k]
            m[key[last]] = value
        except Exception, e:
            Log.error("can not set item", e)

    def __bool__(self):
        return self.cube != None

    def __nonzero__(self):
        return self.cube != None

    def __len__(self):
        if self.num == 0:
            return 0
        return PRODUCT(self.dims)

    @property
    def value(self):
        if self.num:
            Log.error("can not get value of with dimension")
        return self.cube

    def __lt__(self, other):
        return self.value < other

    def __gt__(self, other):
        return self.value > other

    def __eq__(self, other):
        if other == None:
            return False
        return self.value == other

    def __add__(self, other):
        return self.value + other

    def __radd__(self, other):
        return other + self.value

    def __sub__(self, other):
        return self.value - other

    def __rsub__(self, other):
        return other - self.value

    def __mul__(self, other):
        return self.value * other

    def __rmul__(self, other):
        return other * self.value

    def __div__(self, other):
        return self.value / other

    def __rdiv__(self, other):
        return other / self.value

    def __iter__(self):
        return (self[c] for c in self._all_combos())

    def __float__(self):
        return self.value

    def groupby(self, io_select):
        """
        SLICE THIS MATRIX INTO ONES WITH LESS DIMENSIONALITY
        """

        #offsets WILL SERVE TO MASK DIMS WE ARE NOT GROUPING BY, AND SERVE AS RELATIVE INDEX FOR EACH COORDINATE
        offsets = []
        new_dim = []
        acc = 1
        for i, d in reverse(enumerate(self.dims)):
            if not io_select[i]:
                new_dim.insert(0, d)
            offsets.insert(0, acc * io_select[i])
            acc *= d

        if not new_dim:
            # WHEN groupby ALL DIMENSIONS, ONLY THE VALUES REMAIN
            # RETURN AN ITERATOR OF PAIRS (c, v), WHERE
            # c - COORDINATES INTO THE CUBE
            # v - VALUE AT GIVEN COORDINATES
            return ((c, self[c]) for c in self._all_combos())
        else:
            output = [[None, Matrix(*new_dim)] for i in range(acc)]
            _groupby(self.cube, 0, offsets, 0, output, tuple(), [])

        return output

    def aggregate(self, type):
        func = aggregates[type]
        if not type:
            Log.error("Aggregate of type {{type}} is not supported yet", {"type": type})

        return func(self.num, self.cube)

    def forall(self, method):
        """
        IT IS EXPECTED THE method ACCEPTS (value, coord, cube), WHERE
        value - VALUE FOUND AT ELEMENT
        coord - THE COORDINATES OF THE ELEMENT (PLEASE, READ ONLY)
        cube - THE WHOLE CUBE, FOR USE IN WINDOW FUNCTIONS
        """
        for c in self._all_combos():
            method(self[c], c, self.cube)

    def _all_combos(self):
        """
        RETURN AN ITERATOR OF ALL COORDINATES
        """
        num = self.num
        dim = self.dims

        combos = PRODUCT(dim)
        if not combos:
            return

        c = [0]*num  # THE CORRECT SIZE
        while True:
            yield c

            for i in range(num-1, -1, -1):
                c[i] += 1
                if c[i] < dim[i]:
                    break
                c[i] = 0
            else:
                break

    def __str__(self):
        return "Matrix " + CNV.object2JSON(self.dims) + ": " + str(self.cube)

    def __json__(self):
        return CNV.object2JSON(self.cube)


Matrix.ZERO = Matrix(value=None)


def _max(depth, cube):
    if depth == 0:
        return cube
    elif depth == 1:
        return MAX(cube)
    else:
        return MAX(_max(depth - 1, c) for c in cube)


def _min(depth, cube):
    if depth == 0:
        return cube
    elif depth == 1:
        return MIN(cube)
    else:
        return MIN(_min(depth - 1, c) for c in cube)


aggregates = Struct(
    max=_max,
    maximum=_max,
    min=_min,
    minimum=_min
)


def _iter(cube, depth):
    if depth == 1:
        return cube.__iter__()
    else:
        def iterator():
            for c in cube:
                for b in _iter(c, depth - 1):
                    yield b

        return iterator()


def _null(*dims):
    d0 = dims[0]
    if d0 == 0:
        Log.error("Zero dimensions not allowed")
    if len(dims) == 1:
        return [Null for i in range(d0)]
    else:
        return [_null(*dims[1::]) for i in range(d0)]


def _groupby(cube, depth, intervals, offset, output, group, new_coord):
    if depth == len(intervals):
        output[offset][0] = group
        output[offset][1][new_coord] = cube
        return

    interval = intervals[depth]

    if interval:
        for i, c in enumerate(cube):
            _groupby(c, depth + 1, intervals, offset + i * interval, output, group + ( i, ), new_coord)
    else:
        for i, c in enumerate(cube):
            _groupby(c, depth + 1, intervals, offset, output, group + (-1, ), new_coord + [i])
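Sketch of the Matrix class above (not part of the commit): a 2x3 cube starts as Nulls, is filled by coordinate, and aggregates over all dimensions.

    m = Matrix(2, 3)               # dims=(2, 3), every cell initialized to Null
    m[(0, 1)] = 4
    m[(1, 2)] = 9
    assert len(m) == 6             # PRODUCT of the dimensions
    assert m.aggregate("max") == 9 # Null cells are skipped by MAX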
@@ -8,17 +8,42 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#


def Multiset(list=None, key_field=None, count_field=None, allow_negative=False):
    if allow_negative:
        return _NegMultiset(list, key_field, count_field)
    else:
        return _Multiset(list, key_field, count_field)
from __future__ import unicode_literals


class _Multiset(object):
class Multiset(object):
    """
    Multiset IS ONE MEMBER IN A FAMILY OF USEFUL CONTAINERS

    def __init__(self, list=None, key_field=None, count_field=None):
    +------------+---------+----------+
    | Uniqueness | Ordered | Type     |
    +------------+---------+----------+
    |    Yes     |   Yes   |  Queue   |
    |    Yes     |   No    |  Set     |
    |    No      |   Yes   |  List    |
    |    No      |   No    | Multiset |
    +------------+---------+----------+
    """

    def __new__(cls, list=None, key_field=None, count_field=None, allow_negative=False):
        try:
            if allow_negative:
                return _NegMultiset(list, key_field, count_field)
            else:
                return _Multiset(list, key_field, count_field)
        except Exception, e:
            from ..env.logs import Log

            Log.error("Not expected", e)


class _Multiset(Multiset):

    def __new__(cls, *args):
        return object.__new__(cls, *args)


    def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
        if not key_field and not count_field:
            self.dic = dict()
            if list:
@@ -26,8 +51,8 @@ class _Multiset(object):
                self.add(i)
            return
        else:
            self.dic={i[key_field]:i[count_field] for i in list}
            self.dic = {i[key_field]: i[count_field] for i in list}


    def __iter__(self):
        for k, m in self.dic.items():
@@ -43,9 +68,9 @@ class _Multiset(object):

    def add(self, value):
        if value in self.dic:
            self.dic[value]+=1
            self.dic[value] += 1
        else:
            self.dic[value]=1
            self.dic[value] = 1
        return self

    def extend(self, values):
@@ -54,37 +79,43 @@ class _Multiset(object):

    def remove(self, value):
        if value not in self.dic:
            from .logs import Log
            Log.error("{{value}} is not in multiset", {"value":value})
            from ..env.logs import Log

            Log.error("{{value}} is not in multiset", {"value": value})
        self._remove(value)

    def copy(self):
        output = _Multiset()
        output.dic=self.dic.copy()
        output.dic = self.dic.copy()
        return output


    def _remove(self, value):
        count=self.dic.get(value, None)
        count = self.dic.get(value, None)
        if count == None:
            return

        count-=1
        if count==0:
            del(self.dic[value])
        count -= 1
        if count == 0:
            del (self.dic[value])
        else:
            self.dic[value]=count
            self.dic[value] = count

    def __sub__(self, other):
        output=self.copy()
        output = self.copy()
        for o in other:
            output._remove(o)
        return output

    def __add__(self, other):
        output=self.copy()
        for o in other:
            output.add(o)
        output = self.copy()

        if isinstance(other, Multiset):
            for k, c in other.dic.items():
                output.dic[k] = output.dic.get(k, 0) + c
        else:
            for o in other:
                output.add(o)
        return output

    def __set__(self, other):
@@ -106,10 +137,11 @@ class _Multiset(object):
        return 0


class _NegMultiset(Multiset):
    def __new__(cls, *args, **kwargs):
        return object.__new__(cls, *args, **kwargs)

class _NegMultiset(object):

    def __init__(self, list=None, key_field=None, count_field=None):
    def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
        if not key_field and not count_field:
            self.dic = dict()
            if list:
@@ -117,7 +149,7 @@ class _NegMultiset(object):
                self.add(i)
            return
        else:
            self.dic={i[key_field]:i[count_field] for i in list}
            self.dic = {i[key_field]: i[count_field] for i in list}


    # def __iter__(self):
@@ -158,12 +190,12 @@ class _NegMultiset(object):

    def copy(self):
        output = _NegMultiset()
        output.dic=self.dic.copy()
        output.dic = self.dic.copy()
        return output


    def __add__(self, other):
        output=self.copy()
        output = self.copy()

        if isinstance(other, _NegMultiset):
            for k, c in other.dic.items():
@@ -178,7 +210,7 @@ class _NegMultiset(object):
        if not other:
            return self

        output=self.copy()
        output = self.copy()
        for o in other:
            output.remove(o)
        return output
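The Multiset semantics above, sketched (the import path follows cnv.py's "from .collections.multiset import Multiset" in this commit):

    from util.collections.multiset import Multiset

    m = Multiset(["a", "a", "b"])
    assert m.dic == {"a": 2, "b": 1}
    m = m + ["b"]                    # __add__ accepts any iterable
    assert m.dic == {"a": 2, "b": 2}
    m.remove("a")                    # decrements the count for "a"
    assert m.dic == {"a": 1, "b": 2}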
@@ -0,0 +1,52 @@

# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals


class Queue(object):
    """
    A SET WITH ADDED ORDER MAINTAINED

    +------------+---------+----------+
    | Uniqueness | Ordered | Type     |
    +------------+---------+----------+
    |    Yes     |   Yes   |  Queue   |
    |    Yes     |   No    |  Set     |
    |    No      |   Yes   |  List    |
    |    No      |   No    | Multiset |
    +------------+---------+----------+
    """
    def __init__(self):
        self.list = []

    def __nonzero__(self):
        return len(self.list) > 0

    def __len__(self):
        return self.list.__len__()

    def add(self, value):
        if value in self.list:
            return self
        self.list.append(value)

    def extend(self, values):
        for v in values:
            self.add(v)

    def pop(self):
        if len(self.list) == 0:
            return None

        output = self.list.pop(0)
        return output
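The ordered-set behavior documented in the table above, sketched:

    q = Queue()
    q.extend(["a", "b", "a", "c"])   # the duplicate "a" is dropped
    assert len(q) == 3
    assert q.pop() == "a"            # pop(0): first added comes out first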
@@ -0,0 +1,97 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals
from ..collections import SUM


class Relation_usingList(object):
    def __init__(self):
        self.all=set()

    def len(self):
        return len(self.all)

    def add(self, key, value):
        test = (key, value)
        if test not in self.all:
            self.all.add(test)

    def testAndAdd(self, key, value):
        """
        RETURN TRUE IF THIS RELATION IS NET-NEW
        """
        test = (key, value)
        if test not in self.all:
            self.all.add(test)
            return True
        return False

    def extend(self, key, values):
        for v in values:
            self.add(key, v)

    def __getitem__(self, key):
        """
        RETURN AN ARRAY OF OBJECTS THAT key MAPS TO
        """
        return [v for k, v in self.all if k == key]


class Relation(object):
    def __init__(self):
        self.map = dict()

    def len(self):
        return SUM(len(v) for k, v in self.map.items())

    def add(self, key, value):
        to = self.map.get(key, None)
        if to is None:
            to = set()
            self.map[key] = to
        to.add(value)

    def testAndAdd(self, key, value):
        """
        RETURN TRUE IF THIS RELATION IS NET-NEW
        """
        to = self.map.get(key, None)
        if to is None:
            to = set()
            self.map[key] = to
            to.add(value)
            return True

        if value in to:
            return False
        to.add(value)
        return True

    def extend(self, key, values):
        to = self.map.get(key, None)
        if not to:
            to = set(values)
            self.map[key] = to
            return

        to.update(values)

    def __getitem__(self, key):
        """
        RETURN AN ARRAY OF OBJECTS THAT key MAPS TO
        """
        o = self.map.get(key, None)
        if not o:
            return set()
        return o

    def domain(self):
        return self.map.keys()
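Sketch of the set-backed Relation above: a many-valued mapping with net-new testing.

    r = Relation()
    assert r.testAndAdd("bug", 123) == True    # net-new pair
    assert r.testAndAdd("bug", 123) == False   # already present
    r.extend("bug", [456, 789])
    assert r["bug"] == {123, 456, 789}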
@@ -1,344 +0,0 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from datetime import datetime
import re
import sha
import time

import requests
from .threads import ThreadedQueue

import struct
from .maths import Math
from .queries import Q
from .cnv import CNV
from .logs import Log
from .struct import nvl, Null
from .struct import Struct, StructList

DEBUG = False


class ElasticSearch(object):
    def __init__(self, settings):
        assert settings.host
        assert settings.index
        assert settings.type

        if settings.index == settings.alias:
            Log.error("must have a unique index name")
        self.metadata = None
        if not settings.port:
            settings.port = 9200
        self.debug = nvl(settings.debug, DEBUG)
        globals()["DEBUG"] = DEBUG or self.debug

        self.settings = settings
        self.path = settings.host + ":" + unicode(settings.port) + "/" + settings.index + "/" + settings.type

    @staticmethod
    def create_index(settings, schema):
        if isinstance(schema, basestring):
            schema = CNV.JSON2object(schema)

        ElasticSearch.post(
            settings.host + ":" + unicode(settings.port) + "/" + settings.index,
            data=CNV.object2JSON(schema),
            headers={"Content-Type": "application/json"}
        )
        time.sleep(2)
        es = ElasticSearch(settings)
        return es

    @staticmethod
    def delete_index(settings, index=None):
        index = nvl(index, settings.index)

        ElasticSearch.delete(
            settings.host + ":" + unicode(settings.port) + "/" + index,
        )

    def get_aliases(self):
        """
        RETURN LIST OF {"alias":a, "index":i} PAIRS
        ALL INDEXES INCLUDED, EVEN IF NO ALIAS {"alias":Null}
        """
        data = self.get_metadata().indices
        output = []
        for index, desc in data.items():
            if not desc["aliases"]:
                output.append({"index": index, "alias": None})
            else:
                for a in desc["aliases"]:
                    output.append({"index": index, "alias": a})
        return struct.wrap(output)

    def get_metadata(self):
        if not self.metadata:
            response = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/_cluster/state")
            self.metadata = response.metadata
        return self.metadata

    def get_schema(self):
        return self.get_metadata().indicies[self.settings.index]

    #DELETE ALL INDEXES WITH GIVEN PREFIX, EXCEPT name
    def delete_all_but(self, prefix, name):
        if prefix == name:
            Log.note("{{index_name}} will not be deleted", {"index_name": prefix})
        for a in self.get_aliases():
            # MATCH <prefix>YYMMDD_HHMMSS FORMAT
            if re.match(re.escape(prefix) + "\\d{8}_\\d{6}", a.index) and a.index != name:
                ElasticSearch.delete_index(self.settings, a.index)

    @staticmethod
    def proto_name(prefix, timestamp=None):
        if not timestamp:
            timestamp = datetime.utcnow()
        return prefix + CNV.datetime2string(timestamp, "%Y%m%d_%H%M%S")

    def add_alias(self, alias):
        self.metadata = None
        requests.post(
            self.settings.host + ":" + unicode(self.settings.port) + "/_aliases",
            CNV.object2JSON({
                "actions": [
                    {"add": {"index": self.settings.index, "alias": alias}}
                ]
            })
        )

    def get_proto(self, alias):
        """
        RETURN ALL INDEXES THAT ARE INTENDED TO BE GIVEN alias, BUT HAVE NO
        ALIAS YET BECAUSE INCOMPLETE
        """
        output = Q.sort([
            a.index
            for a in self.get_aliases()
            if re.match(re.escape(alias) + "\\d{8}_\\d{6}", a.index) and not a.alias
        ])
        return output

    def get_index(self, alias):
        """
        RETURN THE INDEX USED BY THIS alias
        """
        output = Q.sort([
            a.index
            for a in self.get_aliases()
            if a.alias == alias
        ])
        if len(output) > 1:
            Log.error("only one index with given alias==\"{{alias}}\" expected", {"alias": alias})

        if not output:
            return Null

        return output.last()

    def is_proto(self, index):
        """
        RETURN True IF THIS INDEX HAS NOT BEEN ASSIGNED IT'S ALIAS
        """
        for a in self.get_aliases():
            if a.index == index and a.alias:
                return False
        return True

    def delete_record(self, query):
        if isinstance(query, dict):
            ElasticSearch.delete(
                self.path + "/_query",
                data=CNV.object2JSON(query)
            )
        else:
            ElasticSearch.delete(
                self.path + "/" + query
            )

    def extend(self, records):
        # ADD LINE WITH COMMAND
        lines = []
        for r in records:
            id = r["id"]
            if "json" in r:
                json = r["json"]
            elif "value" in r:
                json = CNV.object2JSON(r["value"])
            else:
                Log.error("Expecting every record given to have \"value\" or \"json\" property")

            if id == None:
                id = sha.new(json).hexdigest()

            lines.append(u'{"index":{"_id":' + CNV.object2JSON(id) + '}}')
            lines.append(json)

        if not lines: return
        response = ElasticSearch.post(
            self.path + "/_bulk",
            data="\n".join(lines).encode("utf8") + "\n",
            headers={"Content-Type": "text"}
        )
        items = response["items"]

        for i, item in enumerate(items):
            if not item.index.ok:
                Log.error("{{error}} while loading line:\n{{line}}", {
                    "error": item.index.error,
                    "line": lines[i * 2 + 1]
                })

        if self.debug:
            Log.note("{{num}} items added", {"num": len(lines) / 2})

    # RECORDS MUST HAVE id AND json AS A STRING OR
    # HAVE id AND value AS AN OBJECT
    def add(self, record):
        if isinstance(record, list):
            Log.error("add() has changed to only accept one record, no lists")
        self.extend([record])

    # -1 FOR NO REFRESH
    def set_refresh_interval(self, seconds):
        if seconds <= 0:
            interval = "-1"
        else:
            interval = unicode(seconds) + "s"

        response = ElasticSearch.put(
            self.settings.host + ":" + unicode(
                self.settings.port) + "/" + self.settings.index + "/_settings",
            data="{\"index.refresh_interval\":\"" + interval + "\"}"
        )

        if response.content != '{"ok":true}':
            Log.error("Can not set refresh interval ({{error}})", {
                "error": response.content
            })

    def search(self, query):
        try:
            if DEBUG:
                Log.note("Query:\n{{query|indent}}", {"query": query})
            return ElasticSearch.post(self.path + "/_search", data=CNV.object2JSON(query))
        except Exception, e:
            Log.error("Problem with search (path={{path}}):\n{{query|indent}}", {
                "path": self.path + "/_search",
                "query": query
            }, e)

    def threaded_queue(self, size):
        return ThreadedQueue(self, size)

    @staticmethod
    def post(*args, **kwargs):
        try:
            response = requests.post(*args, **kwargs)
            if DEBUG:
                Log.note(response.content[:130])
            details = CNV.JSON2object(response.content)
            if details.error:
                Log.error(details.error)
            return details
        except Exception, e:
            if args[0][0:4] != "http":
                suggestion = " (did you forget \"http://\" prefix on the host name?)"
            else:
                suggestion = ""
            Log.error("Problem with call to {{url}}" + suggestion, {"url": args[0]}, e)

    @staticmethod
    def get(*list, **args):
        try:
            response = requests.get(*list, **args)
            if DEBUG:
                Log.note(response.content[:130])
            details = CNV.JSON2object(response.content)
            if details.error:
                Log.error(details.error)
            return details
        except Exception, e:
            Log.error("Problem with call to {{url}}", {"url": list[0]}, e)

    @staticmethod
    def put(*list, **args):
        try:
            response = requests.put(*list, **args)
            if DEBUG:
                Log.note(response.content)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", {"url": list[0]}, e)

    @staticmethod
    def delete(*args, **kwargs):
        try:
            response = requests.delete(*args, **kwargs)
            if DEBUG:
                Log.note(response.content)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", {"url": args[0]}, e)

    @staticmethod
    def scrub(r):
        """
        REMOVE KEYS OF DEGENERATE VALUES (EMPTY STRINGS, EMPTY LISTS, AND NULLS)
        TO LOWER CASE
        CONVERT STRINGS OF NUMBERS TO NUMBERS
        RETURNS **COPY**, DOES NOT CHANGE ORIGINAL
        """
        return struct.wrap(_scrub(r))


def _scrub(r):
    try:
        if r == None:
            return None
        elif isinstance(r, basestring):
            if r == "":
                return None
            return r
        elif Math.is_number(r):
            return CNV.value2number(r)
        elif isinstance(r, dict):
            if isinstance(r, Struct):
                r = r.dict
            output = {}
            for k, v in r.items():
                v = _scrub(v)
                if v != None:
                    output[k.lower()] = v
            if len(output) == 0:
                return None
            return output
        elif hasattr(r, '__iter__'):
            if isinstance(r, StructList):
                r = r.list
            output = []
            for v in r:
                v = _scrub(v)
                if v != None:
                    output.append(v)
            if not output:
                return None
            try:
                return Q.sort(output)
            except Exception:
                return output
        else:
            return r
    except Exception, e:
        Log.warning("Can not scrub: {{json}}", {"json": r})
@@ -0,0 +1 @@
__author__ = 'klahnakoski'
@@ -0,0 +1,477 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals
from datetime import datetime
import re
import time
import requests

from ..maths.randoms import Random
from ..thread.threads import ThreadedQueue
from ..maths import Math
from ..cnv import CNV
from ..env.logs import Log
from ..struct import nvl, Null
from ..structs.wraps import wrap, unwrap
from ..struct import Struct, StructList


DEBUG = False


class ElasticSearch(object):
    """
    AN ElasticSearch INDEX LIFETIME MANAGEMENT TOOL

    ElasticSearch'S REST INTERFACE WORKS WELL WITH PYTHON AND JAVASCRIPT
    SO HARDLY ANY LIBRARY IS REQUIRED.  IT IS SIMPLER TO MAKE HTTP CALLS
    DIRECTLY TO ES USING YOUR FAVORITE HTTP LIBRARY.  I HAVE SOME
    CONVENIENCE FUNCTIONS HERE, BUT IT'S BETTER TO MAKE YOUR OWN.

    THIS CLASS IS TO HELP DURING ETL, CREATING INDEXES, MANAGING ALIASES
    AND REMOVING INDEXES WHEN THEY HAVE BEEN REPLACED.  IT USES A STANDARD
    SUFFIX (YYYYMMDD-HHMMSS) TO TRACK AGE AND RELATIONSHIP TO THE ALIAS,
    IF ANY YET.

    """
    def __init__(self, settings=None):
        """
        settings.explore_metadata == True - IF PROBING THE CLUSTER FOR METADATA IS ALLOWED
        settings.timeout == NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
        """

        if settings is None:
            self.debug = DEBUG
            return

        settings = wrap(settings)
        assert settings.host
        assert settings.index
        assert settings.type
        settings.setdefault("explore_metadata", True)

        if settings.index == settings.alias:
            Log.error("must have a unique index name")
        self.cluster_metadata = None
        if not settings.port:
            settings.port = 9200
        self.debug = nvl(settings.debug, DEBUG)
        self.settings = settings
        try:
            index = self.get_index(settings.index)
            if index:
                settings.alias = settings.index
                settings.index = index
        except Exception, e:
            # EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
            pass

        self.path = settings.host + ":" + unicode(settings.port) + "/" + settings.index + "/" + settings.type


    @staticmethod
    def get_or_create_index(settings, schema, limit_replicas=False):
        es = ElasticSearch(settings)
        aliases = es.get_aliases()
        if settings.index not in [a.index for a in aliases]:
            schema = CNV.JSON2object(CNV.object2JSON(schema), paths=True)
            es = ElasticSearch.create_index(settings, schema, limit_replicas=limit_replicas)
        return es


    @staticmethod
    def create_index(settings, schema=None, limit_replicas=False):
        if not schema and settings.schema_file:
            from .files import File

            schema = CNV.JSON2object(File(settings.schema_file).read(), flexible=True, paths=True)
        else:
            schema = wrap(schema)
        if isinstance(schema, basestring):
            schema = CNV.JSON2object(schema)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = DUMMY.get(settings.host + ":" + unicode(settings.port) + "/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized", {
                    "from": schema.settings.index.number_of_replicas,
                    "to": health.number_of_nodes-1
                })
                schema.settings.index.number_of_replicas = health.number_of_nodes-1

        DUMMY._post(
            settings.host + ":" + unicode(settings.port) + "/" + settings.index,
            data=CNV.object2JSON(schema).encode("utf8"),
            headers={"Content-Type": "application/json"}
        )
        time.sleep(2)
        es = ElasticSearch(settings)
        return es

    @staticmethod
    def delete_index(settings, index=None):
        index = nvl(index, settings.index)

        DUMMY.delete(
            settings.host + ":" + unicode(settings.port) + "/" + index,
        )

    def get_aliases(self):
        """
        RETURN LIST OF {"alias":a, "index":i} PAIRS
        ALL INDEXES INCLUDED, EVEN IF NO ALIAS {"alias":Null}
        """
        data = self.get_metadata().indices
        output = []
        for index, desc in data.items():
            if not desc["aliases"]:
                output.append({"index": index, "alias": None})
            else:
                for a in desc["aliases"]:
                    output.append({"index": index, "alias": a})
        return wrap(output)

    def get_metadata(self):
        if self.settings.explore_metadata:
            if not self.cluster_metadata:
                response = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/_cluster/state")
                self.cluster_metadata = response.metadata
                self.node_metatdata = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/")
        else:
            Log.error("Metadata exploration has been disabled")
        return self.cluster_metadata


    def get_schema(self):
        if self.settings.explore_metadata:
            indices = self.get_metadata().indices
            index = indices[self.settings.index]
            if not index.mappings[self.settings.type]:
                Log.error("ElasticSearch index ({{index}}) does not have type ({{type}})", self.settings)
            return index.mappings[self.settings.type]
        else:
            mapping = self.get(self.settings.host + ":" + unicode(self.settings.port) + "/" + self.settings.index +"/" + self.settings.type + "/_mapping")
            if not mapping[self.settings.type]:
                Log.error("{{index}} does not have type {{type}}", self.settings)
            return wrap({"mappings":mapping[self.settings.type]})


    #DELETE ALL INDEXES WITH GIVEN PREFIX, EXCEPT name
    def delete_all_but(self, prefix, name):
        if prefix == name:
            Log.note("{{index_name}} will not be deleted", {"index_name": prefix})
        for a in self.get_aliases():
            # MATCH <prefix>YYMMDD_HHMMSS FORMAT
            if re.match(re.escape(prefix) + "\\d{8}_\\d{6}", a.index) and a.index != name:
                ElasticSearch.delete_index(self.settings, a.index)

    @staticmethod
    def proto_name(prefix, timestamp=None):
        if not timestamp:
            timestamp = datetime.utcnow()
        return prefix + CNV.datetime2string(timestamp, "%Y%m%d_%H%M%S")

    def add_alias(self, alias):
        self.cluster_metadata = None
        requests.post(
            self.settings.host + ":" + unicode(self.settings.port) + "/_aliases",
            CNV.object2JSON({
                "actions": [
                    {"add": {"index": self.settings.index, "alias": alias}}
                ]
            }),
            timeout=nvl(self.settings.timeout, 30)
        )

    def get_proto(self, alias):
        """
        RETURN ALL INDEXES THAT ARE INTENDED TO BE GIVEN alias, BUT HAVE NO
        ALIAS YET BECAUSE INCOMPLETE
        """
        output = sort([
            a.index
            for a in self.get_aliases()
            if re.match(re.escape(alias) + "\\d{8}_\\d{6}", a.index) and not a.alias
        ])
        return output

    def get_index(self, alias):
        """
        RETURN THE INDEX USED BY THIS alias
        """
        output = sort([
            a.index
            for a in self.get_aliases()
            if a.alias == alias
        ])
        if len(output) > 1:
            Log.error("only one index with given alias==\"{{alias}}\" expected", {"alias": alias})

        if not output:
|
||||
return Null
|
||||
|
||||
return output.last()
|
||||
|
||||
def is_proto(self, index):
|
||||
"""
|
||||
RETURN True IF THIS INDEX HAS NOT BEEN ASSIGNED ITS ALIAS
|
||||
"""
|
||||
for a in self.get_aliases():
|
||||
if a.index == index and a.alias:
|
||||
return False
|
||||
return True
|
||||
|
||||
def delete_record(self, filter):
|
||||
self.get_metadata()
|
||||
if self.node_metatdata.version.number.startswith("0.90"):
|
||||
query = filter
|
||||
elif self.node_metatdata.version.number.startswith("1.0"):
|
||||
query = {"query": filter}
|
||||
else:
|
||||
Log.error("not implemented yet")
|
||||
|
||||
if self.debug:
|
||||
Log.note("Delete bugs:\n{{query}}", {"query": query})
|
||||
|
||||
self.delete(
|
||||
self.path + "/_query",
|
||||
data=CNV.object2JSON(query)
|
||||
)
|
||||
|
||||
def extend(self, records):
|
||||
"""
|
||||
records - MUST HAVE FORM OF
|
||||
[{"value":value}, ... {"value":value}] OR
|
||||
[{"json":json}, ... {"json":json}]
|
||||
OPTIONAL "id" PROPERTY IS ALSO ACCEPTED
|
||||
"""
|
||||
lines = []
|
||||
try:
|
||||
for r in records:
|
||||
id = r.get("id", None)
|
||||
if id == None:
|
||||
id = Random.hex(40)
|
||||
|
||||
if "json" in r:
|
||||
json = r["json"]
|
||||
elif "value" in r:
|
||||
json = CNV.object2JSON(r["value"])
|
||||
else:
|
||||
json = None
|
||||
Log.error("Expecting every record given to have \"value\" or \"json\" property")
|
||||
|
||||
lines.append('{"index":{"_id": ' + CNV.object2JSON(id) + '}}')
|
||||
lines.append(json)
|
||||
|
||||
if not lines:
|
||||
return
|
||||
|
||||
try:
|
||||
data_bytes = "\n".join(lines) + "\n"
|
||||
data_bytes = data_bytes.encode("utf8")
|
||||
except Exception, e:
|
||||
Log.error("can not make request body from\n{{lines|indent}}", {"lines": lines}, e)
|
||||
|
||||
response = self._post(
|
||||
self.path + "/_bulk",
|
||||
data=data_bytes,
|
||||
headers={"Content-Type": "text"},
|
||||
timeout=self.settings.timeout
|
||||
)
|
||||
items = response["items"]
|
||||
|
||||
for i, item in enumerate(items):
|
||||
if not item.index.ok:
|
||||
Log.error("{{error}} while loading line:\n{{line}}", {
|
||||
"error": item.index.error,
|
||||
"line": lines[i * 2 + 1]
|
||||
})
|
||||
|
||||
if self.debug:
|
||||
Log.note("{{num}} items added", {"num": len(lines) / 2})
|
||||
except Exception, e:
|
||||
if e.message.startswith("sequence item "):
|
||||
Log.error("problem with {{data}}", {"data": repr(lines[int(e.message[14:16].strip())])}, e)
|
||||
Log.error("problem sending to ES", e)
|
||||
|
||||
|
||||
# RECORDS MUST HAVE id AND json AS A STRING OR
|
||||
# HAVE id AND value AS AN OBJECT
|
||||
def add(self, record):
|
||||
if isinstance(record, list):
|
||||
Log.error("add() has changed to only accept one record, no lists")
|
||||
self.extend([record])
|
||||
|
||||
# -1 FOR NO REFRESH
|
||||
def set_refresh_interval(self, seconds):
|
||||
if seconds <= 0:
|
||||
interval = "-1"
|
||||
else:
|
||||
interval = unicode(seconds) + "s"
|
||||
|
||||
response = self.put(
|
||||
self.settings.host + ":" + unicode(
|
||||
self.settings.port) + "/" + self.settings.index + "/_settings",
|
||||
data="{\"index.refresh_interval\":\"" + interval + "\"}"
|
||||
)
|
||||
|
||||
result = CNV.JSON2object(response.content.decode("utf-8"))
|
||||
if not result.ok:
|
||||
Log.error("Can not set refresh interval ({{error}})", {
|
||||
"error": response.content.decode("utf-8")
|
||||
})
|
||||
|
||||
def search(self, query, timeout=None):
|
||||
query = wrap(query)
|
||||
try:
|
||||
if self.debug:
|
||||
if len(query.facets.keys()) > 20:
|
||||
show_query = query.copy()
|
||||
show_query.facets = {k: "..." for k in query.facets.keys()}
|
||||
else:
|
||||
show_query = query
|
||||
Log.note("Query:\n{{query|indent}}", {"query": show_query})
|
||||
return self._post(
|
||||
self.path + "/_search",
|
||||
data=CNV.object2JSON(query).encode("utf8"),
|
||||
timeout=nvl(timeout, self.settings.timeout)
|
||||
)
|
||||
except Exception, e:
|
||||
Log.error("Problem with search (path={{path}}):\n{{query|indent}}", {
|
||||
"path": self.path + "/_search",
|
||||
"query": query
|
||||
}, e)
|
||||
|
||||
def threaded_queue(self, size=None, period=None):
|
||||
return ThreadedQueue(self, size=size, period=period)
|
||||
|
||||
def _post(self, *args, **kwargs):
|
||||
if "data" in kwargs and not isinstance(kwargs["data"], str):
|
||||
Log.error("data must be utf8 encoded string")
|
||||
|
||||
try:
|
||||
kwargs = wrap(kwargs)
|
||||
kwargs.setdefault("timeout", 600)
|
||||
kwargs.headers["Accept-Encoding"] = "gzip,deflate"
|
||||
kwargs = unwrap(kwargs)
|
||||
response = requests.post(*args, **kwargs)
|
||||
if self.debug:
|
||||
Log.note(response.content.decode("utf-8")[:130])
|
||||
details = CNV.JSON2object(response.content.decode("utf-8"))
|
||||
if details.error:
|
||||
Log.error(CNV.quote2string(details.error))
|
||||
if details._shards.failed > 0:
|
||||
Log.error("Shard failure")
|
||||
return details
|
||||
except Exception, e:
|
||||
if args[0][0:4] != "http":
|
||||
suggestion = " (did you forget \"http://\" prefix on the host name?)"
|
||||
else:
|
||||
suggestion = ""
|
||||
|
||||
Log.error("Problem with call to {{url}}" + suggestion +"\n{{body}}", {
|
||||
"url": args[0],
|
||||
"body": kwargs["data"] if DEBUG else kwargs["data"][0:100]
|
||||
}, e)
|
||||
|
||||
def get(self, *args, **kwargs):
|
||||
try:
|
||||
kwargs = wrap(kwargs)
|
||||
kwargs.setdefault("timeout", 600)
|
||||
response = requests.get(*args, **kwargs)
|
||||
if self.debug:
|
||||
Log.note(response.content.decode("utf-8")[:130])
|
||||
details = wrap(CNV.JSON2object(response.content.decode("utf-8")))
|
||||
if details.error:
|
||||
Log.error(details.error)
|
||||
return details
|
||||
except Exception, e:
|
||||
Log.error("Problem with call to {{url}}", {"url": args[0]}, e)
|
||||
|
||||
def put(self, *args, **kwargs):
|
||||
try:
|
||||
kwargs = wrap(kwargs)
|
||||
kwargs.setdefault("timeout", 60)
|
||||
response = requests.put(*args, **kwargs)
|
||||
if self.debug:
|
||||
Log.note(response.content.decode("utf-8"))
|
||||
return response
|
||||
except Exception, e:
|
||||
Log.error("Problem with call to {{url}}", {"url": args[0]}, e)
|
||||
|
||||
def delete(self, *args, **kwargs):
|
||||
try:
|
||||
kwargs.setdefault("timeout", 60)
|
||||
response = requests.delete(*args, **kwargs)
|
||||
if self.debug:
|
||||
Log.note(response.content.decode("utf-8"))
|
||||
return response
|
||||
except Exception, e:
|
||||
Log.error("Problem with call to {{url}}", {"url": args[0]}, e)
|
||||
|
||||
@staticmethod
|
||||
def scrub(r):
|
||||
"""
|
||||
REMOVE KEYS OF DEGENERATE VALUES (EMPTY STRINGS, EMPTY LISTS, AND NULLS)
|
||||
TO LOWER CASE
|
||||
CONVERT STRINGS OF NUMBERS TO NUMBERS
|
||||
RETURNS **COPY**, DOES NOT CHANGE ORIGINAL
|
||||
"""
|
||||
return wrap(_scrub(r))
|
||||
|
||||
|
||||
def _scrub(r):
|
||||
try:
|
||||
if r == None:
|
||||
return None
|
||||
elif isinstance(r, basestring):
|
||||
if r == "":
|
||||
return None
|
||||
return r
|
||||
elif Math.is_number(r):
|
||||
return CNV.value2number(r)
|
||||
elif isinstance(r, dict):
|
||||
if isinstance(r, Struct):
|
||||
r = object.__getattribute__(r, "__dict__")
|
||||
output = {}
|
||||
for k, v in r.items():
|
||||
v = _scrub(v)
|
||||
if v != None:
|
||||
output[k.lower()] = v
|
||||
if len(output) == 0:
|
||||
return None
|
||||
return output
|
||||
elif hasattr(r, '__iter__'):
|
||||
if isinstance(r, StructList):
|
||||
r = r.list
|
||||
output = []
|
||||
for v in r:
|
||||
v = _scrub(v)
|
||||
if v != None:
|
||||
output.append(v)
|
||||
if not output:
|
||||
return None
|
||||
try:
|
||||
return sort(output)
|
||||
except Exception:
|
||||
return output
|
||||
else:
|
||||
return r
|
||||
except Exception, e:
|
||||
Log.warning("Can not scrub: {{json}}", {"json": r})
|
||||
|
||||
|
||||
|
||||
def sort(values):
|
||||
return wrap(sorted(values))
|
||||
|
||||
|
||||
DUMMY = ElasticSearch()
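
# A minimal usage sketch of the ElasticSearch class above. Host, index, and
# type names are hypothetical; wrap() and CNV are the helpers imported at the
# top of this module, and a reachable cluster is assumed:
example_settings = wrap({
    "host": "http://localhost",   # the "http://" prefix is required, see _post()
    "port": 9200,
    "index": "public_bugs",
    "type": "bug_version"
})
es = ElasticSearch.get_or_create_index(example_settings, {"mappings": {"bug_version": {}}})
es.extend([{"id": "12345", "value": {"bug_id": 12345, "status": "NEW"}}])  # bulk load
hits = es.search({"query": {"match_all": {}}, "size": 10})                 # raw ES query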
@ -8,27 +8,39 @@
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#


from __future__ import unicode_literals
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
import sys
from .struct import nvl
from .. import struct
from ..struct import nvl


class Emailer:

    def __init__(self, settings):
        self.settings=settings
        """
        REQUIRES SETTINGS LIKE
        "email": {
            "from_address": "klahnakoski@mozilla.com",  # DEFAULT
            "to": "klahnakoski@mozilla.com",  # DEFAULT
            "subject": "catchy title",  # DEFAULT
            "host": "mail.mozilla.com",
            "port": 465,
            "username": "example@example.com",
            "password": "password",
            "use_ssl": 1
        }
        """
        self.settings = settings

    def send_email(self,
        from_address = None,
        to_addrs = None,
        subject='No Subject',
        text_data = None,
        html_data = None
        from_address=None,
        to_addrs=None,
        subject=None,
        text_data=None,
        html_data=None
    ):
        """Sends an email.

@ -42,9 +54,10 @@ class Emailer:
        viewer supports it; otherwise he'll see the text content.
        """

        settings=self.settings
        settings = self.settings

        from_address=nvl(from_address, settings.from_address)
        from_address = nvl(from_address, settings["from"], settings.from_address)
        to_addrs = listwrap(nvl(to_addrs, settings.to, settings.to_addrs))

        if not from_address or not to_addrs:
            raise Exception("Both from_addr and to_addrs must be specified")

@ -63,14 +76,14 @@ class Emailer:
            msg = MIMEText(text_data)
        elif not text_data:
            msg = MIMEMultipart()
            msg.preamble = subject
            msg.preamble = nvl(subject, settings.subject, 'No Subject')
            msg.attach(MIMEText(html_data, 'html'))
        else:
            msg = MIMEMultipart('alternative')
            msg.attach(MIMEText(text_data, 'plain'))
            msg.attach(MIMEText(html_data, 'html'))

        msg['Subject'] = subject
        msg['Subject'] = nvl(subject, settings.subject)
        msg['From'] = from_address
        msg['To'] = ', '.join(to_addrs)

@ -79,7 +92,6 @@ class Emailer:
        server.quit()



if sys.hexversion < 0x020603f0:
    # versions earlier than 2.6.3 have a bug in smtplib when sending over SSL:
    # http://bugs.python.org/issue4066

@ -90,7 +102,8 @@ if sys.hexversion < 0x020603f0:
    import ssl

    def _get_socket_fixed(self, host, port, timeout):
        if self.debuglevel > 0: print>> sys.stderr, 'connect:', (host, port)
        if self.debuglevel > 0:
            print>> sys.stderr, 'connect:', (host, port)
        new_socket = socket.create_connection((host, port), timeout)
        new_socket = ssl.wrap_socket(new_socket, self.keyfile, self.certfile)
        self.file = smtplib.SSLFakeFile(new_socket)
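
# A sketch of the updated send_email() defaults. Addresses are hypothetical,
# and the settings object is assumed to be a wrapped Struct (attribute
# access), as elsewhere in this library:
example_settings = struct.wrap({
    "from_address": "sender@example.com",
    "to": "dev-team@example.com",
    "subject": "ETL run failed",
    "host": "mail.example.com",
    "port": 465,
    "username": "sender@example.com",
    "password": "password",
    "use_ssl": 1
})
emailer = Emailer(example_settings)
# from_address, to_addrs, and subject all fall back to the settings above:
emailer.send_email(text_data="See attached log", html_data="<b>See attached log</b>")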
@ -0,0 +1,210 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#


from datetime import datetime
import io
import os
import shutil
from ..maths import crypto
from ..struct import nvl
from ..structs.wraps import listwrap
from ..cnv import CNV


class File(object):
    """
    ASSUMES ALL FILE CONTENT IS UTF8 ENCODED STRINGS
    """

    def __init__(self, filename, buffering=2 ** 14, suffix=None):
        """
        YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES
        """
        if filename == None:
            from ..env.logs import Log

            Log.error("File must be given a filename")
        elif isinstance(filename, basestring):
            self.key = None
            self._filename = "/".join(filename.split(os.sep))  # USE UNIX STANDARD
            self.buffering = buffering
        else:
            self.key = CNV.base642bytearray(filename.key)
            self._filename = "/".join(filename.path.split(os.sep))  # USE UNIX STANDARD
            self.buffering = buffering

        if suffix:
            self._filename = File.add_suffix(self._filename, suffix)

    @property
    def filename(self):
        return self._filename.replace("/", os.sep)

    @property
    def abspath(self):
        return os.path.abspath(self._filename)

    @staticmethod
    def add_suffix(filename, suffix):
        """
        ADD suffix TO THE filename (NOT INCLUDING THE FILE EXTENSION)
        """
        path = filename.split("/")
        parts = path[-1].split(".")
        i = max(len(parts) - 2, 0)
        parts[i] = parts[i] + suffix
        path[-1] = ".".join(parts)
        return "/".join(path)

    def backup_name(self, timestamp=None):
        """
        RETURN A FILENAME THAT CAN SERVE AS A BACKUP FOR THIS FILE
        """
        suffix = CNV.datetime2string(nvl(timestamp, datetime.now()), "%Y%m%d_%H%M%S")
        return File.add_suffix(self._filename, suffix)

    def read(self, encoding="utf8"):
        with open(self._filename, "rb") as f:
            content = f.read().decode(encoding)
            if self.key:
                return crypto.decrypt(content, self.key)
            else:
                return content

    def read_ascii(self):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "r") as f:
            return f.read()

    def write_ascii(self, content):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "w") as f:
            f.write(content)

    def write(self, data):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "wb") as f:
            if isinstance(data, list) and self.key:
                from ..env.logs import Log

                Log.error("list of data and keys are not supported, encrypt before sending to file")

            for d in listwrap(data):
                if not isinstance(d, unicode):
                    from ..env.logs import Log

                    Log.error("Expecting unicode data only")
                if self.key:
                    f.write(crypto.encrypt(d, self.key).encode("utf8"))
                else:
                    f.write(d.encode("utf8"))

    def __iter__(self):
        # NOT SURE HOW TO MAXIMIZE FILE READ SPEED
        # http://stackoverflow.com/questions/8009882/how-to-read-large-file-line-by-line-in-python
        # http://effbot.org/zone/wide-finder.htm
        def output():
            try:
                with io.open(self._filename, "rb") as f:
                    for line in f:
                        yield line.decode("utf8")
            except Exception, e:
                from .logs import Log

                Log.error("Can not read line from {{filename}}", {"filename": self._filename}, e)

        return output()

    def append(self, content):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "ab") as output_file:
            if isinstance(content, str):
                from .logs import Log

                Log.error("expecting to write unicode only")
            output_file.write(content.encode("utf-8"))
            output_file.write(b"\n")

    def add(self, content):
        return self.append(content)

    def extend(self, content):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "ab") as output_file:
            for c in content:
                if isinstance(c, str):
                    from .logs import Log

                    Log.error("expecting to write unicode only")

                output_file.write(c.encode("utf-8"))
                output_file.write(b"\n")

    def delete(self):
        try:
            if os.path.isdir(self._filename):
                shutil.rmtree(self._filename)
            elif os.path.isfile(self._filename):
                os.remove(self._filename)
            return self
        except Exception, e:
            if e.strerror == "The system cannot find the path specified":
                return
            from ..env.logs import Log

            Log.error("Could not remove file", e)

    def backup(self):
        names = self._filename.split("/")[-1].split(".")
        if len(names) == 1:
            backup = File(self._filename + ".backup " + datetime.utcnow().strftime("%Y%m%d_%H%M%S"))

    def create(self):
        try:
            os.makedirs(self._filename)
        except Exception, e:
            from ..env.logs import Log

            Log.error("Could not make directory {{dir_name}}", {"dir_name": self._filename}, e)

    @property
    def parent(self):
        return File("/".join(self._filename.split("/")[:-1]))

    @property
    def exists(self):
        if self._filename in ["", "."]:
            return True
        try:
            return os.path.exists(self._filename)
        except Exception, e:
            return False

    def __bool__(self):
        return self.__nonzero__()

    def __nonzero__(self):
        """
        USED FOR FILE EXISTENCE TESTING
        """
        if self._filename in ["", "."]:
            return True
        try:
            return os.path.exists(self._filename)
        except Exception, e:
            return False
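
# A short sketch of the File API above (hypothetical paths; note that
# append() and extend() add the trailing newline, write() does not):
f = File("./tests/results/output.log")
f.write(u"first line\n")      # creates the parent directory if needed
f.append(u"second line")      # unicode only; a str raises via Log.error
for line in f:                # streams utf8-decoded lines
    print line
print f.backup_name()         # e.g. ./tests/results/output_20140101_120000.log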
@ -0,0 +1,134 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from datetime import timedelta, datetime
from ..cnv import CNV
from .elasticsearch import ElasticSearch
from ..structs.wraps import wrap
from ..thread.threads import Thread, Queue
from .logs import BaseLog, Log


class Log_usingElasticSearch(BaseLog):
    def __init__(self, settings):
        settings = wrap(settings)
        self.es = ElasticSearch(settings)

        aliases = self.es.get_aliases()
        if settings.index not in [a.index for a in aliases]:
            schema = CNV.JSON2object(CNV.object2JSON(SCHEMA), paths=True)
            self.es = ElasticSearch.create_index(settings, schema, limit_replicas=True)

        self.queue = Queue()
        self.thread = Thread("log to " + settings.index, time_delta_pusher, es=self.es, queue=self.queue, interval=timedelta(seconds=1))
        self.thread.start()

    def write(self, template, params):
        try:
            if params.get("template", None):
                # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
                self.queue.add(params)
            else:
                self.queue.add({"template": template, "params": params})
            return self
        except Exception, e:
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception, e:
            pass

        try:
            self.queue.close()
        except Exception, f:
            pass


def time_delta_pusher(please_stop, es, queue, interval):
    """
    es - THE ElasticSearch INDEX THAT RECEIVES THE LOG ENTRIES
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - timedelta
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """
    if not isinstance(interval, timedelta):
        Log.error("Expecting interval to be a timedelta")

    next_run = datetime.utcnow() + interval

    while not please_stop:
        Thread.sleep(till=next_run)
        next_run = datetime.utcnow() + interval
        logs = queue.pop_all()
        if logs:
            try:
                last = len(logs)
                for i, log in enumerate(logs):
                    if log is Thread.STOP:
                        please_stop.go()
                        last = i
                        next_run = datetime.utcnow()
                if last > 0:
                    es.extend([{"value": v} for v in logs[0:last]])
            except Exception, e:
                # DO NOT KILL THREAD, WE MUST CONTINUE TO CONSUME MESSAGES
                Log.warning("problem logging to es", e)


SCHEMA = {
    "settings": {
        "index.number_of_shards": 3,
        "index.number_of_replicas": 2,
        "index.store.throttle.type": "merge",
        "index.store.throttle.max_bytes_per_sec": "2mb",
        "index.cache.filter.expire": "1m",
        "index.cache.field.type": "soft",
    },
    "mappings": {
        "_default_": {
            "dynamic_templates": [
                {
                    "values_strings": {
                        "match": "*",
                        "match_mapping_type": "string",
                        "mapping": {
                            "type": "string",
                            "index": "not_analyzed"
                        }
                    }
                }
            ],
            "_all": {
                "enabled": False
            },
            "_source": {
                "compress": True,
                "enabled": True
            },
            "properties": {
                "timestamp": {
                    "type": "long",
                    "index": "not_analyzed",
                    "store": "yes"
                },
                "params": {
                    "type": "object",
                    "enabled": False,
                    "index": "no",
                    "store": "yes"
                }
            }
        }
    }
}
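
# A sketch of attaching this log writer to a cluster (hypothetical host and
# index names; the constructor wraps the settings itself, so a plain dict is
# enough):
es_log = Log_usingElasticSearch({
    "host": "http://localhost",
    "port": 9200,
    "index": "debug_logs",
    "type": "log_entry"
})
es_log.write("{{num}} records loaded", {"num": 1000})   # queued, pushed every second
es_log.stop()   # drains the queue, then joins the pusher thread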
@ -0,0 +1,87 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#


from __future__ import unicode_literals
from datetime import timedelta
import logging
import sys

from .. import struct
from .log_usingStream import Log_usingStream, time_delta_pusher
from .logs import BaseLog, DEBUG_LOGGING, Log
from ..thread import threads
from ..thread.threads import Thread


# WRAP PYTHON CLASSIC logger OBJECTS
class Log_usingLogger(BaseLog):
    def __init__(self, settings):
        self.logger = logging.Logger("unique name", level=logging.INFO)
        self.logger.addHandler(make_log_from_settings(settings))

        # TURNS OUT LOGGERS ARE REALLY SLOW TOO
        self.queue = threads.Queue(max=10000, silent=True)
        self.thread = Thread("log to logger", time_delta_pusher, appender=self.logger.info, queue=self.queue, interval=timedelta(seconds=0.3))
        self.thread.start()

    def write(self, template, params):
        # http://docs.python.org/2/library/logging.html#logging.LogRecord
        self.queue.add({"template": template, "params": params})

    def stop(self):
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger sees stop, adding stop to queue\n")
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingLogger done\n")
        except Exception, e:
            pass

        try:
            self.queue.close()
        except Exception, f:
            pass


def make_log_from_settings(settings):
    assert settings["class"]

    # IMPORT MODULE FOR HANDLER
    path = settings["class"].split(".")
    class_name = path[-1]
    path = ".".join(path[:-1])
    constructor = None
    try:
        temp = __import__(path, globals(), locals(), [class_name], -1)
        constructor = object.__getattribute__(temp, class_name)
    except Exception, e:
        if settings.stream and not constructor:
            # PROVIDE A DEFAULT STREAM HANDLER
            constructor = Log_usingStream
        else:
            Log.error("Can not find class {{class}}", {"class": path}, e)

    # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
    if settings.filename:
        from ..env.files import File

        f = File(settings.filename)
        if not f.parent.exists:
            f.parent.create()

    settings['class'] = None
    params = struct.unwrap(settings)
    log_instance = constructor(**params)
    return log_instance
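
# A sketch of settings for a classic handler (path and sizes hypothetical).
# The "class" is imported dynamically, cleared from the settings, and the
# remaining keys are passed to the handler's constructor:
rotating_log = make_log_from_settings(struct.wrap({
    "class": "logging.handlers.RotatingFileHandler",
    "filename": "./tests/results/logs/app.log",
    "maxBytes": 10000000,
    "backupCount": 10
}))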
@ -0,0 +1,122 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#


from __future__ import unicode_literals
from datetime import datetime, timedelta
import sys

from .logs import BaseLog, DEBUG_LOGGING, Log
from ..strings import expand_template
from ..thread.threads import Thread


class Log_usingStream(BaseLog):
    # stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE
    def __init__(self, stream):
        assert stream

        use_UTF8 = False

        if isinstance(stream, basestring):
            if stream.startswith("sys."):
                use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
            self.stream = eval(stream)
            name = stream
        else:
            self.stream = stream
            name = "stream"

        # WRITES TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from ..thread.threads import Queue

        if use_UTF8:
            def utf8_appender(value):
                if isinstance(value, unicode):
                    value = value.encode('utf-8')
                self.stream.write(value)

            appender = utf8_appender
        else:
            appender = self.stream.write

        self.queue = Queue(max=10000, silent=True)
        self.thread = Thread("log to " + name, time_delta_pusher, appender=appender, queue=self.queue, interval=timedelta(seconds=0.3))
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception, e:
            raise e  # OH NO!

    def stop(self):
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingStream sees stop, adding stop to queue\n")
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingStream done\n")
        except Exception, e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception, f:
            if DEBUG_LOGGING:
                raise f


def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - timedelta
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """
    if not isinstance(interval, timedelta):
        Log.error("Expecting interval to be a timedelta")

    next_run = datetime.utcnow() + interval

    while not please_stop:
        Thread.sleep(till=next_run)
        next_run = datetime.utcnow() + interval
        logs = queue.pop_all()
        if logs:
            lines = []
            for log in logs:
                try:
                    if log is Thread.STOP:
                        please_stop.go()
                        next_run = datetime.utcnow()
                    else:
                        expanded = expand_template(log.get("template", None), log.get("params", None))
                        lines.append(expanded)
                except Exception, e:
                    Log.warning("Trouble formatting logs", e)
                    # SWALLOW ERROR, GOT TO KEEP RUNNING
            try:
                if DEBUG_LOGGING and please_stop:
                    sys.stdout.write("Call to appender with " + str(len(lines)) + " lines\n")
                appender(u"\n".join(lines) + u"\n")
                if DEBUG_LOGGING and please_stop:
                    sys.stdout.write("Done call to appender with " + str(len(lines)) + " lines\n")
            except Exception, e:
                sys.stderr.write("Trouble with appender: " + str(e.message) + "\n")
                # SWALLOW ERROR, GOT TO KEEP RUNNING
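
# A brief sketch of this stream logger on its own; writes are queued and
# flushed to the appender roughly every 0.3 seconds:
stream_log = Log_usingStream("sys.stdout")   # eval()ed, so it gets the utf8 appender
stream_log.write("{{name}} finished in {{duration}}s", {"name": "etl", "duration": 42})
stream_log.stop()   # pushes Thread.STOP, flushes the last batch, joins the thread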
@ -0,0 +1,523 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#


from __future__ import unicode_literals
from datetime import datetime
import sys

from .. import struct
from ..jsons import json_encoder
from ..thread import threads
from ..struct import nvl, Struct
from ..structs.wraps import listwrap, wrap
from ..strings import indent, expand_template
from ..thread.threads import Thread


DEBUG_LOGGING = False
ERROR = "ERROR"
WARNING = "WARNING"
NOTE = "NOTE"


class Log(object):
    """
    FOR STRUCTURED LOGGING AND EXCEPTION CHAINING
    """
    trace = False
    main_log = None
    logging_multi = None
    profiler = None
    cprofiler = None  # screws up with pypy, but better than nothing
    error_mode = False  # prevent error loops

    @classmethod
    def new_instance(cls, settings):
        settings = wrap(settings)

        if settings["class"]:
            if settings["class"].startswith("logging.handlers."):
                from .log_usingLogger import Log_usingLogger
                return Log_usingLogger(settings)
            else:
                try:
                    from .log_usingLogger import make_log_from_settings
                    return make_log_from_settings(settings)
                except Exception, e:
                    pass  # OH WELL :(

        if settings.log_type == "file" or settings.file:
            return Log_usingFile(settings.file)
        if settings.log_type == "file" or settings.filename:
            return Log_usingFile(settings.filename)
        if settings.log_type == "stream" or settings.stream:
            from .log_usingStream import Log_usingStream
            return Log_usingStream(settings.stream)
        if settings.log_type == "elasticsearch":
            from .log_usingElasticSearch import Log_usingElasticSearch
            return Log_usingElasticSearch(settings)

    @classmethod
    def add_log(cls, log):
        cls.logging_multi.add_log(log)

    @classmethod
    def debug(cls, template=None, params=None):
        """
        USE THIS FOR DEBUGGING (AND EVENTUAL REMOVAL)
        """
        Log.note(nvl(template, ""), params, stack_depth=1)

    @classmethod
    def println(cls, template, params=None):
        Log.note(template, params, stack_depth=1)

    @classmethod
    def note(cls, template, params=None, stack_depth=0):
        # USE replace() AS POOR MAN'S CHILD TEMPLATE

        log_params = Struct(
            template=template,
            params=nvl(params, {}).copy(),
            timestamp=datetime.utcnow(),
        )
        if cls.trace:
            log_template = "{{timestamp|datetime}} - {{thread.name}} - {{location.file}}:{{location.line}} ({{location.method}}) - " + template.replace("{{", "{{params.")
            f = sys._getframe(stack_depth + 1)
            log_params.location = {
                "line": f.f_lineno,
                "file": f.f_code.co_filename,
                "method": f.f_code.co_name
            }
            thread = Thread.current()
            log_params.thread = {"name": thread.name, "id": thread.id}
        else:
            log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

        cls.main_log.write(log_template, log_params)

    @classmethod
    def warning(cls, template, params=None, cause=None):
        if isinstance(params, BaseException):
            cause = params
            params = None

        if cause and not isinstance(cause, Except):
            cause = Except(WARNING, unicode(cause), trace=extract_tb(0))

        trace = extract_stack(1)
        e = Except(WARNING, template, params, cause, trace)
        Log.note(unicode(e), {
            "warning": {
                "template": template,
                "params": params,
                "cause": cause,
                "trace": trace
            }
        })

    @classmethod
    def error(
        cls,
        template,  # human readable template
        params=None,  # parameters for template
        cause=None,  # plausible cause
        offset=0  # stack trace offset (==1 if you do not want to report self)
    ):
        """
        raise an exception with a trace for the cause too
        """
        if params and isinstance(listwrap(params)[0], BaseException):
            cause = params
            params = None

        if cause == None:
            cause = []
        elif isinstance(cause, list):
            pass
        elif isinstance(cause, Except):
            cause = [cause]
        else:
            cause = [Except(ERROR, unicode(cause), trace=extract_tb(offset))]

        trace = extract_stack(1 + offset)
        e = Except(ERROR, template, params, cause, trace)
        raise e

    @classmethod
    def fatal(
        cls,
        template,  # human readable template
        params=None,  # parameters for template
        cause=None,  # plausible cause
        offset=0  # stack trace offset (==1 if you do not want to report self)
    ):
        """
        SEND TO STDERR
        """
        if params and isinstance(listwrap(params)[0], BaseException):
            cause = params
            params = None

        if cause == None:
            cause = []
        elif isinstance(cause, list):
            pass
        elif isinstance(cause, Except):
            cause = [cause]
        else:
            cause = [Except(ERROR, unicode(cause), trace=extract_tb(offset))]

        trace = extract_stack(1 + offset)
        e = Except(ERROR, template, params, cause, trace)
        str_e = unicode(e)

        error_mode = cls.error_mode
        try:
            if not error_mode:
                cls.error_mode = True
                Log.note(str_e, {
                    "error": {
                        "template": template,
                        "params": params,
                        "cause": cause,
                        "trace": trace
                    }
                })
        except Exception, f:
            pass
        cls.error_mode = error_mode

        sys.stderr.write(str_e)

    # RUN ME FIRST TO SETUP THE THREADED LOGGING
    @classmethod
    def start(cls, settings=None):
        # http://victorlin.me/2012/08/good-logging-practice-in-python/
        if not settings:
            return

        cls.settings = settings
        cls.trace = cls.trace | nvl(settings.trace, False)
        if cls.trace:
            from ..thread.threads import Thread

        if not settings.log:
            return

        cls.logging_multi = Log_usingMulti()
        cls.main_log = Log_usingThread(cls.logging_multi)

        for log in listwrap(settings.log):
            Log.add_log(Log.new_instance(log))

        if settings.cprofile:
            if isinstance(settings.cprofile, bool):
                settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}

            import cProfile
            cls.cprofiler = cProfile.Profile()
            cls.cprofiler.enable()

        if settings.profile:
            from ..env import profiles

            if isinstance(settings.profile, bool):
                settings.profile = {"enabled": True, "filename": "profile.tab"}

            if settings.profile.enabled:
                profiles.ON = True

    @classmethod
    def stop(cls):
        from ..env import profiles

        if cls.cprofiler and hasattr(cls, "settings"):
            write_profile(cls.settings.cprofile, cls.cprofiler)

        if profiles.ON and hasattr(cls, "settings"):
            profiles.write(cls.settings.profile)
        cls.main_log.stop()

    def write(self):
        Log.error("not implemented")


def extract_stack(start=0):
    """
    SNAGGED FROM traceback.py
    Extract the raw traceback from the current stack frame.

    Each item in the returned list is a quadruple (filename,
    line number, function name, text), and the entries are in order
    from newest to oldest
    """
    try:
        raise ZeroDivisionError
    except ZeroDivisionError:
        trace = sys.exc_info()[2]
        f = trace.tb_frame.f_back

    for i in range(start):
        f = f.f_back

    stack = []
    n = 0
    while f is not None:
        stack.append({
            "depth": n,
            "line": f.f_lineno,
            "file": f.f_code.co_filename,
            "method": f.f_code.co_name
        })
        f = f.f_back
        n += 1
    return stack


def extract_tb(start):
    """
    SNAGGED FROM traceback.py

    Return list of up to limit pre-processed entries from traceback.

    This is useful for alternate formatting of stack traces. If
    'limit' is omitted or None, all entries are extracted. A
    pre-processed stack trace entry is a quadruple (filename, line
    number, function name, text) representing the information that is
    usually printed for a stack trace.
    """
    tb = sys.exc_info()[2]
    for i in range(start):
        tb = tb.tb_next

    trace = []
    n = 0
    while tb is not None:
        f = tb.tb_frame
        trace.append({
            "depth": n,
            "file": f.f_code.co_filename,
            "line": tb.tb_lineno,
            "method": f.f_code.co_name
        })
        tb = tb.tb_next
        n += 1
    trace.reverse()
    return trace


def format_trace(tbs, start=0):
    trace = []
    for d in tbs[start::]:
        d["file"] = d["file"].replace("/", "\\")
        item = expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d)
        trace.append(item)
    return "".join(trace)


class Except(Exception):
    def __init__(self, type=ERROR, template=None, params=None, cause=None, trace=None):
        super(Exception, self).__init__(self)
        self.type = type
        self.template = template
        self.params = params
        self.cause = cause
        self.trace = trace

    @property
    def message(self):
        return unicode(self)

    def contains(self, value):
        if self.type == value:
            return True
        for c in self.cause:
            if c.contains(value):
                return True
        return False

    def __str__(self):
        output = self.type + ": " + self.template
        if self.params:
            output = expand_template(output, self.params)

        if self.trace:
            output += "\n" + indent(format_trace(self.trace))

        if self.cause:
            cause_strings = []
            for c in self.cause:
                try:
                    cause_strings.append(c.__str__())
                except Exception, e:
                    pass

            output += "\ncaused by\n\t" + "\nand caused by\n\t".join(cause_strings)

        return output + "\n"

    def __json__(self):
        return json_encoder(Struct(
            type=self.type,
            template=self.template,
            params=self.params,
            cause=self.cause,
            trace=self.trace
        ))


class BaseLog(object):
    def write(self, template, params):
        pass

    def stop(self):
        pass


class Log_usingFile(BaseLog):
    def __init__(self, file):
        assert file

        from ..env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = threads.Lock()

    def write(self, template, params):
        with self.file_lock:
            self.file.append(expand_template(template, params))


class Log_usingThread(BaseLog):
    def __init__(self, logger):
        # DELAYED LOAD FOR THREADS MODULE
        from ..thread.threads import Queue

        self.queue = Queue(max=10000, silent=True)
        self.logger = logger

        def worker(please_stop):
            while not please_stop:
                Thread.sleep(1)
                logs = self.queue.pop_all()
                for log in logs:
                    if log is Thread.STOP:
                        if DEBUG_LOGGING:
                            sys.stdout.write("Log_usingThread.worker() sees stop, filling rest of queue\n")
                        please_stop.go()
                    else:
                        self.logger.write(**log)

        self.thread = Thread("log thread", worker)
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception, e:
            sys.stdout.write("IF YOU SEE THIS, IT IS LIKELY YOU FORGOT TO RUN Log.start() FIRST\n")
            raise e  # OH NO!

    def stop(self):
        try:
            if DEBUG_LOGGING:
                sys.stdout.write("injecting stop into queue\n")
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            if DEBUG_LOGGING:
                sys.stdout.write("Log_usingThread telling logger to stop\n")
            self.logger.stop()
        except Exception, e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception, f:
            if DEBUG_LOGGING:
                raise f


class Log_usingMulti(BaseLog):
    def __init__(self):
        self.many = []

    def write(self, template, params):
        for m in self.many:
            try:
                m.write(template, params)
            except Exception, e:
                pass
        return self

    def add_log(self, logger):
        self.many.append(logger)
        return self

    def remove_log(self, logger):
        self.many.remove(logger)
        return self

    def clear_log(self):
        self.many = []

    def stop(self):
        for m in self.many:
            try:
                m.stop()
            except Exception, e:
                pass


def write_profile(profile_settings, cprofiler):
    from ..cnv import CNV
    from .files import File
    import pstats

    p = pstats.Stats(cprofiler)
    stats = [{
        "num_calls": d[1],
        "self_time": d[2],
        "total_time": d[3],
        "self_time_per_call": d[2] / d[1],
        "total_time_per_call": d[3] / d[1],
        "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
        "line": f[1],
        "method": f[2].lstrip("<").rstrip(">")
    }
        for f, d, in p.stats.iteritems()
    ]
    stats_file = File(profile_settings.filename, suffix=CNV.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    stats_file.write(CNV.list2tab(stats))


if not Log.main_log:
    from log_usingStream import Log_usingStream
    Log.main_log = Log_usingStream("sys.stdout")
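
# A sketch of startup plus exception chaining (filename hypothetical; the
# settings shape mirrors the "debug" blocks in this commit's test configs):
Log.start(wrap({
    "log": [{"filename": "./tests/results/logs/app.log"}]   # becomes a Log_usingFile
}))
try:
    risky_step()                                # hypothetical function
except Exception, e:
    Log.warning("step failed, continuing", e)   # e becomes the chained cause
    # Log.error("step failed", e) would instead raise an Except chaining e
Log.stop()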
@ -0,0 +1,93 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals
from datetime import datetime
from time import clock
from ..collections import MAX
from ..struct import Struct

ON = False
profiles = {}


class Profiler(object):
    """
    ACCUMULATE TIMING FOR with-BLOCKS, KEYED BY description
    """

    def __new__(cls, *args):
        if ON:
            output = profiles.get(args[0], None)
            if output:
                return output
        output = object.__new__(cls, *args)
        return output

    def __init__(self, description):
        from ..queries.windows import Stats

        if ON and not hasattr(self, "description"):
            self.description = description
            self.samples = []
            self.stats = Stats()()
            profiles[description] = self

    def __enter__(self):
        if ON:
            self.start = clock()
        return self

    def __exit__(self, type, value, traceback):
        if ON:
            self.end = clock()
            duration = self.end - self.start

            self.stats.add(duration)
            if self.samples is not None:
                self.samples.append(duration)
                if len(self.samples) > 100:
                    self.samples = None


def write(profile_settings):
    from ..cnv import CNV
    from .files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [{
        "description": p.description,
        "num_calls": p.stats.count,
        "total_time": p.stats.count * p.stats.mean,
        "total_time_per_call": p.stats.mean
    }
        for p in profs if p.stats.count > 0
    ]
    stats_file = File(profile_settings.filename, suffix=CNV.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(CNV.list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(profile_settings.filename, suffix=CNV.datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S"))
    r = range(MAX([len(p.samples) for p in profs]))
    profs.insert(0, Struct(description="index", samples=r))
    stats = [
        {p.description: p.samples[i] for p in profs if p.samples}
        for i in r
    ]
    if stats:
        stats_file2.write(CNV.list2tab(stats))
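
# The profiler is inert unless ON is set (Log.start() does this when
# settings.profile.enabled is true); a minimal sketch using this module
# directly (output filename and timed function are hypothetical):
ON = True
with Profiler("parse records"):     # the description doubles as the registry key
    parse_records()                 # hypothetical function being timed
write(Struct(filename="profile.tab"))   # one summary row per profile, plus sample series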
@ -0,0 +1,158 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals
import argparse
import os
import tempfile
import sys
from ..structs.wraps import listwrap, wrap, unwrap
from ..cnv import CNV
from ..env.logs import Log
from ..env.files import File


# PARAMETERS MATCH argparse.ArgumentParser.add_argument()
# http://docs.python.org/dev/library/argparse.html#the-add-argument-method
# name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
# action - The basic type of action to be taken when this argument is encountered at the command line.
# nargs - The number of command-line arguments that should be consumed.
# const - A constant value required by some action and nargs selections.
# default - The value produced if the argument is absent from the command line.
# type - The type to which the command-line argument should be converted.
# choices - A container of the allowable values for the argument.
# required - Whether or not the command-line option may be omitted (optionals only).
# help - A brief description of what the argument does.
# metavar - A name for the argument in usage messages.
# dest - The name of the attribute to be added to the object returned by parse_args().

def _argparse(defs):
    parser = argparse.ArgumentParser()
    for d in listwrap(defs):
        args = d.copy()
        name = args.name
        args.name = None
        parser.add_argument(*unwrap(listwrap(name)), **unwrap(args))
    namespace = parser.parse_args()
    output = {k: getattr(namespace, k) for k in vars(namespace)}
    return wrap(output)


def read_settings(filename=None, defs=None):
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            Log.error("Can not find settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        json = settings_file.read()
        settings = CNV.JSON2object(json, flexible=True)
        if defs:
            settings.args = _argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = _argparse(defs)
        settings_file = File(args.filename)
        if not settings_file.exists:
            Log.error("Can not read settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        json = settings_file.read()
        settings = CNV.JSON2object(json, flexible=True)
        settings.args = args
        return settings


# snagged from https://github.com/pycontribs/tendo/blob/master/tendo/singleton.py (under licence PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2)
class SingleInstance:
    """
    ONLY ONE INSTANCE OF PROGRAM ALLOWED
    If you want to prevent your script from running in parallel just instantiate the SingleInstance class.
    If there is another instance already running it will exit the application with the message
    "Another instance is already running, quitting.", returning -1 error code.

        me = SingleInstance()

    This option is very useful if you have scripts executed by crontab at small intervals.

    Remember that this works by creating a lock file with a filename based on the full path to the script file.
    """
    def __init__(self, flavor_id=""):
        self.initialized = False
        appname = os.path.splitext(os.path.abspath(sys.argv[0]))[0]
        basename = ((appname + '-%s') % flavor_id).replace("/", "-").replace(":", "").replace("\\", "-") + '.lock'
        self.lockfile = os.path.normpath(tempfile.gettempdir() + '/' + basename)

    def __enter__(self):
        Log.note("SingleInstance.lockfile = " + self.lockfile)
        if sys.platform == 'win32':
            try:
                # file already exists, we try to remove (in case previous execution was interrupted)
                if os.path.exists(self.lockfile):
                    os.unlink(self.lockfile)
                self.fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
            except Exception, e:
                Log.note("\n" +
                         "**********************************************************************\n" +
                         "** Another instance is already running, quitting.\n" +
                         "**********************************************************************\n"
                )
                sys.exit(-1)
        else:  # non Windows
            import fcntl

            self.fp = open(self.lockfile, 'w')
            try:
                fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except IOError:
                Log.note("\n" +
                         "**********************************************************************\n" +
                         "** Another instance is already running, quitting.\n" +
                         "**********************************************************************\n"
                )
                sys.exit(-1)
        self.initialized = True

    def __exit__(self, type, value, traceback):
        self.__del__()

    def __del__(self):
        import sys
        import os

        temp, self.initialized = self.initialized, False
        if not temp:
            return
        try:
            if sys.platform == 'win32':
                if hasattr(self, 'fd'):
                    os.close(self.fd)
                    os.unlink(self.lockfile)
            else:
                import fcntl

                fcntl.lockf(self.fp, fcntl.LOCK_UN)
                if os.path.isfile(self.lockfile):
                    os.unlink(self.lockfile)
        except Exception as e:
            Log.warning("Problem with SingleInstance __del__()", e)
            sys.exit(-1)
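
# A sketch of the intended startup pattern (the extra arg def is
# hypothetical): read the JSON file named by --settings (default
# ./settings.json), start logging from its "debug" block, and refuse to run
# twice in parallel:
settings = read_settings(defs=[{
    "name": ["--id"],
    "help": "process only this bug id",
    "type": int,
    "dest": "id",
    "required": False
}])
Log.start(settings.debug)
with SingleInstance():
    Log.note("only one copy of this script can hold the lock")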
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
|
||||
|
||||
import codecs
|
||||
from datetime import datetime
|
||||
import io
|
||||
import os
|
||||
import shutil
|
||||
from .struct import listwrap, nvl
|
||||
from .cnv import CNV
|
||||
|
||||
|
||||
class File(object):
|
||||
def __init__(self, filename, buffering=2 ** 14):
|
||||
if filename == None:
|
||||
from .logs import Log
|
||||
Log.error("File must be given a filename")
|
||||
|
||||
#USE UNIX STANDARD
|
||||
self._filename = "/".join(filename.split(os.sep))
|
||||
self.buffering = buffering
|
||||
|
||||
|
||||
@property
|
||||
def filename(self):
|
||||
return self._filename.replace("/", os.sep)
|
||||
|
||||
@property
|
||||
def abspath(self):
|
||||
return os.path.abspath(self._filename)
|
||||
|
||||
def backup_name(self, timestamp=None):
|
||||
"""
|
||||
RETURN A FILENAME THAT CAN SERVE AS A BACKUP FOR THIS FILE
|
||||
"""
|
||||
suffix = CNV.datetime2string(nvl(timestamp, datetime.now()), "%Y%m%d_%H%M%S")
|
||||
parts = self._filename.split(".")
|
||||
if len(parts) == 1:
|
||||
output = self._filename + "." + suffix
|
||||
elif len(parts) > 1 and parts[-2][-1] == "/":
|
||||
output = self._filename + "." + suffix
|
||||
else:
|
||||
parts.insert(-1, suffix)
|
||||
output = ".".join(parts)
|
||||
return output
|
||||
|
||||
|
||||
def read(self, encoding="utf-8"):
|
||||
with codecs.open(self._filename, "r", encoding=encoding) as file:
|
||||
return file.read()
|
||||
|
||||
def read_ascii(self):
|
||||
if not self.parent.exists: self.parent.create()
|
||||
with open(self._filename, "r") as file:
|
||||
return file.read()
|
||||
|
||||
def write_ascii(self, content):
|
||||
if not self.parent.exists: self.parent.create()
|
||||
with open(self._filename, "w") as file:
|
||||
file.write(content)
|
||||
|
||||
def write(self, data):
|
||||
if not self.parent.exists: self.parent.create()
|
||||
with open(self._filename, "w") as file:
|
||||
for d in listwrap(data):
|
||||
file.write(d)
|
||||
|
||||
def __iter__(self):
|
||||
#NOT SURE HOW TO MAXIMIZE FILE READ SPEED
|
||||
#http://stackoverflow.com/questions/8009882/how-to-read-large-file-line-by-line-in-python
|
||||
#http://effbot.org/zone/wide-finder.htm
|
||||
def output():
|
||||
with io.open(self._filename, "rb") as f:
|
||||
for line in f:
|
||||
yield line.decode("utf-8")
|
||||
|
||||
return output()
|
||||
|
||||
def append(self, content):
|
||||
if not self.parent.exists:
|
||||
self.parent.create()
|
||||
with open(self._filename, "a") as output_file:
|
||||
output_file.write(content)
|
||||
|
||||
def add(self, content):
|
||||
return self.append(content)
|
||||
|
||||
def extend(self, content):
|
||||
if not self.parent.exists:
|
||||
self.parent.create()
|
||||
with open(self._filename, "a") as output_file:
|
||||
for c in content:
|
||||
output_file.write(c)
|
||||
|
||||
|
||||
|
||||
def delete(self):
|
||||
try:
|
||||
if os.path.isdir(self._filename):
|
||||
shutil.rmtree(self._filename)
|
||||
elif os.path.isfile(self._filename):
|
||||
os.remove(self._filename)
|
||||
return self
|
||||
except Exception, e:
|
||||
if e.strerror=="The system cannot find the path specified":
|
||||
return
|
||||
from .logs import Log
|
||||
Log.error("Could not remove file", e)
|
||||
|
||||
def backup(self):
|
||||
names=self._filename.split("/")[-1].split(".")
|
||||
if len(names)==1:
|
||||
backup=File(self._filename+".backup "+datetime.utcnow().strftime("%Y%m%d %H%i%s"))
|
||||
|
||||
|
||||
def create(self):
|
||||
try:
|
||||
os.makedirs(self._filename)
|
||||
except Exception, e:
|
||||
from .logs import Log
|
||||
Log.error("Could not make directory {{dir_name}}", {"dir_name":self._filename}, e)
|
||||
|
||||
|
||||
@property
|
||||
def parent(self):
|
||||
return File("/".join(self._filename.split("/")[:-1]))
|
||||
|
||||
@property
|
||||
def exists(self):
|
||||
if self._filename in ["", "."]: return True
|
||||
try:
|
||||
return os.path.exists(self._filename)
|
||||
except Exception, e:
|
||||
return False
|
|
@@ -0,0 +1,446 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
from .jsons import json_encoder, use_pypy, UnicodeBuilder
|
||||
from .struct import StructList, Null, EmptyList
|
||||
from .structs.wraps import wrap, wrap_dot
|
||||
|
||||
DEBUG = True
|
||||
|
||||
# PARSE MODES
|
||||
ARRAY = 1 # PARSING INSIDE AN ARRAY
|
||||
VALUE = 3 # PARSING PROPERTY VALUE
|
||||
OBJECT = 4 # PARSING PROPERTY NAME
|
||||
|
||||
|
||||
def decode(json):
|
||||
"""
|
||||
THIS IS CURRENTLY 50% SLOWER THAN PyPy DEFAULT IMPLEMENTATION
|
||||
|
||||
THE INTENT IS TO NEVER ACTUALLY PARSE ARRAYS OF PRIMITIVE VALUES, RATHER FIND
|
||||
THE START AND END OF THOSE ARRAYS AND SIMPLY STRING COPY THEM TO THE
|
||||
INEVITABLE JSON OUTPUT
|
||||
"""
|
||||
var = ""
|
||||
curr = StructList()
|
||||
mode = ARRAY
|
||||
stack = StructList()
|
||||
# FIRST PASS SIMPLY GETS STRUCTURE
|
||||
i = 0
|
||||
while i < len(json):
|
||||
c = json[i]
|
||||
i += 1
|
||||
if mode == ARRAY:
|
||||
if c in [" ", "\t", "\n", "\r", ","]:
|
||||
pass
|
||||
elif c == "]":
|
||||
curr = stack.pop()
|
||||
if isinstance(curr, dict):
|
||||
mode = OBJECT
|
||||
else:
|
||||
mode = ARRAY
|
||||
elif c == "[":
|
||||
i, arr = jump_array(i, json)
|
||||
if arr is None:
|
||||
arr = []
|
||||
stack.append(curr)
|
||||
curr.append(arr)
|
||||
curr = arr
|
||||
mode = ARRAY
|
||||
else:
|
||||
curr.append(arr)
|
||||
elif c == "{":
|
||||
obj = {}
|
||||
stack.append(curr)
|
||||
curr.append(obj)
|
||||
curr = obj
|
||||
mode = OBJECT
|
||||
elif c == "\"":
|
||||
i, val = fast_parse_string(i, json)
|
||||
curr.append(val)
|
||||
else:
|
||||
i, val = parse_const(i, json)
curr.append(val)
|
||||
elif mode == OBJECT:
|
||||
if c in [" ", "\t", "\n", "\r", ","]:
|
||||
pass
|
||||
elif c == ":":
|
||||
mode = VALUE
|
||||
elif c == "}":
|
||||
curr = stack.pop()
|
||||
if isinstance(curr, dict):
|
||||
mode = OBJECT
|
||||
else:
|
||||
mode = ARRAY
|
||||
elif c == "\"":
|
||||
i, var = fast_parse_string(i, json)
|
||||
elif mode == VALUE:
|
||||
if c in [" ", "\t", "\n", "\r"]:
|
||||
pass
|
||||
elif c == "}":
|
||||
curr = stack.pop()
|
||||
if isinstance(curr, dict):
|
||||
mode = OBJECT
|
||||
else:
|
||||
mode = ARRAY
|
||||
elif c == "[":
|
||||
i, arr = jump_array(i, json)
|
||||
if arr is None:
|
||||
arr = []
|
||||
stack.append(curr)
|
||||
curr[var] = arr
|
||||
curr = arr
|
||||
mode = ARRAY
|
||||
else:
|
||||
curr[var] = arr
|
||||
mode = OBJECT
|
||||
elif c == "{":
|
||||
obj = {}
|
||||
stack.append(curr)
|
||||
curr[var] = obj
|
||||
curr = obj
|
||||
mode = OBJECT
|
||||
elif c == "\"":
|
||||
i, val = fast_parse_string(i, json)
|
||||
curr[var] = val
|
||||
mode = OBJECT
|
||||
else:
|
||||
i, val = parse_const(i, json)
|
||||
curr[var] = val
|
||||
mode = OBJECT
|
||||
|
||||
return curr[0]
|
||||
|
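A sketch of the intended behaviour (illustrative values, names follow this file): arrays of primitives are never parsed eagerly; jump_array() records their span and hands back a JSONList that keeps a reference into the original JSON string, so serializing it back out is a plain string copy.

    # hypothetical round trip
    doc = decode(u'{"name": "x", "values": [1, 2, 3, 4]}')
    lazy = doc["values"]          # a JSONList; nothing parsed yet
    lazy.__json__()               # u'[1, 2, 3, 4]' copied straight from the input
    lazy[2]                       # first element access triggers parse_array()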
||||
|
||||
|
||||
|
||||
def fast_parse_string(i, json):
|
||||
simple = True
|
||||
j = i
|
||||
while True:
|
||||
c = json[j]
|
||||
j += 1
|
||||
if c == "\"":
|
||||
if simple:
|
||||
return j, json[i:j-1]
|
||||
else:
|
||||
return parse_string(i, json)
|
||||
elif c == "\\":
|
||||
simple = False
|
||||
c = json[j]
|
||||
if c == "u":
|
||||
j += 5
|
||||
elif c in ["\"", "\\", "/", "b", "n", "f", "n", "t"]:
|
||||
j += 1
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
ESC = {
|
||||
"\"": "\"",
|
||||
"\\": "\\",
|
||||
"/": "/",
|
||||
"b": "\b",
|
||||
"r": "\r",
|
||||
"f": "\f",
|
||||
"n": "\n",
|
||||
"t": "\t"
|
||||
}
|
||||
|
||||
|
||||
def parse_string(i, json):
|
||||
j = i
|
||||
output = UnicodeBuilder()
|
||||
while True:
|
||||
c = json[j]
|
||||
if c == "\"":
|
||||
return j + 1, output.build()
|
||||
elif c == "\\":
|
||||
j += 1
|
||||
c = json[j]
|
||||
if c == "u":
|
||||
n = unichr(int(json[j + 1:j + 5], 16))  # the four hex digits follow the "u"
|
||||
output.append(n)
|
||||
j += 4
|
||||
else:
|
||||
try:
|
||||
output.append(ESC[c])
|
||||
except Exception, e:
|
||||
output.append("\\")
|
||||
output.append(c)
|
||||
else:
|
||||
output.append(c)
|
||||
j += 1
|
||||
|
||||
|
||||
|
||||
def parse_array(i, json):
|
||||
"""
|
||||
ARRAY OF PRIMITIVES ARE SKIPPED, THIS IS WHERE WE PARSE THEM
|
||||
"""
|
||||
output = []
|
||||
val = None
|
||||
while True:
|
||||
c = json[i]
|
||||
i += 1
|
||||
if c in [" ", "\n", "\r", "\t"]:
|
||||
pass
|
||||
elif c == ",":
|
||||
output.append(val)
|
||||
val = Null
|
||||
elif c == "]":
|
||||
if val is not None:
|
||||
output.append(val)
|
||||
return i, output
|
||||
elif c == "[":
|
||||
i, val = parse_array(i, json)
|
||||
elif c == "\"":
|
||||
i, val = parse_string(i, json)
|
||||
else:
|
||||
i, val = parse_const(i, json)
|
||||
|
||||
|
||||
def jump_string(i, json):
|
||||
while True:
|
||||
c = json[i]
|
||||
i += 1
|
||||
if c == "\"":
|
||||
return i
|
||||
elif c == "\\":
|
||||
c = json[i]
|
||||
if c == "u":
|
||||
i += 5
|
||||
elif c in ["\"", "\\", "/", "b", "n", "f", "n", "t"]:
|
||||
i += 1
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
def jump_array(i, json):
|
||||
j = i
|
||||
empty = True
|
||||
depth = 0
|
||||
while True:
|
||||
c = json[j]
|
||||
j += 1
|
||||
if c == "{":
|
||||
return i, None
|
||||
elif c == "[":
|
||||
depth += 1
|
||||
elif c == "]":
|
||||
if depth == 0:
|
||||
if empty:
|
||||
return j, []
|
||||
else:
|
||||
return j, JSONList(json, i-1, j)
|
||||
else:
|
||||
depth -= 1
|
||||
elif c == "\"":
|
||||
empty = False
|
||||
j = jump_string(j, json)
|
||||
elif c not in [" ", "\t", "\r", "\n"]:
|
||||
empty = False
|
||||
|
||||
def parse_const(i, json):
|
||||
try:
|
||||
j = i
|
||||
mode = int
|
||||
while True:
|
||||
c = json[j]
|
||||
if c in [" ", "\t", "\n", "\r", ",", "}", "]"]:
|
||||
const = json[i-1:j]
|
||||
try:
|
||||
val = {
|
||||
"0": 0,
|
||||
"-1": -1,
|
||||
"1": 1,
|
||||
"true": True,
|
||||
"false": False,
|
||||
"null": None
|
||||
}[const]
|
||||
except Exception:
|
||||
val = mode(const)
|
||||
|
||||
return j, val
|
||||
elif c in [".", "e", "E"]:
|
||||
mode = float
|
||||
j += 1
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("Can not parse const", e)
|
||||
|
||||
class JSONList(object):
|
||||
def __init__(self, json, s, e):
|
||||
self.json = json
|
||||
self.start = s
|
||||
self.end = e
|
||||
self.list = None
|
||||
|
||||
def _convert(self):
|
||||
if self.list is None:
|
||||
i, self.list = parse_array(self.start+1, self.json)
|
||||
|
||||
def __getitem__(self, index):
|
||||
self._convert()
|
||||
if isinstance(index, slice):
|
||||
# IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None)
|
||||
if index.step is not None:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("slice step must be None, do not know how to deal with values")
|
||||
length = len(self.list)
|
||||
|
||||
i = index.start
|
||||
i = min(max(i, 0), length)
|
||||
j = index.stop
|
||||
if j is None:
|
||||
j = length
|
||||
else:
|
||||
j = max(min(j, length), 0)
|
||||
return StructList(self.list[i:j])
|
||||
|
||||
if index < 0 or len(self.list) <= index:
|
||||
return Null
|
||||
return wrap(self.list[index])
|
||||
|
||||
def __setitem__(self, i, y):
|
||||
self._convert()
|
||||
self.json = None
|
||||
self.list[i] = unwrap(y)
|
||||
|
||||
def __iter__(self):
|
||||
self._convert()
|
||||
return (wrap(v) for v in self.list)
|
||||
|
||||
def __contains__(self, item):
|
||||
self._convert()
|
||||
return list.__contains__(self.list, item)
|
||||
|
||||
def append(self, val):
|
||||
self._convert()
|
||||
self.json = None
|
||||
self.list.append(unwrap(val))
|
||||
return self
|
||||
|
||||
def __str__(self):
|
||||
return self.json[self.start:self.end]
|
||||
|
||||
def __len__(self):
|
||||
self._convert()
|
||||
return self.list.__len__()
|
||||
|
||||
def __getslice__(self, i, j):
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step]")
|
||||
|
||||
def copy(self):
|
||||
if self.list is not None:
|
||||
return list(self.list)
|
||||
return JSONList(self.json, self.start, self.end)
|
||||
|
||||
def remove(self, x):
|
||||
self._convert()
|
||||
self.json = None
|
||||
self.list.remove(x)
|
||||
return self
|
||||
|
||||
def extend(self, values):
|
||||
self._convert()
|
||||
self.json = None
|
||||
for v in values:
|
||||
self.list.append(unwrap(v))
|
||||
return self
|
||||
|
||||
def pop(self):
|
||||
self._convert()
|
||||
self.json = None
|
||||
return wrap(self.list.pop())
|
||||
|
||||
def __add__(self, value):
|
||||
self._convert()
|
||||
output = list(self.list)
|
||||
output.extend(value)
|
||||
return StructList(vals=output)
|
||||
|
||||
def __or__(self, value):
|
||||
self._convert()
|
||||
output = list(self.list)
|
||||
output.append(value)
|
||||
return StructList(vals=output)
|
||||
|
||||
def __radd__(self, other):
|
||||
self._convert()
|
||||
output = list(other)
|
||||
output.extend(self.list)
|
||||
return StructList(vals=output)
|
||||
|
||||
def right(self, num=None):
|
||||
"""
|
||||
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT
|
||||
"""
|
||||
self._convert()
|
||||
if num == None:
|
||||
return StructList([self.list[-1]])
|
||||
if num <= 0:
|
||||
return EmptyList
|
||||
return StructList(self.list[-num:])
|
||||
|
||||
def leftBut(self, num):
|
||||
"""
|
||||
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
|
||||
"""
|
||||
self._convert()
|
||||
if num == None:
|
||||
return StructList(self.list[:-1:])
|
||||
if num <= 0:
|
||||
return EmptyList
|
||||
return StructList(self.list[:-num:])
|
||||
|
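    # A worked example of the flat-slice convention (illustrative values,
    # not from the source), assuming a five-element array:
    #     a = JSONList(u"[0, 1, 2, 3, 4]", 0, 15)
    #     a.right(2)    ->  StructList([3, 4])      last two elements
    #     a.leftBut(2)  ->  StructList([0, 1, 2])   all but the last two
    #     a.right(0)    ->  EmptyList               no negative-index surprises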
||||
def last(self):
|
||||
"""
|
||||
RETURN LAST ELEMENT IN StructList
|
||||
"""
|
||||
self._convert()
|
||||
if self.list:
|
||||
return wrap(self.list[-1])
|
||||
return Null
|
||||
|
||||
def map(self, oper, includeNone=True):
|
||||
self._convert()
|
||||
if includeNone:
|
||||
return StructList([oper(v) for v in self.list])
|
||||
else:
|
||||
return StructList([oper(v) for v in self.list if v != None])
|
||||
|
||||
def __json__(self):
|
||||
if self.json is not None:
|
||||
return self.json[self.start:self.end]
|
||||
else:
|
||||
return json_encoder(self)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if use_pypy:
|
||||
json_decoder = decode
|
||||
else:
|
||||
import json
|
||||
|
||||
builtin_json_decoder = json.JSONDecoder().decode
|
||||
json_decoder = builtin_json_decoder
|
||||
|
||||
if DEBUG:
|
||||
json_decoder = decode
|
|
@@ -7,149 +7,199 @@
|
|||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
import time
|
||||
from decimal import Decimal
|
||||
import json
|
||||
from math import floor
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime, date, timedelta
|
||||
from decimal import Decimal
|
||||
import sys
|
||||
from .collections import AND, MAX
|
||||
from .struct import Struct, StructList
|
||||
|
||||
json_decoder = json.JSONDecoder().decode
|
||||
|
||||
|
||||
# THIS FILE EXISTS TO SERVE AS A FAST REPLACEMENT FOR JSON ENCODING
|
||||
# THE DEFAULT JSON ENCODERS CAN NOT HANDLE A DIVERSITY OF TYPES *AND* BE FAST
|
||||
#
|
||||
# 1) WHEN USING cPython, WE HAVE NO COMPILER OPTIMIZATIONS: THE BEST STRATEGY IS TO
|
||||
# CONVERT THE MEMORY STRUCTURE TO STANDARD TYPES AND SEND TO THE INSANELY FAST
|
||||
# DEFAULT JSON ENCODER
|
||||
# 2) WHEN USING PYPY, WE USE CLEAR-AND-SIMPLE PROGRAMMING SO THE OPTIMIZER CAN DO
|
||||
# ITS JOB. ALONG WITH THE UnicodeBuilder WE GET NEAR C SPEEDS
|
||||
|
||||
|
||||
use_pypy = False
|
||||
try:
|
||||
# StringBuilder IS ABOUT 2x FASTER THAN list()
|
||||
from __pypy__.builders import StringBuilder
|
||||
# UnicodeBuilder IS ABOUT 2x FASTER THAN list()
|
||||
# use_pypy = True
|
||||
from __pypy__.builders import UnicodeBuilder
|
||||
|
||||
use_pypy = True
|
||||
except Exception, e:
|
||||
use_pypy = False
|
||||
class StringBuilder(list):
|
||||
if use_pypy:
|
||||
sys.stdout.write("The PyPy JSON serializer is in use! Currently running CPython, not a good mix.")
|
||||
|
||||
class UnicodeBuilder(list):
|
||||
def __init__(self, length=None):
|
||||
list.__init__(self)
|
||||
|
||||
def build(self):
|
||||
return u"".join(self)
|
||||
|
||||
append = StringBuilder.append
|
||||
append = UnicodeBuilder.append
|
||||
|
||||
class PyPyJSONEncoder(object):
|
||||
|
||||
def encode(value, pretty=False):
|
||||
"""
|
||||
pypy DOES NOT OPTIMIZE GENERATOR CODE WELL
|
||||
"""
|
||||
def __init__(self):
|
||||
object.__init__(self)
|
||||
if pretty:
|
||||
return pretty_json(value)
|
||||
|
||||
def encode(self, value, pretty=False):
|
||||
if pretty:
|
||||
return unicode(json.dumps(json_scrub(value), indent=4, sort_keys=True, separators=(',', ': ')))
|
||||
|
||||
_buffer = StringBuilder(1024)
|
||||
try:
|
||||
_buffer = UnicodeBuilder(1024)
|
||||
_value2json(value, _buffer)
|
||||
output = _buffer.build()
|
||||
return output
|
||||
except Exception, e:
|
||||
#THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
|
||||
from .env.logs import Log
|
||||
Log.warning("Serialization of JSON problems", e)
|
||||
try:
|
||||
return pretty_json(value)
|
||||
except Exception, f:
|
||||
Log.error("problem serializing object", f)
|
||||
|
||||
|
||||
class cPythonJSONEncoder(object):
|
||||
def __init__(self):
|
||||
object.__init__(self)
|
||||
|
||||
self.encoder = json.JSONEncoder(
|
||||
skipkeys=False,
|
||||
ensure_ascii=False, # DIFF FROM DEFAULTS
|
||||
check_circular=True,
|
||||
allow_nan=True,
|
||||
indent=None,
|
||||
separators=None,
|
||||
encoding='utf-8',
|
||||
default=None,
|
||||
sort_keys=False
|
||||
)
|
||||
|
||||
def encode(self, value, pretty=False):
|
||||
if value == None:
|
||||
return "null"
|
||||
|
||||
if pretty:
|
||||
return unicode(json.dumps(json_scrub(value), indent=4, sort_keys=True, separators=(',', ': ')))
|
||||
|
||||
return unicode(json.dumps(json_scrub(value)))
|
||||
|
||||
|
||||
# OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON?
|
||||
# http://liangnuren.wordpress.com/2012/08/13/python-json-performance/
|
||||
# http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html
|
||||
if use_pypy:
|
||||
json_encoder = PyPyJSONEncoder()
|
||||
json_decoder = json._default_decoder
|
||||
else:
|
||||
json_encoder = cPythonJSONEncoder()
|
||||
json_decoder = json._default_decoder
|
||||
|
||||
|
||||
return pretty_json(value)
|
||||
|
||||
|
||||
return unicode(self.encoder.encode(json_scrub(value)))
|
||||
|
||||
|
||||
def _value2json(value, _buffer):
|
||||
if isinstance(value, basestring):
|
||||
_string2json(value, _buffer)
|
||||
elif value == None:
|
||||
append(_buffer, "null")
|
||||
if value == None:
|
||||
append(_buffer, u"null")
|
||||
return
|
||||
elif value is True:
|
||||
append(_buffer, 'true')
|
||||
append(_buffer, u"true")
|
||||
return
|
||||
elif value is False:
|
||||
append(_buffer, 'false')
|
||||
elif isinstance(value, (int, long, Decimal)):
|
||||
append(_buffer, str(value))
|
||||
elif isinstance(value, float):
|
||||
append(_buffer, repr(value))
|
||||
elif isinstance(value, datetime):
|
||||
append(_buffer, unicode(long(time.mktime(value.timetuple())*1000)))
|
||||
elif isinstance(value, dict):
|
||||
_dict2json(value, _buffer)
|
||||
elif hasattr(value, '__iter__'):
|
||||
append(_buffer, u"false")
|
||||
return
|
||||
|
||||
type = value.__class__
|
||||
if type in (dict, Struct):
|
||||
if value:
|
||||
_dict2json(value, _buffer)
|
||||
else:
|
||||
append(_buffer, u"{}")
|
||||
elif type is str:
|
||||
append(_buffer, u"\"")
|
||||
v = value.decode("utf8")
|
||||
for c in v:
|
||||
append(_buffer, ESCAPE_DCT.get(c, c))
|
||||
append(_buffer, u"\"")
|
||||
elif type is unicode:
|
||||
append(_buffer, u"\"")
|
||||
for c in value:
|
||||
append(_buffer, ESCAPE_DCT.get(c, c))
|
||||
append(_buffer, u"\"")
|
||||
elif type in (int, long, Decimal):
|
||||
append(_buffer, unicode(value))
|
||||
elif type is float:
|
||||
append(_buffer, unicode(repr(value)))
|
||||
elif type in (set, list, tuple, StructList):
|
||||
_list2json(value, _buffer)
|
||||
elif type is date:
|
||||
append(_buffer, unicode(long(time.mktime(value.timetuple()) * 1000)))
|
||||
elif type is datetime:
|
||||
append(_buffer, unicode(long(time.mktime(value.timetuple()) * 1000)))
|
||||
elif type is timedelta:
|
||||
append(_buffer, "\"")
|
||||
append(_buffer, unicode(value.total_seconds()))
|
||||
append(_buffer, "second\"")
|
||||
elif hasattr(value, '__json__'):
|
||||
j = value.__json__()
|
||||
append(_buffer, j)
|
||||
elif hasattr(value, '__iter__'):
|
||||
_iter2json(value, _buffer)
|
||||
else:
|
||||
raise Exception(repr(value)+" is not JSON serializable")
|
||||
raise Exception(repr(value) + " is not JSON serializable")
|
||||
|
||||
|
||||
def _list2json(value, _buffer):
|
||||
append(_buffer, "[")
|
||||
first = True
|
||||
if not value:
|
||||
append(_buffer, u"[]")
|
||||
else:
|
||||
sep = u"["
|
||||
for v in value:
|
||||
append(_buffer, sep)
|
||||
sep = u", "
|
||||
_value2json(v, _buffer)
|
||||
append(_buffer, u"]")
|
||||
|
||||
|
||||
def _iter2json(value, _buffer):
|
||||
append(_buffer, u"[")
|
||||
sep = u""
|
||||
for v in value:
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
append(_buffer, ", ")
|
||||
append(_buffer, sep)
|
||||
sep = u", "
|
||||
_value2json(v, _buffer)
|
||||
append(_buffer, "]")
|
||||
append(_buffer, u"]")
|
||||
|
||||
|
||||
def _dict2json(value, _buffer):
|
||||
items = value.iteritems()
|
||||
|
||||
append(_buffer, "{")
|
||||
first = True
|
||||
prefix = u"{\""
|
||||
for k, v in value.iteritems():
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
append(_buffer, ", ")
|
||||
_string2json(unicode(k), _buffer)
|
||||
append(_buffer, ": ")
|
||||
append(_buffer, prefix)
|
||||
prefix = u", \""
|
||||
if isinstance(k, str):
|
||||
k = k.decode("utf8")
|
||||
for c in k:
|
||||
append(_buffer, ESCAPE_DCT.get(c, c))
|
||||
append(_buffer, u"\": ")
|
||||
_value2json(v, _buffer)
|
||||
append(_buffer, "}")
|
||||
append(_buffer, u"}")
|
||||
|
||||
|
||||
special_find = u"\\\"\t\n\r".find
|
||||
replacement = [u"\\\\", u"\\\"", u"\\t", u"\\n", u"\\r"]
|
||||
|
||||
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
|
||||
ESCAPE_DCT = {
|
||||
'\\': '\\\\',
|
||||
'"': '\\"',
|
||||
'\b': '\\b',
|
||||
'\f': '\\f',
|
||||
'\n': '\\n',
|
||||
'\r': '\\r',
|
||||
'\t': '\\t',
|
||||
u"\\": u"\\\\",
|
||||
u"\"": u"\\\"",
|
||||
u"\b": u"\\b",
|
||||
u"\f": u"\\f",
|
||||
u"\n": u"\\n",
|
||||
u"\r": u"\\r",
|
||||
u"\t": u"\\t",
|
||||
}
|
||||
for i in range(0x20):
|
||||
ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
|
||||
|
||||
|
||||
def _string2json(value, _buffer):
|
||||
def replace(match):
|
||||
return ESCAPE_DCT[match.group(0)]
|
||||
append(_buffer, "\"")
|
||||
append(_buffer, ESCAPE.sub(replace, value))
|
||||
append(_buffer, "\"")
|
||||
|
||||
ESCAPE_DCT.setdefault(chr(i), u'\\u{0:04x}'.format(i))
|
||||
|
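# A few concrete lookups, to make the table's behaviour plain
# (these are just the standard JSON escapes):
#     ESCAPE_DCT[u"\n"]   -> u"\\n"
#     ESCAPE_DCT[u"\""]   -> u"\\\""
#     ESCAPE_DCT[chr(7)]  -> u"\\u0007"   (control chars fall back to \uXXXX)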
||||
|
||||
#REMOVE VALUES THAT CAN NOT BE JSON-IZED
|
||||
|
@@ -160,25 +210,240 @@ def json_scrub(value):
|
|||
def _scrub(value):
|
||||
if value == None:
|
||||
return None
|
||||
elif isinstance(value, datetime):
|
||||
return long(time.mktime(value.timetuple())*1000)
|
||||
|
||||
type = value.__class__
|
||||
|
||||
if type in (date, datetime):
|
||||
return datetime2milli(value)
|
||||
elif type is timedelta:
|
||||
return unicode(value.total_seconds()) + "second"
|
||||
elif type is str:
|
||||
return unicode(value.decode("utf8"))
|
||||
elif type is Decimal:
|
||||
return float(value)
|
||||
elif isinstance(value, dict):
|
||||
output = {}
|
||||
for k, v in value.iteritems():
|
||||
v = _scrub(v)
|
||||
output[k] = v
|
||||
return output
|
||||
elif type in (list, StructList):
|
||||
output = []
|
||||
for v in value:
|
||||
v = _scrub(v)
|
||||
output.append(v)
|
||||
return output
|
||||
elif type.__name__ == "bool_": # DEAR ME! Numpy has it's own booleans (value==False could be used, but 0==False in Python. DOH!)
|
||||
if value == False:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
elif hasattr(value, '__json__'):
|
||||
try:
|
||||
return json._default_decoder.decode(value.__json__())
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("problem with calling __json__()", e)
|
||||
elif hasattr(value, '__iter__'):
|
||||
output = []
|
||||
for v in value:
|
||||
v = _scrub(v)
|
||||
output.append(v)
|
||||
return output
|
||||
elif isinstance(value, Decimal):
|
||||
return float(value)
|
||||
else:
|
||||
return value
|
||||
|
||||
|
||||
ARRAY_ROW_LENGTH = 80
|
||||
ARRAY_ITEM_MAX_LENGTH = 30
|
||||
ARRAY_MAX_COLUMNS = 10
|
||||
INDENT = " "
|
||||
|
||||
|
||||
def pretty_json(value):
|
||||
try:
|
||||
if value == None:
|
||||
return "null"
|
||||
elif isinstance(value, basestring):
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
value = value.decode("utf8")
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
|
||||
value = unicode(value.decode("latin1"))
|
||||
Log.warning("Should not have latin1 encoded strings: {{value}}", {"value": value}, e)
|
||||
try:
|
||||
return quote(value)
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
|
||||
try:
|
||||
Log.note("try explicit convert of string with length {{length}}", {"length": len(value)})
|
||||
acc = [u"\""]
|
||||
for c in value:
|
||||
try:
|
||||
try:
|
||||
c2 = ESCAPE_DCT[c]
|
||||
except Exception, h:
|
||||
c2 = c
|
||||
c3 = unicode(c2)
|
||||
acc.append(c3)
|
||||
except BaseException, g:
|
||||
pass
|
||||
# Log.warning("odd character {{ord}} found in string. Ignored.", {"ord": ord(c)}, g)
|
||||
acc.append(u"\"")
|
||||
output = u"".join(acc)
|
||||
Log.note("return value of length {{length}}", {"length": len(output)})
|
||||
return output
|
||||
except BaseException, f:
|
||||
Log.warning("can not even explicit convert", f)
|
||||
return "null"
|
||||
elif isinstance(value, dict):
|
||||
try:
|
||||
if not value:
|
||||
return "{}"
|
||||
items = list(value.items())
|
||||
if len(items) == 1:
|
||||
return "{\"" + items[0][0] + "\": " + pretty_json(items[0][1]).strip() + "}"
|
||||
|
||||
items = sorted(items, lambda a, b: value_compare(a[0], b[0]))
|
||||
values = [quote(k)+": " + indent(pretty_json(v)).strip() for k, v in items if v != None]
|
||||
return "{\n" + INDENT + (",\n"+INDENT).join(values) + "\n}"
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
from .collections import OR
|
||||
|
||||
if OR(not isinstance(k, basestring) for k in value.keys()):
|
||||
Log.error("JSON must have string keys: {{keys}}:", {
|
||||
"keys": [k for k in value.keys()]
|
||||
}, e)
|
||||
|
||||
Log.error("problem making dict pretty: keys={{keys}}:", {
|
||||
"keys": [k for k in value.keys()]
|
||||
}, e)
|
||||
elif isinstance(value, list):
|
||||
if not value:
|
||||
return "[]"
|
||||
|
||||
if ARRAY_MAX_COLUMNS==1:
|
||||
return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"
|
||||
|
||||
if len(value) == 1:
|
||||
j = pretty_json(value[0])
|
||||
if j.find("\n") >= 0:
|
||||
return "[\n" + indent(j) + "\n]"
|
||||
else:
|
||||
return "[" + j + "]"
|
||||
|
||||
js = [pretty_json(v) for v in value]
|
||||
max_len = MAX(len(j) for j in js)
|
||||
if max_len <= ARRAY_ITEM_MAX_LENGTH and AND(j.find("\n") == -1 for j in js):
|
||||
#ALL TINY VALUES
|
||||
num_columns = max(1, min(ARRAY_MAX_COLUMNS, int(floor((ARRAY_ROW_LENGTH + 2.0)/float(max_len+2))))) # +2 TO COMPENSATE FOR COMMAS
|
||||
if len(js)<=num_columns: # DO NOT ADD \n IF ONLY ONE ROW
|
||||
return "[" + ", ".join(js) + "]"
|
||||
if num_columns == 1: # DO NOT rjust IF THERE IS ONLY ONE COLUMN
|
||||
return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"
|
||||
|
||||
content = ",\n".join(
|
||||
", ".join(j.rjust(max_len) for j in js[r:r+num_columns])
|
||||
for r in xrange(0, len(js), num_columns)
|
||||
)
|
||||
return "[\n" + indent(content) + "\n]"
|
||||
|
||||
pretty_list = [pretty_json(v) for v in value]
|
||||
|
||||
output = "[\n"
|
||||
for i, p in enumerate(pretty_list):
|
||||
try:
|
||||
if i > 0:
|
||||
output += ",\n"
|
||||
output += indent(p)
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.warning("problem concatenating string of length {{len1}} and {{len2}}", {
|
||||
"len1": len(output),
|
||||
"len2": len(p)
|
||||
})
|
||||
return output + "\n]"
|
||||
elif hasattr(value, '__json__'):
|
||||
j = value.__json__()
|
||||
if j == None:
|
||||
return " null " # TODO: FIND OUT WHAT CAUSES THIS
|
||||
return pretty_json(json_decoder(j))
|
||||
elif hasattr(value, '__iter__'):
|
||||
return pretty_json(list(value))
|
||||
else:
|
||||
return encode(value)
|
||||
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("Problem turning value ({{value}}) to json", {"value": repr(value)}, e)
|
||||
|
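# Worked example of the column arithmetic above (values chosen for illustration):
# with ARRAY_ROW_LENGTH = 80 and items whose longest rendering is max_len = 6,
#     num_columns = min(ARRAY_MAX_COLUMNS, floor((80 + 2.0) / (6 + 2))) = min(10, 10) = 10
# so 25 such items print as rows of ten, ten and five right-justified columns.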
||||
|
||||
|
||||
|
||||
|
||||
ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t]')
|
||||
def replace(match):
|
||||
return ESCAPE_DCT[match.group(0)]
|
||||
def quote(value):
|
||||
return "\""+ESCAPE.sub(replace, value)+"\""
|
||||
|
||||
|
||||
def indent(value, prefix=INDENT):
|
||||
try:
|
||||
content = value.rstrip()
|
||||
suffix = value[len(content):]
|
||||
lines = content.splitlines()
|
||||
return prefix + (u"\n" + prefix).join(lines) + suffix
|
||||
except Exception, e:
|
||||
raise Exception(u"Problem with indent of value (" + e.message + u")\n" + value)
|
||||
|
||||
|
||||
def value_compare(a, b):
|
||||
if a == None:
|
||||
if b == None:
|
||||
return 0
|
||||
return -1
|
||||
elif b == None:
|
||||
return 1
|
||||
|
||||
if a > b:
|
||||
return 1
|
||||
elif a < b:
|
||||
return -1
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
def datetime2milli(d):
|
||||
try:
|
||||
if d == None:
|
||||
return None
|
||||
elif isinstance(d, datetime):
|
||||
epoch = datetime(1970, 1, 1)
|
||||
elif isinstance(d, date):
|
||||
epoch = date(1970, 1, 1)
|
||||
else:
|
||||
raise Exception("Can not convert "+repr(d)+" to json")
|
||||
|
||||
diff = d - epoch
|
||||
return long(diff.total_seconds()) * 1000L + long(diff.microseconds / 1000)
|
||||
except Exception, e:
|
||||
raise Exception("Can not convert "+repr(d)+" to json", e)
|
||||
|
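# A quick sanity check of datetime2milli (hand-computed, not from the source):
#     datetime2milli(datetime(2014, 1, 1))          -> 1388534400000
#     datetime2milli(date(2014, 1, 1))              -> 1388534400000
#     datetime2milli(datetime(1970, 1, 1, 0, 0, 1)) -> 1000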
||||
|
||||
|
||||
|
||||
# OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON?
|
||||
# http://liangnuren.wordpress.com/2012/08/13/python-json-performance/
|
||||
# http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html
|
||||
if use_pypy:
|
||||
json_encoder = encode
|
||||
else:
|
||||
json_encoder = cPythonJSONEncoder().encode
|
||||
|
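A short usage sketch, assuming this module imports as util.jsons (the module path and the exact output below are illustrative, following the code paths above):

    from util.jsons import json_encoder, pretty_json

    json_encoder({"a": 1, "b": [True, None]})  # u'{"a": 1, "b": [true, null]}'
    pretty_json([1, 2, 3])                     # u'[1, 2, 3]'  (tiny values share one row)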
|
|
@@ -1,480 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import traceback
|
||||
import logging
|
||||
import sys
|
||||
from .struct import listwrap, nvl
|
||||
|
||||
import struct, threads
|
||||
from .strings import indent, expand_template
|
||||
from .threads import Thread
|
||||
|
||||
DEBUG_LOGGING = False
|
||||
ERROR="ERROR"
|
||||
WARNING="WARNING"
|
||||
NOTE="NOTE"
|
||||
|
||||
main_log = None
|
||||
logging_multi = None
|
||||
|
||||
|
||||
|
||||
class Log(object):
|
||||
"""
|
||||
FOR STRUCTURED LOGGING AND EXCEPTION CHAINING
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def new_instance(cls, settings):
|
||||
settings=struct.wrap(settings)
|
||||
if settings["class"]:
|
||||
if not settings["class"].startswith("logging.handlers."):
|
||||
return make_log_from_settings(settings)
|
||||
# elif settings["class"]=="sys.stdout":
|
||||
#CAN BE SUPER SLOW
|
||||
else:
|
||||
return Log_usingLogger(settings)
|
||||
if settings.file: return Log_usingFile(settings.file)
|
||||
if settings.filename: return Log_usingFile(settings.filename)
|
||||
if settings.stream: return Log_usingStream(settings.stream)
|
||||
|
||||
@classmethod
|
||||
def add_log(cls, log):
|
||||
logging_multi.add_log(log)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def debug(template=None, params=None):
|
||||
"""
|
||||
USE THIS FOR DEBUGGING (AND EVENTUAL REMOVAL)
|
||||
"""
|
||||
Log.note(nvl(template, ""), params)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def println(template, params=None):
|
||||
Log.note(template, params)
|
||||
|
||||
@staticmethod
|
||||
def note(template, params=None):
|
||||
template="{{log_timestamp}} - "+template
|
||||
params = nvl(params, {}).copy()
|
||||
|
||||
#NICE TO GATHER MANY MORE ITEMS FOR LOGGING (LIKE STACK TRACES AND LINE NUMBERS)
|
||||
params["log_timestamp"]=datetime.utcnow().strftime("%H:%M:%S")
|
||||
|
||||
main_log.write(template, params)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def warning(template, params=None, cause=None):
|
||||
if isinstance(params, BaseException):
|
||||
cause=params
|
||||
params = None
|
||||
|
||||
if cause and not isinstance(cause, Except):
|
||||
cause=Except(WARNING, unicode(cause), trace=format_trace(traceback.extract_tb(sys.exc_info()[2]), 0))
|
||||
|
||||
e = Except(WARNING, template, params, cause, format_trace(traceback.extract_stack(), 1))
|
||||
Log.note(unicode(e))
|
||||
|
||||
|
||||
#raise an exception with a trace for the cause too
|
||||
@staticmethod
|
||||
def error(
|
||||
template, #human readable template
|
||||
params=None, #parameters for template
|
||||
cause=None, #possible cause
|
||||
offset=0 #stack trace offset (==1 if you do not want to report self)
|
||||
):
|
||||
if params and isinstance(struct.listwrap(params)[0], BaseException):
|
||||
cause=params
|
||||
params = None
|
||||
|
||||
if cause == None:
|
||||
cause = []
|
||||
elif isinstance(cause, list):
|
||||
pass
|
||||
elif isinstance(cause, Except):
|
||||
cause = [cause]
|
||||
else:
|
||||
cause = [Except(ERROR, unicode(cause), trace=format_trace(traceback.extract_tb(sys.exc_info()[2]), offset))]
|
||||
|
||||
trace=format_trace(traceback.extract_stack(), 1+offset)
|
||||
e=Except(ERROR, template, params, cause, trace)
|
||||
raise e
|
||||
|
||||
|
||||
#RUN ME FIRST TO SETUP THE THREADED LOGGING
|
||||
@staticmethod
|
||||
def start(settings=None):
|
||||
##http://victorlin.me/2012/08/good-logging-practice-in-python/
|
||||
if not settings: return
|
||||
if not settings.log: return
|
||||
|
||||
globals()["logging_multi"]=Log_usingMulti()
|
||||
globals()["main_log"] = Log_usingThread(logging_multi)
|
||||
|
||||
for log in listwrap(settings.log):
|
||||
Log.add_log(Log.new_instance(log))
|
||||
|
||||
|
||||
@staticmethod
|
||||
def stop():
|
||||
main_log.stop()
|
||||
|
||||
|
||||
|
||||
def write(self):
|
||||
Log.error("not implemented")
|
||||
|
||||
|
||||
def format_trace(tbs, trim=0):
|
||||
tbs.reverse()
|
||||
list = []
|
||||
for filename, lineno, name, line in tbs[trim:]:
|
||||
item = 'at File "%s", line %d, in %s\n' % (filename.replace("\\", "/"), lineno, name)
|
||||
list.append(item)
|
||||
return "".join(list)
|
||||
|
||||
|
||||
#def format_trace(tb, trim=0):
|
||||
# list = []
|
||||
# for filename, lineno, name, line in traceback.extract_tb(tb)[0:-trim]:
|
||||
# item = 'File "%s", line %d, in %s\n' % (filename,lineno,name)
|
||||
# if line:
|
||||
# item = item + '\t%s\n' % line.strip()
|
||||
# list.append(item)
|
||||
# return "".join(list)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class Except(Exception):
|
||||
def __init__(self, type=ERROR, template=None, params=None, cause=None, trace=None):
|
||||
super(Exception, self).__init__(self)
|
||||
self.type=type
|
||||
self.template=template
|
||||
self.params=params
|
||||
self.cause=cause
|
||||
self.trace=trace
|
||||
|
||||
@property
|
||||
def message(self):
|
||||
return unicode(self)
|
||||
|
||||
def contains(self, value):
|
||||
if self.type==value:
|
||||
return True
|
||||
for c in self.cause:
|
||||
if c.contains(value):
|
||||
return True
|
||||
return False
|
||||
|
||||
def __str__(self):
|
||||
output=self.type+": "+self.template
|
||||
if self.params: output=expand_template(output, self.params)
|
||||
|
||||
if self.trace:
|
||||
output+="\n"+indent(self.trace)
|
||||
|
||||
|
||||
if self.cause:
|
||||
output+="\ncaused by\n\t"+"\nand caused by\n\t".join([c.__str__() for c in self.cause])
|
||||
|
||||
return output+"\n"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class BaseLog(object):
|
||||
def write(self, template, params):
|
||||
pass
|
||||
|
||||
def stop(self):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
class Log_usingFile(BaseLog):
|
||||
|
||||
def __init__(self, file):
|
||||
assert file
|
||||
|
||||
from files import File
|
||||
self.file=File(file)
|
||||
if self.file.exists:
|
||||
self.file.backup()
|
||||
self.file.delete()
|
||||
|
||||
self.file_lock=threads.Lock()
|
||||
|
||||
|
||||
def write(self, template, params):
|
||||
from files import File
|
||||
with self.file_lock:
|
||||
self.file.append(expand_template(template, params))
|
||||
|
||||
|
||||
|
||||
#WRAP PYTHON CLASSIC logger OBJECTS
|
||||
class Log_usingLogger(BaseLog):
|
||||
def __init__(self, settings):
|
||||
self.logger=logging.Logger("unique name", level=logging.INFO)
|
||||
self.logger.addHandler(make_log_from_settings(settings))
|
||||
|
||||
# TURNS OUT LOGGERS ARE REALLY SLOW TOO
|
||||
self.queue = threads.Queue()
|
||||
self.thread = Thread("log to logger", time_delta_pusher, appender=self.logger.info, queue=self.queue, interval=timedelta(seconds=0.3))
|
||||
self.thread.start()
|
||||
|
||||
def write(self, template, params):
|
||||
# http://docs.python.org/2/library/logging.html#logging.LogRecord
|
||||
self.queue.add({"template": template, "params": params})
|
||||
|
||||
def stop(self):
|
||||
try:
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Log_usingLogger sees stop, adding stop to queue\n")
|
||||
self.queue.add(Thread.STOP) #BE PATIENT, LET REST OF MESSAGE BE SENT
|
||||
self.thread.join()
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Log_usingLogger done\n")
|
||||
except Exception, e:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.queue.close()
|
||||
except Exception, f:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
def make_log_from_settings(settings):
|
||||
assert settings["class"]
|
||||
|
||||
# IMPORT MODULE FOR HANDLER
|
||||
path=settings["class"].split(".")
|
||||
class_name=path[-1]
|
||||
path=".".join(path[:-1])
|
||||
temp=__import__(path, globals(), locals(), [class_name], -1)
|
||||
constructor=object.__getattribute__(temp, class_name)
|
||||
|
||||
#IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
|
||||
if settings.filename:
|
||||
from files import File
|
||||
f = File(settings.filename)
|
||||
if not f.parent.exists:
|
||||
f.parent.create()
|
||||
|
||||
params = settings.dict
|
||||
del params['class']
|
||||
return constructor(**params)
|
||||
|
||||
|
||||
def time_delta_pusher(please_stop, appender, queue, interval):
|
||||
"""
|
||||
appender - THE FUNCTION THAT ACCEPTS A STRING
|
||||
queue - FILLED WITH LINES TO WRITE
|
||||
interval - timedelta
|
||||
USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
|
||||
"""
|
||||
|
||||
if not isinstance(interval, timedelta):
|
||||
Log.error("Expecting interval to be a timedelta")
|
||||
|
||||
next_run = datetime.utcnow() + interval
|
||||
|
||||
while not please_stop:
|
||||
Thread.sleep(till=next_run)
|
||||
next_run = datetime.utcnow() + interval
|
||||
logs = queue.pop_all()
|
||||
if logs:
|
||||
lines = []
|
||||
for log in logs:
|
||||
try:
|
||||
if log == Thread.STOP:
|
||||
please_stop.go()
|
||||
next_run = datetime.utcnow()
|
||||
else:
|
||||
lines.append(expand_template(log.get("template", None), log.get("params", None)))
|
||||
except Exception, e:
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Trouble formatting logs: "+e.message)
|
||||
raise e
|
||||
try:
|
||||
if DEBUG_LOGGING and please_stop:
|
||||
sys.stdout.write("Last call to appender with "+str(len(lines))+" lines\n")
|
||||
appender(u"\n".join(lines)+u"\n")
|
||||
if DEBUG_LOGGING and please_stop:
|
||||
sys.stdout.write("Done call to appender with "+str(len(lines))+" lines\n")
|
||||
except Exception, e:
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Trouble with appender: "+e.message)
|
||||
raise e
|
||||
|
||||
|
||||
class Log_usingStream(BaseLog):
|
||||
#stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
|
||||
#WHICH WILL eval() TO ONE
|
||||
def __init__(self, stream):
|
||||
assert stream
|
||||
|
||||
use_UTF8 = False
|
||||
|
||||
if isinstance(stream, basestring):
|
||||
if stream.startswith("sys."):
|
||||
use_UTF8 = True #sys.* ARE OLD AND CAN NOT HANDLE unicode
|
||||
self.stream = eval(stream)
|
||||
name = stream
|
||||
else:
|
||||
self.stream = stream
|
||||
name = "stream"
|
||||
|
||||
#WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
|
||||
from threads import Queue
|
||||
|
||||
if use_UTF8:
|
||||
def utf8_appender(value):
|
||||
if isinstance(value, unicode):
|
||||
value = value.encode('utf-8')
|
||||
self.stream.write(value)
|
||||
|
||||
appender = utf8_appender
|
||||
else:
|
||||
appender = self.stream.write
|
||||
|
||||
self.queue = Queue()
|
||||
self.thread = Thread("log to " + name, time_delta_pusher, appender=appender, queue=self.queue, interval=timedelta(seconds=0.3))
|
||||
self.thread.start()
|
||||
|
||||
|
||||
def write(self, template, params):
|
||||
try:
|
||||
self.queue.add({"template": template, "params": params})
|
||||
return self
|
||||
except Exception, e:
|
||||
raise e #OH NO!
|
||||
|
||||
def stop(self):
|
||||
try:
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Log_usingStream sees stop, adding stop to queue\n")
|
||||
self.queue.add(Thread.STOP) #BE PATIENT, LET REST OF MESSAGE BE SENT
|
||||
self.thread.join()
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Log_usingStream done\n")
|
||||
except Exception, e:
|
||||
if DEBUG_LOGGING:
|
||||
raise e
|
||||
|
||||
try:
|
||||
self.queue.close()
|
||||
except Exception, f:
|
||||
if DEBUG_LOGGING:
|
||||
raise f
|
||||
|
||||
|
||||
|
||||
class Log_usingThread(BaseLog):
|
||||
def __init__(self, logger):
|
||||
#DELAYED LOAD FOR THREADS MODULE
|
||||
from threads import Queue
|
||||
|
||||
self.queue=Queue()
|
||||
self.logger=logger
|
||||
|
||||
def worker(please_stop):
|
||||
while not please_stop:
|
||||
Thread.sleep(1)
|
||||
logs = self.queue.pop_all()
|
||||
for log in logs:
|
||||
if log==Thread.STOP:
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Log_usingThread.worker() sees stop, filling rest of queue\n")
|
||||
please_stop.go()
|
||||
else:
|
||||
self.logger.write(**log)
|
||||
self.thread=Thread("log thread", worker)
|
||||
self.thread.start()
|
||||
|
||||
def write(self, template, params):
|
||||
try:
|
||||
self.queue.add({"template":template, "params":params})
|
||||
return self
|
||||
except Exception, e:
|
||||
sys.stdout.write("IF YOU SEE THIS, IT IS LIKELY YOU FORGOT TO RUN Log.start() FIRST\n")
|
||||
raise e #OH NO!
|
||||
|
||||
def stop(self):
|
||||
try:
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("injecting stop into queue\n")
|
||||
self.queue.add(Thread.STOP) #BE PATIENT, LET REST OF MESSAGE BE SENT
|
||||
self.thread.join()
|
||||
if DEBUG_LOGGING:
|
||||
sys.stdout.write("Log_usingThread telling logger to stop\n")
|
||||
self.logger.stop()
|
||||
except Exception, e:
|
||||
if DEBUG_LOGGING:
|
||||
raise e
|
||||
|
||||
|
||||
try:
|
||||
self.queue.close()
|
||||
except Exception, f:
|
||||
if DEBUG_LOGGING:
|
||||
raise f
|
||||
|
||||
|
||||
|
||||
class Log_usingMulti(BaseLog):
|
||||
def __init__(self):
|
||||
self.many=[]
|
||||
|
||||
def write(self, template, params):
|
||||
for m in self.many:
|
||||
try:
|
||||
m.write(template, params)
|
||||
except Exception, e:
|
||||
pass
|
||||
return self
|
||||
|
||||
def add_log(self, logger):
|
||||
self.many.append(logger)
|
||||
return self
|
||||
|
||||
def remove_log(self, logger):
|
||||
self.many.remove(logger)
|
||||
return self
|
||||
|
||||
def clear_log(self):
|
||||
self.many=[]
|
||||
|
||||
def stop(self):
|
||||
for m in self.many:
|
||||
try:
|
||||
m.stop()
|
||||
except Exception, e:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
if not main_log:
|
||||
main_log = Log_usingStream("sys.stdout")
|
|
@@ -1,115 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
import math
|
||||
from . import struct
|
||||
from .struct import Null, nvl
|
||||
from .logs import Log
|
||||
from .strings import find_first
|
||||
|
||||
class Math(object):
|
||||
|
||||
@staticmethod
|
||||
def bayesian_add(a, b):
|
||||
if a>=1 or b>=1 or a<=0 or b<=0: Log.error("Only allowed values *between* zero and one")
|
||||
return a*b/(a*b+(1-a)*(1-b))
|
||||
|
||||
|
||||
|
||||
# FOR GOODNESS SAKE - IF YOU PROVIDE A METHOD abs(), PLEASE PROVIDE ITS COMPLEMENT
|
||||
# x = abs(x)*sign(x)
|
||||
# FOUND IN numpy, BUT WE USUALLY DO NOT NEED TO BRING IN A BIG LIB FOR A SIMPLE DECISION
|
||||
@staticmethod
|
||||
def sign(v):
|
||||
if v<0: return -1
|
||||
if v>0: return +1
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
def is_number(s):
|
||||
try:
|
||||
float(s)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def is_integer(s):
|
||||
try:
|
||||
if float(s)==round(float(s), 0):
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def round_sci(value, decimal=None, digits=None):
|
||||
if digits != None:
|
||||
m=pow(10, math.floor(math.log10(value)))
|
||||
return round(value/m, digits)*m
|
||||
|
||||
return round(value, decimal)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def floor(value, mod=None):
|
||||
"""
|
||||
x == floor(x, a) + mod(x, a) FOR ALL a
|
||||
"""
|
||||
mod = nvl(mod, 1)
|
||||
v = int(math.floor(value))
|
||||
return v - (v % mod)
|
||||
|
||||
|
||||
#RETURN A VALUE CLOSE TO value, BUT WITH A SHORTER unicode REPRESENTATION: len(unicode(approx_str(value))) <= len(unicode(value))
|
||||
@staticmethod
|
||||
def approx_str(value):
|
||||
v=unicode(value)
|
||||
d=v.find(".")
|
||||
if d==-1: return value
|
||||
|
||||
i=find_first(v, ["9999", "0000"], d)
|
||||
if i==-1: return value
|
||||
|
||||
return Math.round_sci(value, decimal=i-d-1)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def min(values):
|
||||
output = Null
|
||||
for v in values:
|
||||
if v == None:
|
||||
continue
|
||||
if math.isnan(v):
|
||||
continue
|
||||
if output == None:
|
||||
output = v
|
||||
continue
|
||||
output = min(output, v)
|
||||
return output
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
def max(values):
|
||||
output = Null
|
||||
for v in values:
|
||||
if v == None:
|
||||
continue
|
||||
if math.isnan(v):
|
||||
continue
|
||||
if output == None:
|
||||
output = v
|
||||
continue
|
||||
output = max(output, v)
|
||||
return output
|
||||
|
||||
|
|
@@ -0,0 +1,163 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import math
|
||||
import __builtin__
|
||||
from ..struct import Null, nvl
|
||||
from ..env.logs import Log
|
||||
from ..strings import find_first
|
||||
from . import stats
|
||||
|
||||
|
||||
class Math(object):
|
||||
"""
|
||||
MATH FUNCTIONS THAT ASSUME None IMPLY *NOT APPLICABLE* RATHER THAN *MISSING*
|
||||
LET "." BE SOME OPERATOR (+, -, *, etc)
|
||||
a.None == None
|
||||
None.a == None
|
||||
.None == None
|
||||
func(None, **kwargs) == None
|
||||
"""
|
||||
|
||||
|
||||
@staticmethod
|
||||
def bayesian_add(*args):
|
||||
a = args[0]
|
||||
if a >= 1 or a <= 0:
|
||||
Log.error("Only allowed values *between* zero and one")
|
||||
|
||||
for b in args[1:]:
|
||||
if b == None:
|
||||
continue
|
||||
if b >= 1 or b <= 0:
|
||||
Log.error("Only allowed values *between* zero and one")
|
||||
a = a * b / (a * b + (1 - a) * (1 - b))
|
||||
|
||||
return a
|
||||
|
||||
@staticmethod
|
||||
def bayesian_subtract(a, b):
|
||||
return Math.bayesian_add(a, 1 - b)
|
||||
|
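    # Worked example (hand-computed): two independent 0.9 signals reinforce,
    #     bayesian_add(0.9, 0.9) = 0.81 / (0.81 + 0.01) ~= 0.988
    # while bayesian_subtract(0.9, 0.9) pulls back to 0.5, since
    #     bayesian_add(0.9, 1 - 0.9) = 0.09 / (0.09 + 0.09) = 0.5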
||||
|
||||
@staticmethod
|
||||
def abs(v):
|
||||
if v == None:
|
||||
return Null
|
||||
return abs(v)
|
||||
|
||||
@staticmethod
|
||||
def log(v, base=None):
|
||||
if v == None:
|
||||
return Null
|
||||
return math.log(v, base)
|
||||
|
||||
@staticmethod
|
||||
def log10(v):
|
||||
try:
|
||||
return math.log(v, 10)
|
||||
except Exception, e:
|
||||
return Null
|
||||
|
||||
# FOR GOODNESS SAKE - IF YOU PROVIDE A METHOD abs(), PLEASE PROVIDE ITS COMPLEMENT
|
||||
# x = abs(x)*sign(x)
|
||||
# FOUND IN numpy, BUT WE USUALLY DO NOT NEED TO BRING IN A BIG LIB FOR A SIMPLE DECISION
|
||||
@staticmethod
|
||||
def sign(v):
|
||||
if v == None:
|
||||
return Null
|
||||
if v < 0:
|
||||
return -1
|
||||
if v > 0:
|
||||
return +1
|
||||
return 0
|
||||
|
||||
|
||||
@staticmethod
|
||||
def is_number(s):
|
||||
try:
|
||||
float(s)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def is_nan(s):
|
||||
return math.isnan(s)
|
||||
|
||||
@staticmethod
|
||||
def is_integer(s):
|
||||
try:
|
||||
if float(s) == round(float(s), 0):
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def round(value, decimal=0, digits=None):
|
||||
"""
|
||||
ROUND TO GIVEN NUMBER OF DIGITS, OR GIVEN NUMBER OF DECIMAL PLACES
|
||||
decimal - NUMBER OF DIGITS AFTER DECIMAL POINT (NEGATIVE IS VALID)
|
||||
digits - NUMBER OF SIGNIFICANT DIGITS (LESS THAN 1 IS INVALID)
|
||||
"""
|
||||
if value == None:
|
||||
return None
|
||||
|
||||
if digits != None:
|
||||
m = pow(10, math.ceil(math.log10(value)))
|
||||
return __builtin__.round(value / m, digits) * m
|
||||
|
||||
return __builtin__.round(value, decimal)
|
||||
|
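    # Worked examples (hand-computed) of the two modes:
    #     Math.round(123.456, decimal=1)  -> 123.5    one digit after the point
    #     Math.round(123.456, digits=2)   -> 120.0    two significant digits:
    #         m = 10 ** ceil(log10(123.456)) = 1000; round(0.123456, 2) * 1000 = 120.0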
||||
|
||||
@staticmethod
|
||||
def floor(value, mod=None):
|
||||
"""
|
||||
x == floor(x, a) + mod(x, a) FOR ALL a
|
||||
"""
|
||||
mod = nvl(mod, 1)
|
||||
v = int(math.floor(value))
|
||||
return v - (v % mod)
|
||||
|
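    # Concrete check of the invariant (hand-computed):
    #     Math.floor(7.5, 3) = 7 - (7 % 3) = 6, and 7.5 == 6 + mod(7.5, 3) with mod = 1.5
    #     Math.floor(-1, 3)  = -1 - (-1 % 3) = -1 - 2 = -3   (floors toward -infinity)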
||||
|
||||
#RETURN A VALUE CLOSE TO value, BUT WITH A SHORTER unicode REPRESENTATION: len(unicode(approx_str(value))) <= len(unicode(value))
|
||||
@staticmethod
|
||||
def approx_str(value):
|
||||
v = unicode(value)
|
||||
d = v.find(".")
|
||||
if d == -1:
|
||||
return value
|
||||
|
||||
if Math.round(value) == value:
|
||||
return int(value)
|
||||
|
||||
i = find_first(v, ["9999", "0000"], d)
|
||||
if i != -1:
|
||||
return Math.round(value, decimal=i - d - 1)
|
||||
|
||||
return value
|
||||
|
||||
@staticmethod
|
||||
def ceiling(value):
|
||||
return int(math.ceil(value))
|
||||
|
||||
|
||||
@staticmethod
|
||||
def max(*values):
|
||||
output = None
|
||||
for v in values:
|
||||
if v == None:
|
||||
continue
|
||||
elif output == None or v > output:
|
||||
output = v
|
||||
else:
|
||||
pass
|
||||
return output
|
|
@@ -0,0 +1,90 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..cnv import CNV
|
||||
from ..env.logs import Log
|
||||
from ..queries import Q
|
||||
from ..struct import Struct
|
||||
from ..maths.randoms import Random
|
||||
from ..vendor.aespython import key_expander, aes_cipher, cbc_mode
|
||||
|
||||
|
||||
DEBUG = False
|
||||
|
||||
|
||||
def encrypt(text, _key, salt=None):
|
||||
"""
|
||||
RETURN JSON OF ENCRYPTED DATA {"salt":s, "length":l, "data":d}
|
||||
"""
|
||||
if not isinstance(text, unicode):
|
||||
Log.error("only unicode is encrypted")
|
||||
if _key is None:
|
||||
Log.error("Expecting a key")
|
||||
|
||||
if salt is None:
|
||||
salt = Random.bytes(16)
|
||||
|
||||
data = bytearray(text.encode("utf8"))
|
||||
|
||||
#Initialize encryption using key and iv
|
||||
key_expander_256 = key_expander.KeyExpander(256)
|
||||
expanded_key = key_expander_256.expand(_key)
|
||||
aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
|
||||
aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
|
||||
aes_cbc_256.set_iv(salt)
|
||||
|
||||
output = Struct()
|
||||
output.type = "AES256"
|
||||
output.salt = CNV.bytearray2base64(salt)
|
||||
output.length = len(data)
|
||||
|
||||
encrypted = bytearray()
|
||||
for i, d in Q.groupby(data, size=16):
|
||||
encrypted.extend(aes_cbc_256.encrypt_block(d))
|
||||
output.data = CNV.bytearray2base64(encrypted)
|
||||
json = CNV.object2JSON(output)
|
||||
|
||||
if DEBUG:
|
||||
test = decrypt(json, _key)
|
||||
if test != text:
|
||||
Log.error("problem with encryption")
|
||||
|
||||
return json
|
||||
|
||||
|
||||
def decrypt(data, _key):
|
||||
"""
|
||||
ACCEPT JSON OF ENCRYPTED DATA {"salt":s, "length":l, "data":d}
|
||||
"""
|
||||
#Key and iv have not been generated or provided, bail out
|
||||
if _key is None:
|
||||
Log.error("Expecting a key")
|
||||
|
||||
_input = CNV.JSON2object(data)
|
||||
|
||||
#Initialize encryption using key and iv
|
||||
key_expander_256 = key_expander.KeyExpander(256)
|
||||
expanded_key = key_expander_256.expand(_key)
|
||||
aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
|
||||
aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
|
||||
aes_cbc_256.set_iv(CNV.base642bytearray(_input.salt))
|
||||
|
||||
raw = CNV.base642bytearray(_input.data)
|
||||
out_data = bytearray()
|
||||
for i, e in Q.groupby(raw, size=16):
|
||||
out_data.extend(aes_cbc_256.decrypt_block(e))
|
||||
|
||||
return str(out_data[:_input.length:]).decode("utf8")
|
||||
|
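A round-trip sketch, assuming the package imports as util.crypto and that KeyExpander(256) expects a 32-byte key (both are assumptions, not stated in this file):

    from util.crypto import encrypt, decrypt
    from util.maths.randoms import Random

    key = Random.bytes(32)                     # assumed 256-bit key as a bytearray
    packet = encrypt(u"attack at dawn", key)   # JSON: {"type", "salt", "length", "data"}
    assert decrypt(packet, key) == u"attack at dawn"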
||||
|
||||
|
||||
|
|
@@ -0,0 +1,56 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import random
|
||||
import string
|
||||
|
||||
|
||||
SIMPLE_ALPHABET = string.ascii_letters + string.digits
|
||||
SEED = random.Random()
|
||||
|
||||
|
||||
class Random(object):
|
||||
@staticmethod
|
||||
def string(length, alphabet=SIMPLE_ALPHABET):
|
||||
result = ''
|
||||
for i in range(0, length):
|
||||
result += SEED.choice(alphabet)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def hex(length):
|
||||
return Random.string(length, string.digits + 'ABCDEF')
|
||||
|
||||
@staticmethod
|
||||
def int(*args):
|
||||
return random.randrange(*args)
|
||||
|
||||
@staticmethod
|
||||
def float(*args):
|
||||
if args:
|
||||
return random.random()*args[0]
|
||||
else:
|
||||
return random.random()
|
||||
|
||||
@staticmethod
|
||||
def sample(data, count):
|
||||
num = len(data)
|
||||
return [data[Random.int(num)] for i in range(count)]
|
||||
|
||||
@staticmethod
|
||||
def combination(data):
|
||||
output = []
|
||||
data = list(data)
|
||||
num = len(data)
|
||||
for i in range(num):
|
||||
n = Random.int(num-i)
|
||||
output.append(data[n])
|
||||
del data[n]
|
||||
return output
|
||||
|
||||
|
||||
@staticmethod
|
||||
def bytes(count):
|
||||
output = bytearray(random.randrange(256) for i in range(count))
|
||||
return output
|
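A small usage sketch (outputs are illustrative only):

    Random.string(8)               # e.g. 'h3KxQv9Z'
    Random.combination(range(5))   # a random permutation such as [3, 0, 4, 1, 2]
    Random.bytes(16)               # bytearray, suitable as the AES-CBC salt above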
|
@@ -0,0 +1,336 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import sys
|
||||
from ..vendor import strangman
|
||||
|
||||
from math import sqrt
|
||||
from ..cnv import CNV
|
||||
from ..collections import OR
|
||||
from ..struct import nvl, Struct, Null
|
||||
from ..env.logs import Log
|
||||
|
||||
|
||||
DEBUG = True
|
||||
DEBUG_STRANGMAN = True
|
||||
EPSILON = 0.000000001
|
||||
ABS_EPSILON = sys.float_info.min*2 # *2 FOR SAFETY
|
||||
|
||||
|
||||
if DEBUG_STRANGMAN:
|
||||
try:
|
||||
import numpy
|
||||
from scipy import stats
|
||||
import scipy
|
||||
except Exception, e:
|
||||
DEBUG_STRANGMAN = False
|
||||
|
||||
|
||||
def chisquare(f_obs, f_exp):
|
||||
py_result = strangman.stats.chisquare(
|
||||
f_obs,
|
||||
f_exp
|
||||
)
|
||||
|
||||
if DEBUG_STRANGMAN:
|
||||
sp_result = scipy.stats.chisquare(
|
||||
numpy.array(f_obs),
|
||||
f_exp=numpy.array(f_exp)
|
||||
)
|
||||
if not (closeEnough(sp_result[0], py_result[0]) and closeEnough(sp_result[1], py_result[1])):
|
||||
Log.error("problem with stats lib")
|
||||
|
||||
return py_result
|
||||
|
||||
|
||||
|
||||
def stats2z_moment(stats):
    # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
    # ADDED count
    mc0, mc1, mc2, skew, kurt = stats.count, nvl(stats.mean, 0), nvl(stats.variance, 0), nvl(stats.skew, 0), nvl(stats.kurtosis, 0)

    mz0 = mc0
    mz1 = mc1 * mc0
    mz2 = (mc2 + mc1 * mc1) * mc0
    mc3 = nvl(skew, 0) * (mc2 ** 1.5)  # 3rd central moment
    mz3 = (mc3 + 3 * mc1 * mc2 + mc1 ** 3) * mc0  # 3rd non-central moment
    mc4 = (nvl(kurt, 0) + 3.0) * (mc2 ** 2.0)  # 4th central moment
    mz4 = (mc4 + 4 * mc1 * mc3 + 6 * mc1 * mc1 * mc2 + mc1 ** 4) * mc0

    m = Z_moment(mz0, mz1, mz2, mz3, mz4)
    if DEBUG:
        globals()["DEBUG"] = False
        try:
            v = z_moment2stats(m, unbiased=False)
            assert closeEnough(v.count, stats.count)
            assert closeEnough(v.mean, stats.mean)
            assert closeEnough(v.variance, stats.variance)
            assert closeEnough(v.skew, stats.skew)
            assert closeEnough(v.kurtosis, stats.kurtosis)
        except Exception, e:
            v = z_moment2stats(m, unbiased=False)
            Log.error("programmer error")
        globals()["DEBUG"] = True
    return m

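# A standalone check of the central-to-raw moment identities used in
# stats2z_moment (plain Python, no project imports): with population
# variance, sum(x) == mean*n and sum(x**2) == (variance + mean**2)*n.
_data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]
_n = float(len(_data))
_mean = sum(_data) / _n
_var = sum((x - _mean) ** 2 for x in _data) / _n

assert abs(_mean * _n - sum(_data)) < 1e-9
assert abs((_var + _mean * _mean) * _n - sum(x * x for x in _data)) < 1e-9
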
def closeEnough(a, b):
    if a == None and b == None:
        return True
    if a == None or b == None:
        return False

    if abs(a - b) < ABS_EPSILON:
        return True

    if abs(b) > abs(a):
        err = abs((a - b) / b)
    else:
        err = abs((a - b) / a)

    if err < EPSILON:
        return True
    return False

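# closeEnough combines an absolute floor (ABS_EPSILON, for values near zero)
# with a relative test (EPSILON); two illustrative cases:
assert closeEnough(1e9, 1e9 + 0.5)      # relative error ~5e-10 < EPSILON
assert not closeEnough(1.0, 1.000001)   # relative error ~1e-6 >= EPSILON
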
def z_moment2stats(z_moment, unbiased=True):
    Z = z_moment.S
    N = Z[0]
    if N == 0:
        return Stats()

    mean = Z[1] / N
    Z2 = Z[2] / N
    Z3 = Z[3] / N
    Z4 = Z[4] / N

    if N == 1:
        variance = None
        skew = None
        kurtosis = None
    else:
        variance = (Z2 - mean * mean)
        error = -EPSILON * (abs(Z2) + 1)  # EXPECTED FLOAT ERROR

        if error < variance <= 0:  # TODO: MAKE THIS A TEST ON SIGNIFICANT DIGITS
            variance = 0
            skew = None
            kurtosis = None
        elif variance < error:
            Log.error("variance can not be negative ({{var}})", {"var": variance})
        else:
            mc3 = (Z3 - (3 * mean * variance + mean ** 3))  # 3rd central moment
            mc4 = (Z4 - (4 * mean * mc3 + 6 * mean * mean * variance + mean ** 4))
            skew = mc3 / (variance ** 1.5)
            kurtosis = (mc4 / (variance ** 2.0)) - 3.0

    stats = Stats(
        count=N,
        mean=mean,
        variance=variance,
        skew=skew,
        kurtosis=kurtosis,
        unbiased=unbiased
    )

    if DEBUG:
        globals()["DEBUG"] = False
        v = Null
        try:
            v = stats2z_moment(stats)
            for i in range(5):
                assert closeEnough(v.S[i], Z[i])
        except Exception, e:
            Log.error("Conversion failed. Programmer error:\nfrom={{from|indent}},\nresult stats={{stats|indent}},\nexpected param={{expected|indent}}", {
                "from": Z,
                "stats": stats,
                "expected": v.S
            }, e)
        globals()["DEBUG"] = True

    return stats

class Stats(Struct):
    def __init__(self, **kwargs):
        Struct.__init__(self)

        if "samples" in kwargs:
            s = z_moment2stats(Z_moment.new_instance(kwargs["samples"]))
            self.count = s.count
            self.mean = s.mean
            self.variance = s.variance
            self.skew = s.skew
            self.kurtosis = s.kurtosis
            return

        if "count" not in kwargs:
            self.count = 0
            self.mean = None
            self.variance = None
            self.skew = None
            self.kurtosis = None
        elif "mean" not in kwargs:
            self.count = kwargs["count"]
            self.mean = None
            self.variance = None
            self.skew = None
            self.kurtosis = None
        elif "variance" not in kwargs and "std" not in kwargs:
            self.count = kwargs["count"]
            self.mean = kwargs["mean"]
            self.variance = 0
            self.skew = None
            self.kurtosis = None
        elif "skew" not in kwargs:
            self.count = kwargs["count"]
            self.mean = kwargs["mean"]
            self.variance = kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2
            self.skew = None
            self.kurtosis = None
        elif "kurtosis" not in kwargs:
            self.count = kwargs["count"]
            self.mean = kwargs["mean"]
            self.variance = kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2
            self.skew = kwargs["skew"]
            self.kurtosis = None
        else:
            self.count = kwargs["count"]
            self.mean = kwargs["mean"]
            self.variance = kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2
            self.skew = kwargs["skew"]
            self.kurtosis = kwargs["kurtosis"]

        self.unbiased = \
            kwargs["unbiased"] if "unbiased" in kwargs else \
            not kwargs["biased"] if "biased" in kwargs else \
            False

    @property
    def std(self):
        return sqrt(self.variance)

class Z_moment(object):
    """
    ZERO-CENTERED MOMENTS
    """

    def __init__(self, *args):
        self.S = tuple(args)

    def __add__(self, other):
        return Z_moment(*map(add, self.S, other.S))

    def __sub__(self, other):
        return Z_moment(*map(sub, self.S, other.S))

    @property
    def tuple(self):
        # RETURN AS ORDERED TUPLE
        return self.S

    @property
    def dict(self):
        # RETURN HASH OF SUMS
        return {u"s" + unicode(i): m for i, m in enumerate(self.S)}

    @staticmethod
    def new_instance(values=None):
        if values == None:
            return Z_moment()

        return Z_moment(
            len(values),
            sum([n for n in values]),
            sum([pow(n, 2) for n in values]),
            sum([pow(n, 3) for n in values]),
            sum([pow(n, 4) for n in values])
        )


def add(a, b):
    return nvl(a, 0) + nvl(b, 0)


def sub(a, b):
    return nvl(a, 0) - nvl(b, 0)


def z_moment2dict(z):
    # RETURN HASH OF SUMS
    return {u"s" + unicode(i): m for i, m in enumerate(z.S)}


setattr(CNV, "z_moment2dict", staticmethod(z_moment2dict))

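# Because Z_moment stores raw power sums, statistics for two data sets can
# be merged with element-wise addition (the __add__ above); a standalone
# illustration in plain Python:
def _power_sums(values):
    return tuple(sum(v ** k for v in values) for k in range(5))

_a, _b = [1.0, 2.0, 3.0], [4.0, 5.0]
assert tuple(x + y for x, y in zip(_power_sums(_a), _power_sums(_b))) == _power_sums(_a + _b)
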
def median(values, simple=True, mean_weight=0.0):
    """
    RETURN MEDIAN VALUE

    IF simple=False THEN, IN THE EVENT OF MULTIPLE INSTANCES OF THE
    MEDIAN VALUE, THE MEDIAN IS INTERPOLATED BASED ON ITS POSITION
    IN THE MEDIAN RANGE

    mean_weight IS TO PICK A MEDIAN VALUE IN THE ODD CASE THAT IS
    CLOSER TO THE MEAN (PICK A MEDIAN BETWEEN TWO MODES IN BIMODAL CASE)
    """

    if OR(v == None for v in values):
        Log.error("median is not ready to handle None")

    try:
        if not values:
            return Null

        l = len(values)
        _sorted = sorted(values)

        middle = l / 2
        _median = float(_sorted[middle])

        if len(_sorted) == 1:
            return _median

        if simple:
            if l % 2 == 0:
                return float(_sorted[middle - 1] + _median) / 2
            return _median

        # FIND RANGE OF THE median
        start_index = middle - 1
        while start_index > 0 and _sorted[start_index] == _median:
            start_index -= 1
        start_index += 1
        stop_index = middle + 1
        while stop_index < l and _sorted[stop_index] == _median:
            stop_index += 1

        num_middle = stop_index - start_index

        if l % 2 == 0:
            if num_middle == 1:
                return float(_sorted[middle - 1] + _median) / 2
            else:
                return (_median - 0.5) + float(middle - start_index) / float(num_middle)
        else:
            if num_middle == 1:
                return (1 - mean_weight) * _median + mean_weight * (_sorted[middle - 1] + _sorted[middle + 1]) / 2
            else:
                return (_median - 0.5) + float(middle + 0.5 - start_index) / float(num_middle)
    except Exception, e:
        Log.error("problem with median of {{values}}", {"values": values}, e)


zero = Stats()

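# An illustration of the two modes, traced from the code above:
assert median([1, 2, 2, 3, 4]) == 2.0                 # simple positional median
assert median([1, 2, 2, 3, 4], simple=False) == 2.25  # interpolated within the run of 2s
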
@@ -1,160 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

import threading
from .struct import nvl
from .logs import Log
from .threads import Queue, Thread


DEBUG = True


class worker_thread(threading.Thread):

    # in_queue MUST CONTAIN HASH OF PARAMETERS FOR load()
    def __init__(self, name, in_queue, out_queue, function):
        threading.Thread.__init__(self)
        self.name = name
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.function = function
        self.keep_running = True
        self.num_runs = 0
        self.start()

    # REQUIRED TO DETECT KEYBOARD, AND OTHER, INTERRUPTS
    def join(self, timeout=None):
        while self.isAlive():
            Log.note("Waiting on thread {{thread}}", {"thread": self.name})
            threading.Thread.join(self, nvl(timeout, 0.5))

    def run(self):
        got_stop = False
        while self.keep_running:
            request = self.in_queue.pop()
            if request == Thread.STOP:
                got_stop = True
                if self.in_queue.queue:
                    Log.warning("programmer error")
                break
            if not self.keep_running:
                break

            try:
                if DEBUG and hasattr(self.function, "func_name"):
                    Log.note("run {{function}}", {"function": self.function.func_name})
                result = self.function(**request)
                if self.out_queue != None:
                    self.out_queue.add({"response": result})
            except Exception, e:
                Log.warning("Can not execute with params={{params}}", {"params": request}, e)
                if self.out_queue != None:
                    self.out_queue.add({"exception": e})
            finally:
                self.num_runs += 1

        self.keep_running = False
        if self.num_runs == 0:
            Log.warning("{{name}} thread did no work", {"name": self.name})
        if DEBUG and self.num_runs != 1:
            Log.note("{{name}} thread did {{num}} units of work", {
                "name": self.name,
                "num": self.num_runs
            })
        if got_stop and self.in_queue.queue:
            Log.warning("multithread programmer error")
        if DEBUG:
            Log.note("{{thread}} DONE", {"thread": self.name})

    def stop(self):
        self.keep_running = False


# PASS A SET OF FUNCTIONS TO BE EXECUTED (ONE PER THREAD)
# PASS AN (ITERATOR/LIST) OF PARAMETERS TO BE ISSUED TO NEXT AVAILABLE THREAD
class Multithread(object):

    def __init__(self, functions):
        self.outbound = Queue()
        self.inbound = Queue()

        # MAKE THREADS
        self.threads = []
        for t, f in enumerate(functions):
            thread = worker_thread("worker " + unicode(t), self.inbound, self.outbound, f)
            self.threads.append(thread)

    def __enter__(self):
        return self

    # WAIT FOR ALL QUEUED WORK TO BE DONE BEFORE RETURNING
    def __exit__(self, type, value, traceback):
        try:
            if isinstance(value, Exception):
                self.inbound.close()
            self.inbound.add(Thread.STOP)
            self.join()
        except Exception, e:
            Log.warning("Problem sending stops", e)

    # IF YOU SENT A stop(), OR Thread.STOP, YOU MAY WAIT FOR SHUTDOWN
    def join(self):
        try:
            # WAIT FOR FINISH
            for t in self.threads:
                t.join()
        except (KeyboardInterrupt, SystemExit):
            Log.note("Shutdown started, please be patient")
        except Exception, e:
            Log.error("Unusual shutdown!", e)
        finally:
            for t in self.threads:
                t.keep_running = False
            self.inbound.close()
            self.outbound.close()
            for t in self.threads:
                t.join()

    # RETURN A GENERATOR THAT HAS len(parameters) RESULTS (ANY ORDER)
    def execute(self, request):
        # FILL QUEUE WITH WORK
        self.inbound.extend(request)

        num = len(request)

        def output():
            for i in xrange(num):
                result = self.outbound.pop()
                if "exception" in result:
                    raise result["exception"]
                else:
                    yield result["response"]
        return output()

    # EXTERNAL COMMAND THAT RETURNS IMMEDIATELY
    def stop(self):
        self.inbound.close()  # SEND STOPS TO WAKE UP THE WORKERS WAITING ON inbound.pop()
        for t in self.threads:
            t.keep_running = False

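# For reference, a usage sketch for this (removed) Multithread helper;
# `square` is a hypothetical worker, one thread is created per function,
# and each request dict supplies the worker's keyword arguments:
def square(x):
    return x * x

with Multithread([square, square]) as pool:
    for result in pool.execute([{"x": 1}, {"x": 2}, {"x": 3}]):
        print(result)  # 1, 4, 9, in any order
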
@@ -0,0 +1,16 @@
from urlparse import urlparse
from .struct import Struct


def URL(value):
    output = urlparse(value)

    return Struct(
        protocol=output.scheme,
        host=output.netloc,
        port=output.port,
        path=output.path,
        query=output.query,
        fragment=output.fragment
    )

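# A usage sketch; expected values follow from urlparse's behavior:
u = URL("http://example.com:9200/bugs/_mapping?pretty=true#frag")
assert u.protocol == "http"
assert u.host == "example.com:9200"
assert u.port == 9200
assert u.path == "/bugs/_mapping"
assert u.fragment == "frag"
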
@@ -0,0 +1,739 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from datetime import datetime
import re
from .. import struct
from ..cnv import CNV
from ..collections import reverse
from ..env.logs import Log
from ..maths import Math
from ..queries.filters import TRUE_FILTER
from ..struct import Struct, nvl, split_field, join_field, Null
from ..structs.wraps import listwrap  # NEEDED BY code() BELOW
from ..times.durations import Duration


class _MVEL(object):
    def __init__(self, fromData, isLean=False):
        self.fromData = fromData
        self.isLean = isLean
        self.prefixMap = []
        self.functions = {}

    def code(self, query):
        """
        RETURN THE MVEL THAT WILL FILTER USING query.where AND TERM-PACK THE query.select CLAUSE
        """
        selectList = listwrap(query.select)
        fromPath = query.frum.name  # FIRST NAME IS THE INDEX
        sourceVar = "__sourcedoc__"
        whereClause = query.where

        # PARSE THE fromPath
        code = self.frum(fromPath, sourceVar, "__loop")
        select = self.select(selectList, fromPath, "output", sourceVar)

        body = "var output = \"\";\n" + \
            code.replace(
                "<CODE>",
                "if (" + _where(whereClause, lambda v: self._translate(v)) + "){\n" +
                select.body +
                "}\n"
            ) + \
            "output\n"

        # ADD REFERENCED CONTEXT VARIABLES
        context = self.getFrameVariables(body)

        func = UID()
        predef = addFunctions(select.head + context + body).head
        param = "_source" if body.find(sourceVar) >= 0 else ""

        output = predef + \
            select.head + \
            context + \
            'var ' + func + ' = function(' + sourceVar + '){\n' + \
            body + \
            '};\n' + \
            func + '(' + param + ')\n'

        return Compiled(output)

    def frum(self, fromPath, sourceVar, loopVariablePrefix):
        """
        indexName NAME USED TO REFER TO HIGH LEVEL DOCUMENT
        loopVariablePrefix PREFIX FOR LOOP VARIABLES
        """
        loopCode = "if (<PATH> != null){ for(<VAR> : <PATH>){\n<CODE>\n}}\n"
        self.prefixMap = []
        code = "<CODE>"
        path = split_field(fromPath)

        # ADD LOCAL VARIABLES
        from ..queries.es_query_util import INDEX_CACHE

        columns = INDEX_CACHE[path[0]].columns
        for i, c in enumerate(columns):
            if c.name == "attachments":
                Log.debug("")
            if c.name.find("\\.") >= 0:
                self.prefixMap.insert(0, {
                    "path": c.name,
                    "variable": "get(" + sourceVar + ", \"" + c.name.replace("\\.", ".") + "\")"
                })
            else:
                self.prefixMap.insert(0, {
                    "path": c.name,
                    "variable": sourceVar + ".?" + c.name
                })

        # ADD LOOP VARIABLES
        currPath = []
        # self.prefixMap.insert(0, {"path": path[0], "variable": path[0]})
        for i, step in enumerate(path[1::]):
            loopVariable = loopVariablePrefix + str(i)
            currPath.append(step)
            pathi = ".".join(currPath)
            shortPath = self._translate(pathi)
            self.prefixMap.insert(0, {"path": pathi, "variable": loopVariable})

            loop = loopCode.replace("<VAR>", loopVariable).replace("<PATH>", shortPath)
            code = code.replace("<CODE>", loop)
        return code

    def _translate(self, variableName):
        shortForm = variableName
        for p in self.prefixMap:
            prefix = p["path"]
            if shortForm == prefix:
                shortForm = p["variable"]
            else:
                shortForm = replacePrefix(shortForm, prefix + ".", p["variable"] + ".?")  # ADD NULL CHECK
                shortForm = replacePrefix(shortForm, prefix + "[", p["variable"] + "[")
        return shortForm

    # CREATE A PIPE DELIMITED RESULT SET
    def select(self, selectList, fromPath, varName, sourceVar):
        path = split_field(fromPath)
        is_deep = len(path) > 1
        heads = []
        list = []
        for s in selectList:
            if is_deep:
                if s.value and isKeyword(s.value):
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    Log.error("do not know how to handle yet")
            else:
                if s.value and isKeyword(s.value):
                    list.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n")
                elif s.value:
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    code, decode = self.Parts2Term(s.domain)
                    heads.append(code.head)
                    list.append("Value2Pipe(" + code.body + ")\n")

        if len(split_field(fromPath)) > 1:
            output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + '+"|"+'.join(["Value2Pipe(" + v + ")\n" for v in list]) + ';\n'
        else:
            output = varName + ' = ' + '+"|"+'.join(["Value2Pipe(" + v + ")\n" for v in list]) + ';\n'

        return Struct(
            head="".join(heads),
            body=output
        )

    def Parts2Term(self, domain):
        """
        TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)

        CONVERT AN ARRAY OF PARTS{name, esfilter} TO AN MVEL EXPRESSION
        RETURN expression, function PAIR, WHERE
            expression - MVEL EXPRESSION
            function - TAKES RESULT OF expression AND RETURNS PART
        """
        fields = domain.dimension.fields

        term = []
        if len(split_field(self.fromData.name)) == 1 and fields:
            if isinstance(fields, dict):
                # CONVERT UNORDERED FIELD DEFS
                qb_fields, es_fields = zip(*[(k, fields[k]) for k in sorted(fields.keys())])
            else:
                qb_fields, es_fields = zip(*[(i, e) for i, e in enumerate(fields)])

            # NO LOOPS BECAUSE QUERY IS SHALLOW
            # DOMAIN IS FROM A DIMENSION, USE ITS FIELD DEFS TO PULL
            if len(es_fields) == 1:
                def fromTerm(term):
                    return domain.getPartByKey(term)

                return Struct(
                    head="",
                    body='getDocValue(' + CNV.string2quote(domain.dimension.fields[0]) + ')'
                ), fromTerm
            else:
                def fromTerm(term):
                    terms = [CNV.pipe2value(t) for t in CNV.pipe2value(term).split("|")]

                    candidate = dict(zip(qb_fields, terms))
                    for p in domain.partitions:
                        for k, t in candidate.items():
                            if p.value[k] != t:
                                break
                        else:
                            return p
                    if domain.type in ["uid", "default"]:
                        part = {"value": candidate}
                        domain.partitions.append(part)
                        return part
                    else:
                        return Null

                for f in es_fields:
                    term.append('Value2Pipe(getDocValue(' + CNV.string2quote(f) + '))')

                return Struct(
                    head="",
                    body='Value2Pipe(' + ('+"|"+'.join(term)) + ')'
                ), fromTerm
        else:
            for v in domain.partitions:
                term.append("if (" + _where(v.esfilter, lambda x: self._translate(x)) + ") " + value2MVEL(domain.getKey(v)) + "; else ")
            term.append(value2MVEL(domain.getKey(domain.NULL)))

            func_name = "_temp" + UID()
            return self.register_function("+\"|\"+".join(term))

    def Parts2TermScript(self, domain):
        code, decode = self.Parts2Term(domain)
        func = addFunctions(code.head + code.body)
        return func.head + code.head + code.body, decode

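# The pipe packing above relies on escaping "\" and "|" inside each term;
# a standalone sketch of the encode/decode pair (mirrors the escaping in
# the Value2Pipe MVEL helper defined later in this file):
def _value2pipe(s):
    return s.replace("\\", "\\\\").replace("|", "\\p")  # escape "\" first

def _pipe2value(s):
    return s.replace("\\p", "|").replace("\\\\", "\\")

_packed = "|".join(_value2pipe(t) for t in ["a|b", "c\\d", "e"])
assert [_pipe2value(t) for t in _packed.split("|")] == ["a|b", "c\\d", "e"]
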
    def getFrameVariables(self, body):
        contextVariables = []
        columns = self.fromData.columns

        parentVarNames = set()  # ALL PARENTS OF VARIABLES WITH "." IN NAME
        body = body.replace(".?", ".")

        for i, c in enumerate(columns):
            j = body.find(c.name, 0)
            while j >= 0:
                s = j
                j = body.find(c.name, s + 1)

                test0 = body[s - 1: s + len(c.name) + 1:]
                test3 = body[s - 8: s + len(c.name):]

                if test0[:-1] == "\"" + c.name:
                    continue
                if test3 == "_source." + c.name:
                    continue

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name + " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

                body = body.replace(c.name, "-" * len(c.name))

                if self.isLean or c.useSource:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getSourceValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = _source[\"" + c.name + "\"];\n")
                else:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                break

        return "".join(contextVariables)

    def compile_expression(self, expression, constants=None):
        # EXPAND EXPRESSION WITH ANY CONSTANTS
        expression = setValues(expression, constants)

        fromPath = self.fromData.name  # FIRST NAME IS THE INDEX
        indexName = split_field(fromPath)[0]

        context = self.getFrameVariables(expression)
        if context == "":
            return addFunctions(expression).head + expression

        func = UID()
        code = addFunctions(context + expression)
        output = code.head + \
            'var ' + func + ' = function(' + indexName + '){\n' + \
            context + \
            expression + ";\n" + \
            '};\n' + \
            func + '(_source)\n'

        return Compiled(output)

    def register_function(self, code):
        for n, c in self.functions.items():
            if c == code:
                break
        else:
            n = "_temp" + UID()
            self.functions[n] = code

        return Struct(
            head='var ' + n + ' = function(){\n' + code + '\n};\n',
            body=n + '()\n'
        )

class Compiled(object):
    def __init__(self, code):
        self.code = code

    def __str__(self):
        return self.code

    def __json__(self):
        return CNV.string2quote(self.code)


__UID__ = 1000


def UID():
    output = "_" + str(__UID__)
    globals()["__UID__"] += 1
    return output

def setValues(expression, constants):
    if not constants:
        return expression

    constants = constants.copy()

    # EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES)
    for c in constants:
        value = c.value
        n = c.name
        if len(split_field(n)) >= 3:
            continue  # DO NOT GO TOO DEEP
        if isinstance(value, list):
            continue  # DO NOT MESS WITH ARRAYS

        if isinstance(value, dict):
            for k, v in value.items():
                constants.append({"name": n + "." + k, "value": v})

    for c in reverse(constants):  # REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST
        s = 0
        while True:
            s = expression.find(c.name, s)
            if s == -1:
                break
            if re.match(r"\w", expression[s - 1]):
                break
            if re.match(r"\w", expression[s + len(c.name)]):
                break

            v = value2MVEL(c.value)
            expression = expression[:s] + v + expression[s + len(c.name):]

    return expression

def unpack_terms(facet, selects):
    # INTERPRET THE TERM-PACKED ES RESULTS AND RETURN DATA CUBE
    # ASSUME THE .term IS JSON OBJECT WITH ARRAY OF RESULT OBJECTS
    mod = len(selects)
    output = []
    for t in facet.terms:
        if t.term == "":
            continue  # NO DATA
        value = []
        for i, v in enumerate(t.term.split("|")):
            value.append(CNV.pipe2value(v))
            if ((i + 1) % mod) == 0:
                value.append(t.count)
                output.append(value)
                value = []

    return output

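# A standalone trace of the unpacking loop above: with two selected values
# per row (mod == 2), every second term is followed by the facet count
# (hypothetical facet data, plain dicts instead of wrapped structs):
_terms = [{"term": "a|1", "count": 7}, {"term": "b|2", "count": 3}]
_rows, _row = [], []
for _t in _terms:
    for _i, _v in enumerate(_t["term"].split("|")):
        _row.append(_v)
        if (_i + 1) % 2 == 0:
            _row.append(_t["count"])
            _rows.append(_row)
            _row = []
assert _rows == [["a", "1", 7], ["b", "2", 3]]
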
# PASS esFilter SIMPLIFIED ElasticSearch FILTER OBJECT
# RETURN MVEL EXPRESSION
def _where(esFilter, _translate):
    if not esFilter or esFilter is TRUE_FILTER:
        return "true"

    keys = esFilter.keys()
    if len(keys) != 1:
        Log.error("Expecting only one filter aggregate")

    op = keys[0]
    if op == "and":
        list = esFilter[op]
        if not list:
            return "true"
        if len(list) == 1:
            return _where(list[0], _translate)
        output = "(" + " && ".join(_where(l, _translate) for l in list) + ")"
        return output
    elif op == "or":
        list = esFilter[op]
        if not list:
            return "false"
        if len(list) == 1:
            return _where(list[0], _translate)
        output = "(" + " || ".join(_where(l, _translate) for l in list) + ")"
        return output
    elif op == "not":
        return "!(" + _where(esFilter[op], _translate) + ")"
    elif op == "term":
        pair = esFilter[op]
        if len(pair.keys()) == 1:
            return [_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()][0]
        else:
            return "(" + " && ".join(_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()) + ")"
    elif op == "terms":
        output = []
        for variableName, valueList in esFilter[op].items():
            if not valueList:
                Log.error("Expecting something in 'terms' array")
            if len(valueList) == 1:
                output.append(_translate(variableName) + "==" + value2MVEL(valueList[0]))
            else:
                output.append("(" + " || ".join(_translate(variableName) + "==" + value2MVEL(v) for v in valueList) + ")")
        return " && ".join(output)
    elif op == "exists":
        # "exists":{"field":"myField"}
        pair = esFilter[op]
        variableName = pair.field
        return "(" + _translate(variableName) + "!=null)"
    elif op == "missing":
        fieldName = _translate(esFilter[op].field)
        testExistence = nvl(esFilter[op].existence, True)
        testNull = nvl(esFilter[op].null_value, True)

        output = []
        if testExistence and not testNull:
            output.append("(" + fieldName.replace(".?", ".") + " == empty)")  # REMOVE THE .? SO WE REFER TO THE FIELD, NOT GET THE VALUE
        if testNull:
            output.append("(" + fieldName + "==null)")
        return " || ".join(output)
    elif op == "range":
        pair = esFilter[op]
        ranges = []

        for variableName, r in pair.items():
            if r.gte:
                ranges.append(value2MVEL(r.gte) + "<=" + _translate(variableName))
            elif r.gt:
                ranges.append(value2MVEL(r.gt) + "<" + _translate(variableName))
            elif r["from"]:
                if r.include_lower == None or r.include_lower:
                    ranges.append(value2MVEL(r["from"]) + "<=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["from"]) + "<" + _translate(variableName))

            if r.lte:
                ranges.append(value2MVEL(r.lte) + ">=" + _translate(variableName))
            elif r.lt:
                ranges.append(value2MVEL(r.lt) + ">" + _translate(variableName))
            elif r["to"]:
                if r.include_upper == None or r.include_upper:
                    ranges.append(value2MVEL(r["to"]) + ">=" + _translate(variableName))
                else:
                    ranges.append(value2MVEL(r["to"]) + ">" + _translate(variableName))

        return "(" + " && ".join(ranges) + ")"

    elif op == "script":
        script = esFilter[op].script
        return _translate(script)
    elif op == "prefix":
        pair = esFilter[op]
        variableName, value = pair.items()[0]
        return _translate(variableName) + ".startsWith(" + CNV.string2quote(value) + ")"
    elif op == "match_all":
        return "true"
    else:
        Log.error("'" + op + "' is an unknown aggregate")

    return ""

VAR_CHAR = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.\""
keyword_pattern = re.compile(r"\w+(?:\.\w+)*")


def isKeyword(value):
    """
    RETURN TRUE IF THE value IS JUST A NAME OF A FIELD, A LIST OF FIELDS, (OR A VALUE)
    """
    if not value or not isinstance(value, basestring):
        Log.error("Expecting a string")

    if keyword_pattern.match(value):
        return True
    return False


def value2MVEL(value):
    """
    FROM PYTHON VALUE TO MVEL EQUIVALENT
    """
    if isinstance(value, datetime):
        return str(CNV.datetime2milli(value)) + " /*" + value.strftime("%y%m%d %H%M%S") + "*/"  # TIME
    if isinstance(value, Duration):
        return str(CNV.timedelta2milli(value)) + " /*" + str(value) + "*/"  # DURATION

    if Math.is_number(value):
        return str(value)
    return CNV.string2quote(value)

# FROM PYTHON VALUE TO ES QUERY EQUIVALENT
def value2query(value):
    if isinstance(value, datetime):
        return CNV.datetime2milli(value)
    if isinstance(value, Duration):
        return value.milli

    if Math.is_number(value):
        return value
    return CNV.string2quote(value)


def value2value(value):
    """
    CONVERT FROM PYTHON VALUE TO ES EQUIVALENT
    """
    if isinstance(value, datetime):
        return CNV.datetime2milli(value)
    if isinstance(value, Duration):
        return value.milli  # DURATION
    return value

def addFunctions(mvel):
    """
    PREPEND THE REQUIRED MVEL FUNCTIONS TO THE CODE
    """
    isAdded = Struct()  # SOME FUNCTIONS DEPEND ON OTHERS

    head = []
    body = mvel

    keepAdding = True
    while keepAdding:
        keepAdding = False
        for func_name, func_code in FUNCTIONS.items():
            if isAdded[func_name]:
                continue
            if mvel.find(func_name) == -1:
                continue
            keepAdding = True
            isAdded[func_name] = func_code
            head.append(func_code)
            mvel = func_code + mvel
    return Struct(
        head="".join(head),
        body=body
    )

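# The loop above is a fixed-point scan: prepending one helper may reference
# another, so scanning repeats until no new names appear. A standalone sketch:
_FUNCS = {"a": "var a = function(){ b(); };\n", "b": "var b = function(){ 1; };\n"}

def _add_functions(code):
    head, added, changed = [], set(), True
    while changed:
        changed = False
        for name, src in _FUNCS.items():
            if name not in added and code.find(name) != -1:
                added.add(name)
                head.append(src)
                code = src + code  # the helper itself may name other helpers
                changed = True
    return "".join(head) + code

assert "var b" in _add_functions("a();")  # "a" pulls in "b" transitively
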
FUNCTIONS = {
    "String2Quote":
        "var String2Quote = function(str){\n" +
        "if (!(str is String)){ str; }else{\n" +  # LAST VALUE IS RETURNED. "return" STOPS EXECUTION COMPLETELY!
        "" + value2MVEL("\"") + "+" +
        "str.replace(" + value2MVEL("\\") + "," + value2MVEL("\\\\") +
        ").replace(" + value2MVEL("\"") + "," + value2MVEL("\\\"") +
        ").replace(" + value2MVEL("\'") + "," + value2MVEL("\\\'") + ")+" +
        value2MVEL("\"") + ";\n" +
        "}};\n",

    "Value2Pipe":
        'var Value2Pipe = function(value){\n' +  # SPACES ARE IMPORTANT BETWEEN "="
        "if (value==null){ \"0\" }else " +
        "if (value is ArrayList || value is org.elasticsearch.common.mvel2.util.FastList){" +
        "var out = \"\";\n" +
        "foreach (v : value) out = (out==\"\") ? v : out + \"|\" + Value2Pipe(v);\n" +
        "'a'+Value2Pipe(out);\n" +
        "}else \n" +
        "if (value is Long || value is Integer || value is Double){ 'n'+value; }else \n" +
        "if (!(value is String)){ 's'+value.getClass().getName(); }else \n" +
        '"s"+value.replace("\\\\", "\\\\\\\\").replace("|", "\\\\p");' +  # CAN NOT ""+value TO MAKE NUMBER A STRING (OR EVEN TO PREPEND A STRING!)
        "};\n",

    # "replaceAll":
    #     "var replaceAll = function(output, find, replace){\n" +
    #     "if (output.length()==0) return output;\n" +
    #     "s = output.indexOf(find, 0);\n" +
    #     "while(s>=0){\n" +
    #     "output=output.replace(find, replace);\n" +
    #     "s=s-find.length()+replace.length();\n" +
    #     "s = output.indexOf(find, s);\n" +
    #     "}\n" +
    #     "output;\n" +
    #     '};\n',

    "floorDay":
        "var floorDay = function(value){ Math.floor(value/86400000)*86400000;};\n",

    "floorInterval":
        "var floorInterval = function(value, interval){ Math.floor((double)value/(double)interval)*interval;};\n",

    "maximum":  # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
        "var maximum = function(a, b){if (a==null) b; else if (b==null) a; else if (a>b) a; else b;\n};\n",

    "minimum":  # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT!
        "var minimum = function(a, b){if (a==null) b; else if (b==null) a; else if (a<b) a; else b;\n};\n",

    "coalesce":  # PICK FIRST NOT-NULL VALUE
        "var coalesce = function(a, b){if (a==null) b; else a; \n};\n",

    "zero2null":  # ES MAKES IT DIFFICULT TO DETECT NULL/MISSING VALUES, BUT WHEN DEALING WITH NUMBERS, ES DEFAULTS TO RETURNING ZERO FOR missing VALUES!!
        "var zero2null = function(a){if (a==0) null; else a; \n};\n",

    "get":  # MY OWN PERSONAL *FU* TO THE TWISTED MVEL PROPERTY ACCESS
        "var get = function(hash, key){\n" +
        "if (hash==null) null; else hash[key];\n" +
        "};\n",

    "isNumeric":
        "var isNumeric = function(value){\n" +
        "value = value + \"\";\n" +
        # "try{ value-0; }catch(e){ 0; }" +
        "var isNum = value.length()>0;\n" +
        "for (v : value.toCharArray()){\n" +
        "if (\"0123456789\".indexOf(v)==-1) isNum = false;\n" +
        "};\n" +
        "isNum;\n" +
        "};\n",

    "alpha2zero":
        "var alpha2zero = function(value){\n" +
        "var output = 0;\n" +
        "if (isNumeric(value)) output = value-0;\n" +
        "return output;" +
        "};\n",

    # KANBAN SOFTWARE
    # CAN SEE QUEUE BLOCKAGES AND SEE SINGLE BLOCKERS

    "concat":
        "var concat = function(array){\n" +
        "if (array==null) \"\"; else {\n" +
        "var output = \"\";\n" +
        "for (v : array){ output = output+\"|\"+v+\"|\"; };\n" +
        "output;\n" +
        "}};\n",

    # "contains":
    #     "var contains = function(array, value){\n" +
    #     "if (array==null) false; else {\n" +
    #     "var good = false;\n" +
    #     "for (v : array){ if (v==value) good=true; };\n" +
    #     "good;\n" +
    #     "}};\n",

    "getFlagValue":  # SPECIFICALLY FOR cf_* FLAGS: CONCATENATE THE ATTRIBUTE NAME WITH ATTRIBUTE VALUE, IF EXISTS
        "var getFlagValue = function(name){\n" +
        "if (_source[name]!=null)" +
        "\" \"+name+_source[name];\n" +
        "else \n" +
        "\"\";\n" +
        "};\n",

    "getDocValue":
        "var getDocValue = function(name){\n" +
        "var out = [];\n" +
        "var v = doc[name];\n" +
        # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();\n" +
        "if (v==null || v.value==null) { null; } else\n" +
        "if (v.values.size()<=1){ v.value; } else\n" +  # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
        "{for(k : v.values) out.add(k); out;}" +
        "};\n",

    "getSourceValue":
        "var getSourceValue = function(name){\n" +
        "var out = [];\n" +
        "var v = _source[name];\n" +
        # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();\n" +
        "if (v==null) { null; } else\n" +
        "if (v[\"values\"]==null || v.values.size()<=1){ v.value; } else {\n" +  # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I
        "for(k : v) out.add(k); out;\n" +  # .size() MUST BE USED INSTEAD OF .length, THE LATTER WILL CRASH IF JITTED (https://github.com/elasticsearch/elasticsearch/issues/3094)
        "}};\n",

    "getDocArray":
        "var getDocArray = function(name){\n" +
        "var out = [];\n" +
        "var v = doc[name];\n" +
        "if (v!=null && v.value!=null) for(k : v.values) out.add(k);" +
        "out;" +
        "};\n",

    "milli2Month":
        "var milli2Month = function(value, milliOffset){\n" +
        "g=new java.util.GregorianCalendar(new java.util.SimpleTimeZone(0, \"GMT\"));\n" +
        "g.setTimeInMillis(value);\n" +
        "g.add(java.util.GregorianCalendar.MILLISECOND, -milliOffset);\n" +
        "m = g.get(java.util.GregorianCalendar.MONTH);\n" +
        "output = \"\"+g.get(java.util.GregorianCalendar.YEAR)+(m>9?\"\":\"0\")+m;\n" +
        "output;\n" +
        "};\n",

    "between":
        "var between = function(value, prefix, suffix){\n" +
        "if (value==null){ null; }else{\n" +
        "var start = value.indexOf(prefix, 0);\n" +
        "if (start==-1){ null; }else{\n" +
        "var end = value.indexOf(suffix, start+prefix.length());\n" +
        "if (end==-1){ null; }else{\n" +
        "value.substring(start+prefix.length(), end);\n" +
        "}}}\n" +
        "};\n"
}


def replacePrefix(value, prefix, new_prefix):
    try:
        if value.startswith(prefix):
            return new_prefix + value[len(prefix):]
        return value
    except Exception, e:
        Log.error("can not replace prefix", e)
(Diff for one file not shown because it is too large.)
@@ -0,0 +1,240 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from ..collections.matrix import Matrix
from ..collections import MAX, OR
from ..queries.query import _normalize_edge
from ..struct import StructList
from ..structs.wraps import wrap, wrap_dot, listwrap
from ..env.logs import Log

class Cube(object):
    """
    A CUBE IS LIKE A NUMPY ARRAY, ONLY WITH THE DIMENSIONS TYPED AND NAMED.
    CUBES ARE BETTER THAN PANDAS BECAUSE THEY DEAL WITH NULLS GRACEFULLY
    """

    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data
        """

        self.is_value = not isinstance(select, list)
        self.select = select

        # ENSURE frum IS PROPER FORM
        if isinstance(select, list):
            if OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.ZERO}
                self.edges = StructList.EMPTY
            elif isinstance(data, dict):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = [{"name": "rownum", "domain": {"type": "index"}}]
                else:
                    self.edges = StructList.EMPTY
            elif isinstance(data, list):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.wrap(data)}
                self.edges = [{"name": "rownum", "domain": {"type": "index"}}]
            elif isinstance(data, Matrix):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: data}
            else:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix(value=data)}
                self.edges = StructList.EMPTY
        else:
            self.edges = edges

        self.data = data

    def __len__(self):
        """
        RETURN DATA VOLUME
        """
        if not self.edges:
            return 1

        return len(self.data.values()[0])

    def __iter__(self):
        if self.is_value:
            return self.data[self.select.name].__iter__()

        if not self.edges:
            return list.__iter__([])

        if len(self.edges) == 1 and wrap(self.edges[0]).domain.type == "index":
            # ITERATE AS LIST OF RECORDS
            keys = list(self.data.keys())
            output = (struct.zip(keys, r) for r in zip(*self.data.values()))
            return output

        Log.error("This is a multicube")

    @property
    def value(self):
        if self.edges:
            Log.error("can not get value of a cube with dimensions")
        if isinstance(self.select, list):
            Log.error("can not get value of multi-valued cubes")
        return self.data[self.select.name].cube

    def __lt__(self, other):
        return self.value < other

    def __gt__(self, other):
        return self.value > other

    def __eq__(self, other):
        if other == None:
            if self.edges:
                return False
            if self.is_value and self.value == None:
                return True
            return False
        return self.value == other

    def __ne__(self, other):
        return not Cube.__eq__(self, other)

    def __add__(self, other):
        return self.value + other

    def __radd__(self, other):
        return other + self.value

    def __sub__(self, other):
        return self.value - other

    def __rsub__(self, other):
        return other - self.value

    def __mul__(self, other):
        return self.value * other

    def __rmul__(self, other):
        return other * self.value

    def __div__(self, other):
        return self.value / other

    def __rdiv__(self, other):
        return other / self.value

    def __getitem__(self, item):
        return self.data[item]

    def __getattr__(self, item):
        return self.data[item]

    def get_columns(self):
        return self.edges + listwrap(self.select)

    def _select(self, select):
        selects = listwrap(select)
        is_aggregate = OR(s.aggregate != None and s.aggregate != "none" for s in selects)
        if is_aggregate:
            values = {s.name: Matrix(value=self.data[s.value].aggregate(s.aggregate)) for s in selects}
            return Cube(select, [], values)
        else:
            values = {s.name: self.data[s.value] for s in selects}
            return Cube(select, self.edges, values)

    def groupby(self, edges):
        """
        SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY
        simple==True WILL HAVE GROUPS BASED ON PARTITION VALUE, NOT PARTITION OBJECTS
        """
        edges = StructList([_normalize_edge(e) for e in edges])

        stacked = [e for e in self.edges if e.name in edges.name]
        remainder = [e for e in self.edges if e.name not in edges.name]
        selector = [1 if e.name in edges.name else 0 for e in self.edges]

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")
        # CACHE SOME RESULTS
        keys = [e.name for e in self.edges]
        getKey = [e.domain.getKey for e in self.edges]
        lookup = [[getKey[i](p) for p in e.domain.partitions] for i, e in enumerate(self.edges)]

        def coord2term(coord):
            output = wrap_dot({keys[i]: lookup[i][c] for i, c in enumerate(coord)})
            return output

        if isinstance(self.select, list):
            selects = listwrap(self.select)
            index, v = zip(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [v]
            for s in selects[1::]:
                i, v = zip(*self.data[s.name].groupby(selector))
                values.append(v)

            output = zip(coord, [Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)}) for v in zip(*values)])
        elif not remainder:
            # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
            output = (
                (
                    coord2term(coord),
                    v
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )
        else:
            output = (
                (
                    coord2term(coord),
                    Cube(self.select, remainder, v)
                )
                for coord, v in self.data[self.select.name].groupby(selector)
            )

        return output

    def __str__(self):
        return str(self.data)

    def __int__(self):
        if self.is_value:
            return int(self.value)
        else:
            return int(self.data)

    def __float__(self):
        if self.is_value:
            return float(self.value)
        else:
            return float(self.data)

@@ -0,0 +1,438 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from .. import struct
from ..cnv import CNV
from ..collections.matrix import Matrix
from .query import Query
from ..sql.db import int_list_packer, SQL, DB
from ..env.logs import Log
from ..strings import indent, expand_template
from ..struct import nvl, StructList
from ..structs.wraps import wrap, listwrap

class DBQuery(object):
    """
    TRANSLATE Qb QUERIES TO MySQL DATABASE QUERIES
    """
    def __init__(self, db):
        object.__init__(self)
        if isinstance(db, DB):
            self.db = db
        else:
            self.db = DB(db)

    def query(self, query, stacked=False):
        """
        TRANSLATE Qb QUERY ON SINGLE TABLE TO SQL QUERY
        """
        query = Query(query)

        sql, post = self._subquery(query, isolate=False, stacked=stacked)
        query.data = post(sql)
        return query.data

    def update(self, query):
        self.db.execute("""
            UPDATE {{table_name}}
            SET {{assignment}}
            {{where}}
        """, {
            "table_name": query["from"],
            "assignment": ",".join(self.db.quote_column(k) + "=" + self.db.quote_value(v) for k, v in query.set),
            "where": self._where2sql(query.where)
        })

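    # A usage sketch (hypothetical connection settings, table, and columns;
    # the argument is a Qb-style query as accepted by Query above):
    #
    #     db_query = DBQuery({"host": "localhost", "schema": "bugs"})
    #     total = db_query.query({
    #         "from": "test_results",
    #         "select": {"name": "total", "value": "duration", "aggregate": "sum"},
    #         "where": {"term": {"branch": "mozilla-central"}}
    #     })
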
    def _subquery(self, query, isolate=True, stacked=False):
        if isinstance(query, basestring):
            return self.db.quote_column(query), None
        if query.name:  # IT WOULD BE SAFER TO WRAP TABLE REFERENCES IN A TYPED OBJECT (Cube, MAYBE?)
            return self.db.quote_column(query.name), None

        if query.edges:
            # RETURN A CUBE
            sql, post = self._grouped(query, stacked)
        else:
            select = listwrap(query.select)
            if select[0].aggregate != "none":
                sql, post = self._aggop(query)
            else:
                sql, post = self._setop(query)

        if isolate:
            return "(\n" + sql + "\n) a\n", post
        else:
            return sql, post

    def _grouped(self, query, stacked=False):
        select = listwrap(query.select)

        # RETURN SINGLE OBJECT WITH AGGREGATES
        for s in select:
            if s.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", {"select": s})

        selects = StructList()
        groups = StructList()
        edges = query.edges
        for e in edges:
            if e.domain.type != "default":
                Log.error("domain of type {{type}} not supported, yet", {"type": e.domain.type})
            groups.append(e.value)
            selects.append(e.value + " AS " + self.db.quote_column(e.name))

        for s in select:
            selects.append(aggregates[s.aggregate].replace("{{code}}", s.value) + " AS " + self.db.quote_column(s.name))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            GROUP BY
                {{groups}}
        """, {
            "selects": SQL(",\n".join(selects)),
            "groups": SQL(",\n".join(groups)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where)
        })

        def post_stacked(sql):
            # RETURN IN THE USUAL DATABASE RESULT SET FORMAT
            return self.db.query(sql)

        def post(sql):
            # FIND OUT THE default DOMAIN SIZES
            result = self.db.column_query(sql)
            num_edges = len(edges)
            for e, edge in enumerate(edges):
                domain = edge.domain
                if domain.type == "default":
                    domain.type = "set"
                    parts = set(result[e])
                    domain.partitions = [{"index": i, "value": p} for i, p in enumerate(parts)]
                    domain.map = {p: i for i, p in enumerate(parts)}
                else:
                    Log.error("Do not know what to do here, yet")

            # FILL THE DATA CUBE
            maps = [(struct.unwrap(e.domain.map), result[i]) for i, e in enumerate(edges)]
            cubes = StructList()
            for c, s in enumerate(select):
                data = Matrix(*[len(e.domain.partitions) + (1 if e.allow_nulls else 0) for e in edges])
                for rownum, value in enumerate(result[c + num_edges]):
                    coord = [m[r[rownum]] for m, r in maps]
                    data[coord] = value
                cubes.append(data)

            if isinstance(query.select, list):
                return cubes
            else:
                return cubes[0]

        return sql, post if not stacked else post_stacked

    def _aggop(self, query):
        """
        SINGLE ROW RETURNED WITH AGGREGATES
        """
        if isinstance(query.select, list):
            # RETURN SINGLE OBJECT WITH AGGREGATES
            for s in query.select:
                if s.aggregate not in aggregates:
                    Log.error("Expecting all columns to have an aggregate: {{select}}", {"select": s})

            selects = StructList()
            for s in query.select:
                selects.append(aggregates[s.aggregate].replace("{{code}}", s.value) + " AS " + self.db.quote_column(s.name))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """, {
                "selects": SQL(",\n".join(selects)),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.filter)
            })

            return sql, lambda sql: self.db.column(sql)[0]  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
        else:
            # RETURN SINGLE VALUE
            s0 = query.select
            if s0.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", {"select": s0})

            select = aggregates[s0.aggregate].replace("{{code}}", s0.value) + " AS " + self.db.quote_column(s0.name)

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """, {
                "selects": SQL(select),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where)
            })

            def post(sql):
                result = self.db.column_query(sql)
                return result[0][0]

            return sql, post  # RETURN SINGLE VALUE

    def _setop(self, query):
        """
        NO AGGREGATION, SIMPLE LIST COMPREHENSION
        """
        if isinstance(query.select, list):
            # RETURN BORING RESULT SET
            selects = StructList()
            for s in listwrap(query.select):
                if isinstance(s.value, dict):
                    for k, v in s.value.items():
                        selects.append(v + " AS " + self.db.quote_column(s.name + "." + k))
                elif isinstance(s.value, list):
                    for i, ss in enumerate(s.value):
                        selects.append(ss + " AS " + self.db.quote_column(s.name + "," + str(i)))
                else:
                    selects.append(s.value + " AS " + self.db.quote_column(s.name))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{limit}}
                {{sort}}
            """, {
                "selects": SQL(",\n".join(selects)),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            def post_process(sql):
                result = self.db.query(sql)
                for s in listwrap(query.select):
                    if isinstance(s.value, dict):
                        for r in result:
                            r[s.name] = {}
                            for k, v in s.value.items():
                                r[s.name][k] = r[s.name + "." + k]
                                r[s.name + "." + k] = None

                    if isinstance(s.value, list):
                        # REWRITE AS TUPLE
                        for r in result:
                            r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
                            for i, ss in enumerate(s.value):
                                r[s.name + "," + str(i)] = None

                expand_json(result)
                return result

            return sql, post_process  # RETURN BORING RESULT SET
        else:
            # RETURN LIST OF VALUES
            if query.select.value == "*":
                select = "*"
            else:
                name = query.select.name
                select = query.select.value + " AS " + self.db.quote_column(name)

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{limit}}
                {{sort}}
            """, {
                "selects": SQL(select),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            if query.select.value == "*":
                def post(sql):
                    result = self.db.query(sql)
                    expand_json(result)
                    return result

                return sql, post
            else:
                return sql, lambda sql: [r[name] for r in self.db.query(sql)]  # RETURNING LIST OF VALUES

    def _sort2sql(self, sort):
        """
        RETURN ORDER BY CLAUSE
        """
        if not sort:
            return ""
        return SQL("ORDER BY " + ",\n".join([self.db.quote_column(o.field) + (" DESC" if o.sort == -1 else "") for o in sort]))

    def _limit2sql(self, limit):
        return SQL("" if not limit else "LIMIT " + str(limit))

    def _where2sql(self, where):
        if where == None:
            return ""
        return SQL("WHERE " + _esfilter2sqlwhere(self.db, where))


def _isolate(separator, list):
    try:
        if len(list) > 1:
            return "(\n" + indent((" " + separator + "\n").join(list)) + "\n)"
        else:
            return list[0]
    except Exception, e:
        Log.error("Programming problem: separator={{separator}}, list={{list}}", {
            "list": list,
            "separator": separator
        }, e)

def esfilter2sqlwhere(db, esfilter):
    return SQL(_esfilter2sqlwhere(db, esfilter))


def _esfilter2sqlwhere(db, esfilter):
    """
    CONVERT ElasticSearch FILTER TO SQL FILTER
    db - REQUIRED TO PROPERLY QUOTE VALUES AND COLUMN NAMES
    """
    esfilter = wrap(esfilter)

    if esfilter["and"]:
        return _isolate("AND", [esfilter2sqlwhere(db, a) for a in esfilter["and"]])
    elif esfilter["or"]:
        return _isolate("OR", [esfilter2sqlwhere(db, a) for a in esfilter["or"]])
    elif esfilter["not"]:
        return "NOT (" + esfilter2sqlwhere(db, esfilter["not"]) + ")"
    elif esfilter.term:
        return _isolate("AND", [db.quote_column(col) + "=" + db.quote_value(val) for col, val in esfilter.term.items()])
    elif esfilter.terms:
        for col, v in esfilter.terms.items():
            if len(v) == 0:
                return "FALSE"

            try:
                int_list = CNV.value2intlist(v)
                has_null = False
                for vv in v:
                    if vv == None:
                        has_null = True
                        break
                if int_list:
                    filter = int_list_packer(col, int_list)
                    if has_null:
                        return esfilter2sqlwhere(db, {"or": [{"missing": col}, filter]})
                    else:
                        return esfilter2sqlwhere(db, filter)
                else:
                    if has_null:
                        return esfilter2sqlwhere(db, {"missing": col})
                    else:
                        return "false"
            except Exception, e:
                pass
            return db.quote_column(col) + " in (" + ", ".join([db.quote_value(val) for val in v]) + ")"
    elif esfilter.script:
        return "(" + esfilter.script + ")"
    elif esfilter.range:
        name2sign = {
            "gt": ">",
            "gte": ">=",
            "lte": "<=",
            "lt": "<"
        }

        def single(col, r):
            min = nvl(r["gte"], r[">="])
            max = nvl(r["lte"], r["<="])
            if min and max:
                # SPECIAL CASE (BETWEEN)
                return db.quote_column(col) + " BETWEEN " + db.quote_value(min) + " AND " + db.quote_value(max)
            else:
                return " AND ".join(
                    db.quote_column(col) + name2sign[sign] + db.quote_value(value)
                    for sign, value in r.items()
                )

        output = _isolate("AND", [single(col, ranges) for col, ranges in esfilter.range.items()])
        return output
    elif esfilter.missing:
        if isinstance(esfilter.missing, basestring):
            return "(" + db.quote_column(esfilter.missing) + " IS Null)"
        else:
            return "(" + db.quote_column(esfilter.missing.field) + " IS Null)"
    elif esfilter.exists:
        if isinstance(esfilter.exists, basestring):
            return "(" + db.quote_column(esfilter.exists) + " IS NOT Null)"
        else:
            return "(" + db.quote_column(esfilter.exists.field) + " IS NOT Null)"
    elif esfilter.match_all:
        return "1=1"
    elif esfilter.instr:
        return _isolate("AND", ["instr(" + db.quote_column(col) + ", " + db.quote_value(val) + ")>0" for col, val in esfilter.instr.items()])
    else:
        Log.error("Can not convert esfilter to SQL: {{esfilter}}", {"esfilter": esfilter})

def expand_json(rows):
|
||||
#CONVERT JSON TO VALUES
|
||||
for r in rows:
|
||||
for k, json in list(r.items()):
|
||||
if isinstance(json, basestring) and json[0:1] in ("[", "{"):
|
||||
try:
|
||||
value = CNV.JSON2object(json)
|
||||
r[k] = value
|
||||
except Exception, e:
|
||||
pass
|
||||
|
||||
|
||||
#MAP NAME TO SQL FUNCTION
|
||||
aggregates = {
|
||||
"one": "COUNT({{code}})",
|
||||
"sum": "SUM({{code}})",
|
||||
"add": "SUM({{code}})",
|
||||
"count": "COUNT({{code}})",
|
||||
"maximum": "MAX({{code}})",
|
||||
"minimum": "MIN({{code}})",
|
||||
"max": "MAX({{code}})",
|
||||
"min": "MIN({{code}})",
|
||||
"mean": "AVG({{code}})",
|
||||
"average": "AVG({{code}})",
|
||||
"avg": "AVG({{code}})",
|
||||
"N": "COUNT({{code}})",
|
||||
"X0": "COUNT({{code}})",
|
||||
"X1": "SUM({{code}})",
|
||||
"X2": "SUM(POWER({{code}}, 2))",
|
||||
"std": "STDDEV({{code}})",
|
||||
"stddev": "STDDEV({{code}})",
|
||||
"var": "POWER(STDDEV({{code}}), 2)",
|
||||
"variance": "POWER(STDDEV({{code}}), 2)"
|
||||
}
|
|
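
# A minimal usage sketch (names and values here are illustrative): any object
# exposing quote_column()/quote_value() will do for `db`, since that is all
# _esfilter2sqlwhere asks of it.
def _example_esfilter2sqlwhere(db):
    example = {"and": [
        {"term": {"product": "Firefox"}},
        {"range": {"bug_id": {"gte": 1000, "lte": 2000}}}
    ]}
    # SHAPE OF RESULT (whitespace aside): (product='Firefox' AND bug_id BETWEEN 1000 AND 2000)
    return esfilter2sqlwhere(db, example)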
@ -0,0 +1,336 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from .. import struct
from ..collections import SUM
from ..queries.domains import Domain, ALGEBRAIC, KNOWN
from ..struct import Struct, nvl, Null, StructList, join_field, split_field
from ..times.timer import Timer
from ..env.logs import Log
from ..structs.wraps import wrap, listwrap


DEFAULT_QUERY_LIMIT = 20


class Dimension(object):

    def __init__(self, dim, parent, qb):
        self.name = dim.name
        self.parent = parent
        self.full_name = join_field(split_field(self.parent.full_name) + [self.name])
        self.min = dim.min
        self.max = dim.max
        self.interval = dim.interval
        self.value = dim.value
        self.label = dim.label
        self.end = dim.end
        self.esfilter = dim.esfilter
        self.weight = dim.weight
        self.style = dim.style
        self.isFacet = dim.isFacet

        self.type = nvl(dim.type, "set")
        self.limit = nvl(dim.limit, DEFAULT_QUERY_LIMIT)
        self.index = nvl(dim.index, nvl(parent, Null).index, qb.es.settings.name)

        if not self.index:
            Log.error("Expecting an index name")

        # ALLOW ACCESS TO SUB-PART BY NAME (IF ONLY THERE IS NO NAME COLLISION)
        self.edges = {}
        for e in listwrap(dim.edges):
            new_e = Dimension(e, self, qb)
            self.edges[new_e.full_name] = new_e

        self.partitions = wrap(nvl(dim.partitions, []))
        parse_partition(self)

        fields = nvl(dim.field, dim.fields)
        if not fields:
            return  # NO FIELDS TO SEARCH
        elif isinstance(fields, dict):
            self.fields = wrap(fields)
            edges = wrap([{"name": k, "value": v, "allowNulls": False} for k, v in self.fields.items()])
        else:
            self.fields = listwrap(fields)
            edges = wrap([{"name": f, "value": f, "index": i, "allowNulls": False} for i, f in enumerate(self.fields)])

        if dim.partitions:
            return  # ALREADY HAVE PARTS
        if dim.type not in KNOWN - ALGEBRAIC:
            return  # PARTS ARE TOO FUZZY (OR TOO NUMEROUS) TO FETCH

        with Timer("Get parts of {{name}}", {"name": self.name}):
            parts = qb.query({
                "from": self.index,
                "select": {"name": "count", "aggregate": "count"},
                "edges": edges,
                "esfilter": self.esfilter,
                "limit": self.limit
            })

        d = parts.edges[0].domain

        if dim.path:
            if len(edges) > 1:
                Log.error("Not supported yet")
            # EACH TERM RETURNED IS A PATH INTO A PARTITION TREE
            temp = Struct(partitions=[])
            for i, count in enumerate(parts):
                a = dim.path(d.getEnd(d.partitions[i]))
                if not isinstance(a, list):
                    Log.error("The path function on " + dim.name + " must return an ARRAY of parts")
                addParts(
                    temp,
                    dim.path(d.getEnd(d.partitions[i])),
                    count,
                    0
                )
            self.value = nvl(dim.value, "name")
            self.partitions = temp.partitions
        elif isinstance(fields, dict):
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            partitions = StructList()
            for g, p in parts.groupby(edges):
                if p:
                    partitions.append({
                        "value": g,
                        "esfilter": {"and": [
                            {"term": {e.value: g[e.name]}}
                            for e in edges
                        ]},
                        "count": int(p)
                    })
            self.partitions = partitions
        elif len(edges) == 1:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "esfilter": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": count
                }
                for i, count in enumerate(parts)
            ])
        elif len(edges) == 2:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS
            d2 = parts.edges[1].domain

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            array = parts.data.values()[0].cube  # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END)

            def edges2value(*values):
                if isinstance(fields, dict):
                    output = Struct()
                    for e, v in zip(edges, values):
                        output[e.name] = v
                    return output
                else:
                    return tuple(values)

            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "esfilter": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": SUM(subcube),
                    "partitions": [
                        {
                            "name": str(d2.partitions[j].name),  # CONVERT TO STRING
                            "value": edges2value(d.getEnd(d.partitions[i]), d2.getEnd(d2.partitions[j])),
                            "esfilter": {"and": [
                                {"term": {edges[0].value: d.partitions[i].value}},
                                {"term": {edges[1].value: d2.partitions[j].value}}
                            ]},
                            "count": count2
                        }
                        for j, count2 in enumerate(subcube)
                        if count2 > 0  # ONLY INCLUDE PROPERTIES THAT EXIST
                    ]
                }
                for i, subcube in enumerate(array)
            ])
        else:
            Log.error("Not supported")

        parse_partition(self)  # RELATE THE PARTS TO THE PARENTS

    def __getattr__(self, key):
        """
        RETURN CHILD EDGE OR PARTITION BY NAME
        """
        e = self.edges[key]
        if e:
            return e
        for p in self.partitions:
            if p.name == key:
                return p
        return Null

    def getDomain(self, **kwargs):
        # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
        kwargs = wrap(kwargs)
        kwargs.depth = nvl(kwargs.depth, len(self.fields) - 1 if isinstance(self.fields, list) else None)

        if not self.partitions and self.edges:
            # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
            partitions = [
                {
                    "name": v.name,
                    "value": v.name,
                    "esfilter": v.esfilter,
                    "style": v.style,
                    "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.edges)
                if i < nvl(self.limit, DEFAULT_QUERY_LIMIT) and v.esfilter
            ]
            self.isFacet = True
        elif kwargs.depth == None:  # ASSUME self.fields IS A dict
            partitions = StructList()
            for i, part in enumerate(self.partitions):
                if i >= nvl(self.limit, DEFAULT_QUERY_LIMIT):
                    break
                partitions.append({
                    "name": part.name,
                    "value": part.value,
                    "esfilter": part.esfilter,
                    "style": nvl(part.style, part.parent.style),
                    "weight": part.weight  # YO! WHAT DO WE *NOT* COPY?
                })
        elif kwargs.depth == 0:
            partitions = [
                {
                    "name": v.name,
                    "value": v.value,
                    "esfilter": v.esfilter,
                    "style": v.style,
                    "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.partitions)
                if i < nvl(self.limit, DEFAULT_QUERY_LIMIT)
            ]
        elif kwargs.depth == 1:
            partitions = StructList()
            rownum = 0
            for i, part in enumerate(self.partitions):
                if i >= nvl(self.limit, DEFAULT_QUERY_LIMIT):
                    continue
                rownum += 1
                try:
                    for j, subpart in enumerate(part.partitions):
                        partitions.append({
                            "name": join_field(split_field(subpart.parent.name) + [subpart.name]),
                            "value": subpart.value,
                            "esfilter": subpart.esfilter,
                            "style": nvl(subpart.style, subpart.parent.style),
                            "weight": subpart.weight  # YO! WHAT DO WE *NOT* COPY?
                        })
                except Exception, e:
                    Log.error("", e)
        else:
            Log.error("deeper than 2 is not supported yet")

        return Domain(
            type=self.type,
            name=self.name,
            partitions=wrap(partitions),
            min=self.min,
            max=self.max,
            interval=self.interval,
            # THE COMPLICATION IS THAT SOMETIMES WE WANT SIMPLE PARTITIONS, LIKE
            # STRINGS, DATES, OR NUMBERS. OTHER TIMES WE WANT PARTITION OBJECTS
            # WITH NAME, VALUE, AND OTHER MARKUP.
            # USUALLY A "set" IS MEANT TO BE SIMPLE, BUT THE end() FUNCTION
            # OVERRIDES EVERYTHING AND IS EXPLICIT. - NOT A GOOD SOLUTION BECAUSE
            # end() IS USED BOTH TO INDICATE THE QUERY PARTITIONS *AND* DISPLAY
            # COORDINATES ON CHARTS

            # PLEASE SPLIT end() INTO value() (replacing the string value) AND
            # label() (for presentation)
            value="name" if not self.value and self.partitions else self.value,
            key="value",
            label=nvl(self.label, (self.type == "set" and self.name)),
            end=nvl(self.end, (self.type == "set" and self.name)),
            isFacet=self.isFacet,
            dimension=self
        )

    def getSelect(self, **kwargs):
        if self.fields:
            if len(self.fields) == 1:
                return Struct(
                    name=self.full_name,
                    value=self.fields[0],
                    aggregate="none"
                )
            else:
                return Struct(
                    name=self.full_name,
                    value=self.fields,
                    aggregate="none"
                )

        domain = self.getDomain(**kwargs)
        if not domain.getKey:
            Log.error("Should not happen")
        if not domain.NULL:
            Log.error("Should not happen")

        return Struct(
            name=self.full_name,
            domain=domain,
            aggregate="none"
        )


def addParts(parentPart, childPath, count, index):
    """
    BUILD A hierarchy BY REPEATEDLY CALLING this METHOD WITH VARIOUS childPaths
    count IS THE NUMBER FOUND FOR this PATH
    """
    if index == None:
        index = 0
    if index == len(childPath):
        return
    c = childPath[index]
    parentPart.count = nvl(parentPart.count, 0) + count

    if parentPart.partitions == None:
        parentPart.partitions = StructList()
    for i, part in enumerate(parentPart.partitions):
        if part.name == c.name:
            addParts(part, childPath, count, index + 1)
            return

    parentPart.partitions.append(c)
    addParts(c, childPath, count, index + 1)


def parse_partition(part):
    for p in part.partitions:
        if part.index:
            p.index = part.index  # COPY INDEX DOWN
        parse_partition(p)
        p.value = nvl(p.value, p.name)
        p.parent = part

    if not part.esfilter:
        if len(part.partitions) > 100:
            Log.error("Must define an esfilter on {{name}}; there are too many partitions ({{num_parts}})", {
                "name": part.name,
                "num_parts": len(part.partitions)
            })

        # DEFAULT esfilter IS THE UNION OF ALL CHILD FILTERS
        part.esfilter = {"or": part.partitions.esfilter}
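
# A minimal sketch of addParts() (part names are illustrative): each call walks
# one root-to-leaf path, accumulating counts on every part it passes through.
def _example_addParts():
    root = Struct(partitions=StructList())
    addParts(root, [Struct(name="2013"), Struct(name="2013-06")], 10, 0)
    addParts(root, [Struct(name="2013"), Struct(name="2013-07")], 5, 0)
    # NOW root.count == 15 AND root.partitions[0] ("2013") HAS TWO SUB-PARTITIONS
    return root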
@ -0,0 +1,237 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
import re
from ..cnv import CNV
from ..collections import UNION
from .index import UniqueIndex
from ..env.logs import Log
from ..struct import Struct, nvl, StructList
from ..structs.wraps import wrap, unwrap

ALGEBRAIC = {"time", "duration", "numeric", "count", "datetime"}  # DOMAINS THAT HAVE ALGEBRAIC OPERATIONS DEFINED
KNOWN = {"set", "boolean", "duration", "time", "numeric"}  # DOMAINS THAT HAVE A KNOWN NUMBER OF PARTS AT QUERY TIME
PARTITION = {"uid", "set", "boolean"}  # DIMENSIONS WITH CLEAR PARTS


class Domain(object):
    def __new__(cls, **desc):
        desc = wrap(desc)
        if desc.type == "value":
            return ValueDomain(**unwrap(desc))
        elif desc.type == "default":
            return DefaultDomain(**unwrap(desc))
        elif desc.type == "set":
            if isinstance(desc.key, (list, tuple)):
                Log.error("multi key not supported yet")
            return SetDomain(**unwrap(desc))
        elif desc.type == "uid":
            return DefaultDomain(**unwrap(desc))
        else:
            Log.error("Do not know domain of type {{type}}", {"type": desc.type})

    def __init__(self, **desc):
        desc = wrap(desc)
        self.name = nvl(desc.name, desc.type)
        self.type = desc.type
        self.min = desc.min
        self.max = desc.max
        self.interval = desc.interval
        self.value = desc.value
        self.key = desc.key
        self.label = desc.label
        self.end = desc.end
        self.isFacet = nvl(desc.isFacet, False)
        self.dimension = desc.dimension

    @property
    def dict(self):
        return Struct(
            type=self.type,
            name=self.name,
            partitions=self.partitions,
            min=self.min,
            max=self.max,
            interval=self.interval,
            value=self.value,
            key=self.key,
            label=self.label,
            end=self.end,
            isFacet=self.isFacet
        )

    def __json__(self):
        return CNV.object2JSON(self.dict)


class ValueDomain(Domain):
    def __new__(cls, **desc):
        return object.__new__(ValueDomain)

    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.NULL = None

    def compare(self, a, b):
        return value_compare(a, b)

    def getCanonicalPart(self, part):
        return part

    def getPartByKey(self, key):
        return key

    def getKey(self, part):
        return part

    def getEnd(self, value):
        return value


class DefaultDomain(Domain):
    """
    DOMAIN IS A LIST OF OBJECTS, EACH WITH A value PROPERTY
    """

    def __new__(cls, **desc):
        return object.__new__(DefaultDomain)

    def __init__(self, **desc):
        Domain.__init__(self, **desc)

        self.NULL = Struct(value=None)
        self.partitions = StructList()
        self.map = dict()
        self.map[None] = self.NULL

    def compare(self, a, b):
        return value_compare(a.value, b.value)

    def getCanonicalPart(self, part):
        return self.getPartByKey(part.value)

    def getPartByKey(self, key):
        canonical = self.map.get(key, None)
        if canonical:
            return canonical

        canonical = Struct(name=key, value=key)

        self.partitions.append(canonical)
        self.map[key] = canonical
        return canonical

    def getKey(self, part):
        return part.value

    def getEnd(self, part):
        return part.value

    def getLabel(self, part):
        return part.value


class SetDomain(Domain):
    """
    DOMAIN IS A LIST OF OBJECTS, EACH WITH A value PROPERTY
    """

    def __new__(cls, **desc):
        return object.__new__(SetDomain)

    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.NULL = Struct(value=None)
        self.partitions = StructList()
        if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and isinstance(desc.partitions[0][desc.key], dict):
            keys = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            self.map = UniqueIndex(keys=keys)
        else:
            self.map = dict()
            self.map[None] = self.NULL

        self.label = nvl(self.label, "name")

        if not isinstance(desc.partitions, list):
            Log.error("expecting a list of partitions")

        if isinstance(desc.partitions[0], basestring):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            for p in desc.partitions:
                part = {"name": p, "value": p}
                self.partitions.append(part)
                self.map[p] = part
            self.key = ("value", )
        else:
            if desc.key == None:
                Log.error("Domains must have keys")
            if not is_keyword(desc.key):
                Log.error("scripts not supported yet")
            self.key = desc.key

            self.partitions = desc.partitions.copy()
            for p in desc.partitions:
                self.map[p[self.key]] = p

    def compare(self, a, b):
        return value_compare(self.getKey(a), self.getKey(b))

    def getCanonicalPart(self, part):
        return self.getPartByKey(part.value)

    def getPartByKey(self, key):
        try:
            canonical = self.map.get(key, None)
            if not canonical:
                return self.NULL
            return canonical
        except Exception, e:
            Log.error("problem", e)

    def getKey(self, part):
        return part[self.key]

    def getEnd(self, part):
        if self.value:
            return part[self.value]
        else:
            return part

    def getLabel(self, part):
        return part[self.label]


def value_compare(a, b):
    if a == None:
        if b == None:
            return 0
        return -1
    elif b == None:
        return 1

    if a > b:
        return 1
    elif a < b:
        return -1
    else:
        return 0


keyword_pattern = re.compile(r"\w+(?:\.\w+)*")


def is_keyword(value):
    if value == None:
        return False
    return True if keyword_pattern.match(value) else False
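
# A short sketch of the factory dispatch above (values are illustrative):
# Domain(**desc) routes on desc["type"], so a "set" description yields a
# SetDomain whose key->part map is built from its partitions.
def _example_set_domain():
    d = Domain(
        type="set",
        name="status",
        key="value",
        partitions=[{"name": "open", "value": "open"}, {"name": "closed", "value": "closed"}]
    )
    part = d.getPartByKey("open")    # -> THE "open" PART
    missing = d.getPartByKey("new")  # -> d.NULL, THE CANONICAL "NO PART"
    return part, missing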
@ -0,0 +1,188 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from ..cnv import CNV
from ..queries import MVEL
from ..queries.es_query_aggop import is_aggop, es_aggop
from ..queries.es_query_setop import is_fieldop, is_setop, is_deep, es_setop, es_deepop, es_fieldop
from ..queries.es_query_terms import es_terms, is_terms
from ..queries.es_query_terms_stats import es_terms_stats, is_terms_stats
from ..queries.es_query_util import aggregates, loadColumns
from . import Q
from ..queries.dimensions import Dimension
from ..queries.query import Query, _normalize_where
from ..env.logs import Log
from ..queries.MVEL import _MVEL
from ..struct import Struct, split_field, StructList, nvl
from ..structs.wraps import wrap, unwrap, listwrap


class ESQuery(object):
    """
    SEND GENERAL Qb QUERIES TO ElasticSearch
    """
    def __init__(self, es):
        self.es = es
        self.edges = Struct()
        self.worker = None
        self.ready = False

    def __enter__(self):
        self.ready = True
        return self

    def __exit__(self, type, value, traceback):
        self.ready = False
        if not self.worker:
            return

        if isinstance(value, Exception):
            self.worker.stop()
            self.worker.join()
        else:
            self.worker.join()

    def query(self, _query):
        if not self.ready:
            Log.error("Must use with clause for any instance of ESQuery")

        query = Query(_query, schema=self)

        for s in listwrap(query.select):
            if not aggregates[s.aggregate]:
                Log.error("ES can not aggregate " + s.name + " because '" + s.aggregate + "' is not a recognized aggregate")

        frum = query["from"]
        if isinstance(frum, Query):
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return Q.run(q2)

        frum = loadColumns(self.es, query["from"])
        mvel = _MVEL(frum)

        if is_fieldop(query):
            return es_fieldop(self.es, query)
        elif is_deep(query):
            return es_deepop(self.es, mvel, query)
        elif is_setop(query):
            return es_setop(self.es, mvel, query)
        elif is_aggop(query):
            return es_aggop(self.es, mvel, query)
        elif is_terms(query):
            return es_terms(self.es, mvel, query)
        elif is_terms_stats(query):
            return es_terms_stats(self, mvel, query)

        Log.error("Can not handle")

    def addDimension(self, dim):
        if isinstance(dim, list):
            Log.error("Expecting dimension to be an object, not a list:\n{{dim}}", {"dim": dim})
        self._addDimension(dim, [])

    def _addDimension(self, dim, path):
        dim.full_name = dim.name
        for e in dim.edges:
            d = Dimension(e, dim, self)
            self.edges[d.full_name] = d

    def __getitem__(self, item):
        f = split_field(item)
        e = self.edges[f[0]]
        for i in f[1::]:
            e = e[i]
        return e

    def __getattr__(self, item):
        return self.edges[item]

    def normalize_edges(self, edges):
        output = StructList()
        for e in listwrap(edges):
            output.extend(self._normalize_edge(e))
        return output

    def _normalize_edge(self, edge):
        """
        RETURN AN EDGE DEFINITION AS A SIMPLE ARRAY OF PATH-LEAF
        DEFINITIONS [ {"name":<pathA>, "value":<pathB>}, ... ]

        USEFUL FOR DECLARING HIGH-LEVEL DIMENSIONS, AND RELIEVING THE NEED FOR LOW LEVEL PATH PAIRS
        """
        if isinstance(edge, basestring):
            e = self[edge]
            if e:
                domain = e.getDomain()
                fields = domain.dimension.fields

                if isinstance(fields, list):
                    if len(fields) == 1:
                        return [{"value": fields[0]}]
                    else:
                        return [{"name": (edge + "[" + str(i) + "]"), "value": v} for i, v in enumerate(fields)]
                elif isinstance(fields, dict):
                    return [{"name": (edge + "." + k), "value": v} for k, v in fields.items()]
                else:
                    Log.error("do not know how to handle")

            return [{
                "name": edge,
                "value": edge
            }]
        else:
            return [{
                "name": nvl(edge.name, edge.value),
                "value": edge.value
            }]

    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)

        # GET IDS OF DOCUMENTS
        results = self.es.search({
            "fields": [],
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": _normalize_where(command.where, self)
            }},
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = StructList()
        for k, v in command.set.items():
            if not MVEL.isKeyword(k):
                Log.error("Only support simple paths for now")

            scripts.append("ctx._source." + k + " = " + MVEL.value2MVEL(v) + ";")
        script = "".join(scripts)

        if results.hits.hits:
            command = []
            for id in results.hits.hits._id:
                command.append({"update": {"_id": id}})
                command.append({"script": script})
            content = ("\n".join(CNV.object2JSON(c) for c in command) + "\n").encode('utf-8')
            self.es._post(
                self.es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"}
            )
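
# A minimal usage sketch (the index name is a placeholder): query() refuses to
# run outside a with-block, since __enter__ is what marks the instance ready.
def _example_esquery(es):
    with ESQuery(es) as esq:
        return esq.query({
            "from": "bugs",
            "select": {"name": "num_bugs", "aggregate": "count"}
        })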
@ -0,0 +1,102 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from ..collections.matrix import Matrix
from ..collections import AND
from ..structs.wraps import listwrap
from ..struct import unwrap
from ..queries import es_query_util
from ..queries.es_query_util import aggregates, fix_es_stats, buildESQuery
from ..queries.filters import simplify
from ..queries import MVEL
from ..queries.cube import Cube


def is_aggop(query):
    if not query.edges:
        return True
    return False


def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    esQuery = buildESQuery(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, mvel, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER VALUE
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if MVEL.isKeyword(s.value):
                unwrap(esQuery.facets)[s.name] = {
                    "statistical": {
                        "field": s.value
                    },
                    "facet_filter": simplify(query.where)
                }
            else:
                unwrap(esQuery.facets)[s.name] = {
                    "statistical": {
                        "script": mvel.compile_expression(s.value, query)
                    },
                    "facet_filter": simplify(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es_query_util.post(es, esQuery, query.limit)

    matricies = {s.name: Matrix(value=fix_es_stats(unwrap(data.facets)[s.name])[aggregates[s.aggregate]]) for s in select}
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube


def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT
    """
    select = listwrap(query.select)
    esQuery = buildESQuery(query)
    for s in select:

        if MVEL.isKeyword(s.value):
            esQuery.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {"exists": {"field": s.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            esQuery.facets[s.name] = {
                "terms": {
                    "script_field": mvel.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es_query_util.post(es, esQuery, query.limit)

    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.hits.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
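
# For orientation (index and field names are made up): with no edges, one
# statistical facet is registered per distinct select value, keyed by the
# select name, and fix_es_stats() later extracts the stat the aggregate names.
_example_aggop_query = {
    "from": "bugs",
    "select": {"name": "avg_votes", "value": "votes", "aggregate": "average"}
}
_example_aggop_facet = {
    "avg_votes": {
        "statistical": {"field": "votes"},
        "facet_filter": None  # es_aggop FILLS THIS WITH simplify(query.where)
    }
}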
@ -0,0 +1,256 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from .. import struct
from ..collections.matrix import Matrix
from ..collections import AND, SUM, OR
from ..structs.wraps import listwrap
from ..queries.es_query_util import aggregates
from ..queries import domains, es_query_util
from ..queries.filters import simplify, TRUE_FILTER
from ..env.logs import Log
from ..queries import MVEL, filters
from ..queries.cube import Cube
from ..struct import split_field, unwrap, nvl, StructList


def is_fieldop(query):
    # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)

    select = listwrap(query.select)
    if not query.edges:
        isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        isSimple = AND(s.value != None and (s.value == "*" or isKeyword(s.value)) for s in select)
        noAgg = AND(s.aggregate == "none" for s in select)

        if not isDeep and isSimple and noAgg:
            return True
    else:
        isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        if isSmooth:
            return True

    return False


def isKeyword(value):
    if isinstance(value, dict):
        return AND(isKeyword(v) for k, v in value.items())
    if isinstance(value, list):
        return AND(isKeyword(v) for v in value)
    return MVEL.isKeyword(value)


def es_fieldop(es, query):
    esQuery = es_query_util.buildESQuery(query)
    select = listwrap(query.select)
    esQuery.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": filters.simplify(query.where)
        }
    }
    esQuery.size = nvl(query.limit, 200000)
    esQuery.fields = StructList()
    for s in select.value:
        if s == "*":
            esQuery.fields = None
        elif isinstance(s, list):
            esQuery.fields.extend(s)
        elif isinstance(s, dict):
            esQuery.fields.extend(s.values())
        else:
            esQuery.fields.append(s)
    esQuery.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es_query_util.post(es, esQuery, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, dict):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()} for t in T])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception, e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube


def is_setop(query):
    select = listwrap(query.select)

    if not query.edges:
        isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        simpleAgg = AND([s.aggregate in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

        # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
        if simpleAgg or isDeep:
            return True
    else:
        isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        if isSmooth:
            return True

    return False


def es_setop(es, mvel, query):
    esQuery = es_query_util.buildESQuery(query)
    select = listwrap(query.select)

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if not isDeep and not isComplex and len(select) == 1:
        if not select[0].value:
            esQuery.query = {"filtered": {
                "query": {"match_all": {}},
                "filter": simplify(query.where)
            }}
            esQuery.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
        elif MVEL.isKeyword(select[0].value):
            esQuery.facets.mvel = {
                "terms": {
                    "field": select[0].value,
                    "size": nvl(query.limit, 200000)
                },
                "facet_filter": simplify(query.where)
            }
            if query.sort:
                s = query.sort
                if len(s) > 1:
                    Log.error("can not sort by more than one field")

                s0 = s[0]
                if s0.field != select[0].value:
                    Log.error("can not sort by anything other than count, or term")

                esQuery.facets.mvel.terms.order = "term" if s0.sort >= 0 else "reverse_term"
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        esQuery.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": nvl(simple_query.limit, 200000)
            },
            "facet_filter": simplify(query.where)
        }
    else:
        esQuery.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": nvl(query.limit, 200000)
            },
            "facet_filter": simplify(query.where)
        }

    data = es_query_util.post(es, esQuery, query.limit)

    if len(select) == 1:
        if not select[0].value:
            # SPECIAL CASE FOR SINGLE COUNT
            output = Matrix(value=data.hits.total)
            cube = Cube(query.select, [], {select[0].name: output})
        elif MVEL.isKeyword(select[0].value):
            # SPECIAL CASE FOR SINGLE TERM
            T = data.facets.mvel.terms
            output = Matrix.wrap([t.term for t in T])
            cube = Cube(query.select, [], {select[0].name: output})
    else:
        data_list = MVEL.unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            output = zip(*data_list)
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    cube.frum = query
    return cube


def is_deep(query):
    select = listwrap(query.select)
    if len(select) > 1:
        return False

    if aggregates[select[0].aggregate] not in ("none", "count"):
        return False

    if len(query.edges) <= 1:
        return False

    isDeep = len(split_field(query["from"].name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    if not isDeep:
        return False  # BETTER TO USE TERM QUERY

    return True


def es_deepop(es, mvel, query):
    esQuery = es_query_util.buildESQuery(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = StructList()
    esQuery.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": simplify(query.where)
    }

    data = es_query_util.post(es, esQuery, query.limit)

    rows = MVEL.unpack_terms(data.facets.mvel, query.edges)
    terms = zip(*rows)

    # NUMBER ALL EDGES FOR Qb INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        for r in terms[f]:
            e.domain.getPartByKey(r)

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
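
# A quick illustration of the routing predicates above (index and field names
# are stand-ins): a flat, no-aggregate select over simple field names is a
# "fieldop", answerable with a plain filtered query, while a dotted "from"
# (split_field() yields more than one segment) marks a nested, "deep" source.
_example_fieldop_query = {"from": "bugs", "select": [{"value": "bug_id", "aggregate": "none"}]}
_example_deep_source = "bugs.attachments"  # len(split_field(...)) > 1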
@ -0,0 +1,151 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from .. import struct
from ..collections.matrix import Matrix
from ..collections import AND
from ..queries import Q
from ..queries import es_query_util
from ..queries.es_query_util import aggregates, buildESQuery, compileEdges2Term
from ..queries.filters import simplify
from ..queries.cube import Cube
from ..struct import nvl, StructList
from ..structs.wraps import wrap, listwrap


def is_terms(query):
    select = listwrap(query.select)

    isSimple = not query.select or AND(aggregates[s.aggregate] in ("none", "count") for s in select)
    if isSimple:
        return True
    return False


def es_terms(es, mvel, query):
    """
    RETURN LIST OF ALL EDGE QUERIES

    EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
    WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION
    """
    if len(query.edges) == 2:
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    esQuery = buildESQuery(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    for s in select:
        esQuery.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": nvl(query.limit, 200000)
            },
            "facet_filter": simplify(query.where)
        }

    term2Parts = packed_term.term2parts

    data = es_query_util.post(es, esQuery, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    for k, f in data.facets.items():
        for t in f.terms:
            term2Parts(t.term)

    # NUMBER ALL EDGES FOR Qb INDEXING
    for f, e in enumerate(query.edges):
        e.index = f
        if e.domain.type in ["uid", "default"]:
            # e.domain.partitions = Q.sort(e.domain.partitions, "value")
            for p, part in enumerate(e.domain.partitions):
                part.dataIndex = p
            e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    output = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception, e:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass
    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube


def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
    esQuery = buildESQuery(query)
    for s in select:
        for i, v in enumerate(values1):
            esQuery.facets[s.name + "," + str(i)] = {
                "terms": {
                    "field": query.edges[1].value,
                    "size": nvl(query.limit, 200000)
                },
                "facet_filter": simplify({"and": [
                    query.where,
                    {"term": {query.edges[0].value: v}}
                ]})
            }

    data = es_query_util.post(es, esQuery, query.limit)

    # UNION ALL TERMS FROM SECOND DIMENSION
    values2 = set()
    for k, f in data.facets.items():
        values2.update(f.terms.term)
    values2 = Q.sort(values2)
    term2index = {v: i for i, v in enumerate(values2)}
    query.edges[1].domain.partitions = StructList([{"name": v, "value": v} for v in values2])

    # MAKE CUBE
    output = {}
    dims = [len(values1), len(values2)]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        coord = facetName.split(",")
        s = [s for s in select if s.name == coord[0]][0]
        i1 = int(coord[1])
        for term in facet.terms:
            i2 = term2index[term.term]
            output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
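
# How the packed facet names decode (the name is illustrative): with two edges,
# each facet is "<select.name>,<i>", where i indexes the first edge's parts and
# every term inside that facet supplies the second coordinate.
def _example_decode_facet_name(facetName="count,3"):
    coord = facetName.split(",")
    return coord[0], int(coord[1])  # ("count", 3): SELECT NAME AND values1 INDEX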
@ -0,0 +1,333 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from ..collections.matrix import Matrix
from ..collections import COUNT, PRODUCT
from ..queries import es_query_util
from ..queries.cube import Cube
from ..queries.es_query_util import aggregates, buildESQuery, compileEdges2Term
from ..queries.filters import simplify
from ..env.logs import Log
from ..queries import domains, MVEL, filters
from ..queries.MVEL import UID
from ..struct import nvl, StructList
from ..structs.wraps import wrap, listwrap


def is_terms_stats(query):
    # ONLY ALLOWED ONE UNKNOWN DOMAIN
    num_unknown = COUNT(1 for e in query.edges if e.domain.type not in domains.KNOWN)

    if num_unknown <= 1:
        if query.sort:
            Log.error("terms_stats can not be sorted")

        return True
    return False


def es_terms_stats(esq, mvel, query):
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = StructList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: this can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and MVEL.isKeyword(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, coord, cube):
            if value:
                esFacets.append([e.domain.partitions[coord[i]] for i, e in enumerate(facetEdges)])

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", {"real_count": len(esFacets), "theory_count": total_facets})

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, StructList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    esQuery = buildESQuery(query)

    for s in select:
        for parts in esFacets:
            condition = StructList()
            constants = StructList()
            name = [s.name]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            esQuery.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if MVEL.isKeyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not MVEL.isKeyword(s.value) else None,
                    "size": nvl(query.limit, 200000)
                }
            }
            if condition:
                esQuery.facets[name].facet_filter = simplify({"and": condition})

    data = es_query_util.post(esq.es, esQuery, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = StructList()
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map.get(stats):
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (special.dataIndex, ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube


def register_script_field(esQuery, code):
    if not esQuery.script_fields:
        esQuery.script_fields = {}

    # IF CODE IS IDENTICAL, THEN USE THE EXISTING SCRIPT
    for n, c in esQuery.script_fields.items():
        if c.script == code:
            return n

    name = "script" + UID()
    esQuery.script_fields[name].script = code
    return name


def getAllEdges(facetEdges):
    if not facetEdges:
        return [()]
    return _getAllEdges(facetEdges, 0)


def _getAllEdges(facetEdges, edgeDepth):
    """
    RETURN ALL PARTITION COMBINATIONS: A LIST OF ORDERED TUPLES
    """
    if edgeDepth == len(facetEdges):
        return [()]
    edge = facetEdges[edgeDepth]

    deeper = _getAllEdges(facetEdges, edgeDepth + 1)

    output = StructList()
    partitions = edge.domain.partitions
    for part in partitions:
        for deep in deeper:
            output.append((part,) + deep)
    return output


def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT
    """
    output = wrap({})

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(nvl(partition.where, {"and": []}))

        if partition.min and partition.max and MVEL.isKeyword(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return filters.simplify(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if MVEL.isKeyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lt": MVEL.value2value(partition.max)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + MVEL.value2MVEL(partition.max)
                    )}})

                if MVEL.isKeyword(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gt": MVEL.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + MVEL.value2MVEL(partition.min))}})

            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if MVEL.isKeyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lte": MVEL.value2value(partition.min)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + MVEL.value2MVEL(partition.min)
                    )}})

                if MVEL.isKeyword(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gte": MVEL.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        MVEL.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif MVEL.isKeyword(edge.value):
        # USE FAST ES SYNTAX
        if edge.domain.type in domains.ALGEBRAIC:
            output.range = {}
            output.range[edge.value] = {"gte": MVEL.value2query(partition.min), "lt": MVEL.value2query(partition.max)}
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output.term = {edge.value: partition.value}
            else:
                output.term = {edge.value: edge.domain.getKey(partition)}

        elif edge.domain.type == "default":
            output.term = dict()
            output.term[edge.value] = partition.value
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output.script = {"script": edge.value + ">=" + MVEL.value2MVEL(partition.min) + " and " + edge.value + "<" + MVEL.value2MVEL(partition.max)}
        else:
            output.script = {"script": "( " + edge.value + " ) ==" + MVEL.value2MVEL(partition.value)}

        code = MVEL.addFunctions(output.script.script)
        output.script.script = code.head + code.body
        return output
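
# A worked case for buildCondition() (field and value are stand-ins): for a
# keyword-valued edge over a "set" domain, the partition reduces to a plain
# term filter, so each facet only counts documents in that part.
_example_set_condition = {"term": {"bug_status": "RESOLVED"}}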
@ -0,0 +1,498 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from datetime import datetime
|
||||
|
||||
from .. import struct
|
||||
from ..cnv import CNV
|
||||
from .. import strings
|
||||
from ..collections import COUNT
|
||||
from ..maths import stats
|
||||
from ..env.elasticsearch import ElasticSearch
|
||||
from ..env.logs import Log
|
||||
from ..maths import Math
|
||||
from ..queries import domains, MVEL, filters
|
||||
from ..struct import nvl, StructList, Struct, split_field, join_field
|
||||
from ..structs.wraps import wrap
|
||||
from ..times import durations
|
||||
|
||||
|
||||
TrueFilter = {"match_all": {}}
|
||||
DEBUG = False
|
||||
|
||||
INDEX_CACHE = {} # MATCH NAMES TO FULL CONNECTION INFO
|
||||
|
||||
|
||||
def loadColumns(es, frum):
|
||||
"""
|
||||
ENSURE THE COLUMNS FOR THE GIVEN INDEX/QUERY ARE LOADED, SO MVEL COMPILATION WORKS BETTER
|
||||
"""
|
||||
if isinstance(frum, basestring):
|
||||
if frum in INDEX_CACHE:
|
||||
return INDEX_CACHE[frum]
|
||||
frum = Struct(
|
||||
name=frum
|
||||
)
|
||||
else:
|
||||
if not frum.name:
|
||||
Log.error("Expecting from clause to have a name")
|
||||
|
||||
if frum.name in INDEX_CACHE:
|
||||
return INDEX_CACHE[frum.name]
|
||||
|
||||
# FILL frum WITH DEFAULTS FROM es.settings
|
||||
struct.set_default(frum, es.settings)
|
||||
|
||||
if not frum.host:
|
||||
Log.error("must have host defined")
|
||||
|
||||
#DETERMINE IF THE es IS FUNCTIONALLY DIFFERENT
|
||||
diff = False
|
||||
for k, v in es.settings.items():
|
||||
if k != "name" and v != frum[k]:
|
||||
diff = True
|
||||
if diff:
|
||||
es = ElasticSearch(frum)
|
||||
|
||||
output = wrap(frum).copy()
|
||||
schema = es.get_schema()
|
||||
properties = schema.properties
|
||||
output.es = es
|
||||
|
||||
root = split_field(frum.name)[0]
|
||||
if root != frum.name:
|
||||
INDEX_CACHE[frum.name] = output
|
||||
loadColumns(es, root)
|
||||
else:
|
||||
INDEX_CACHE[root] = output
|
||||
output.columns = parseColumns(frum.index, root, properties)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def post(es, esQuery, limit):
|
||||
if not esQuery.facets and esQuery.size == 0:
|
||||
Log.error("ESQuery is sending no facets")
|
||||
# DO NOT KNOW WHY THIS WAS HERE
|
||||
# if isinstance(query.select, list) or len(query.edges) and not esQuery.facets.keys and esQuery.size == 0:
|
||||
# Log.error("ESQuery is sending no facets")
|
||||
|
||||
postResult = None
|
||||
try:
|
||||
postResult = es.search(esQuery)
|
||||
|
||||
for facetName, f in postResult.facets.items():
|
||||
if f._type == "statistical":
|
||||
return None
|
||||
if not f.terms:
|
||||
return None
|
||||
|
||||
if not DEBUG and limit and len(f.terms) == limit:
|
||||
Log.error("Not all data delivered (" + str(len(f.terms)) + "/" + str(f.total) + ") try smaller range")
|
||||
except Exception, e:
|
||||
Log.error("Error with ESQuery", e)
|
||||
|
||||
return postResult
|
||||
|
||||
|
||||
def buildESQuery(query):
|
||||
output = wrap({
|
||||
"query": {"match_all": {}},
|
||||
"from": 0,
|
||||
"size": 100 if DEBUG else 0,
|
||||
"sort": [],
|
||||
"facets": {
|
||||
}
|
||||
})
|
||||
|
||||
if DEBUG:
|
||||
# TO LIMIT RECORDS TO WHAT'S IN FACETS
|
||||
output.query = {
|
||||
"filtered": {
|
||||
"query": {
|
||||
"match_all": {}
|
||||
},
|
||||
"filter": filters.simplify(query.where)
|
||||
}
|
||||
}
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def parseColumns(index_name, parent_path, esProperties):
|
||||
"""
|
||||
RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
|
||||
"""
|
||||
columns = StructList()
|
||||
for name, property in esProperties.items():
|
||||
if parent_path:
|
||||
path = join_field(split_field(parent_path) + [name])
|
||||
else:
|
||||
path = name
|
||||
|
||||
childColumns = None
|
||||
|
||||
if property.type == "nested" and property.properties:
|
||||
# NESTED TYPE IS A NEW TYPE DEFINITION
|
||||
if path not in INDEX_CACHE:
|
||||
INDEX_CACHE[path] = INDEX_CACHE[parent_path].copy()
|
||||
INDEX_CACHE[path].name = path
|
||||
INDEX_CACHE[path].columns = childColumns
|
||||
|
||||
columns.append({
|
||||
"name": struct.join_field(split_field(path)[1::]),
|
||||
"type": property.type,
|
||||
"useSource": True
|
||||
})
|
||||
continue
|
||||
|
||||
if property.properties:
|
||||
childColumns = parseColumns(index_name, path, property.properties)
|
||||
columns.extend(childColumns)
|
||||
columns.append({
|
||||
"name": join_field(split_field(path)[1::]),
|
||||
"type": "object",
|
||||
"useSource": True
|
||||
})
|
||||
|
||||
if property.dynamic:
|
||||
continue
|
||||
if not property.type:
|
||||
continue
|
||||
if property.type == "multi_field":
|
||||
property.type = property.fields[name].type # PULL DEFAULT TYPE
|
||||
for i, (n, p) in enumerate(property.fields.items()):
|
||||
if n == name:
|
||||
# DEFAULT
|
||||
columns.append({"name": struct.join_field(split_field(path)[1::]), "type": p.type, "useSource": p.index == "no"})
|
||||
else:
|
||||
columns.append({"name": struct.join_field(split_field(path)[1::]) + "\\." + n, "type": p.type, "useSource": p.index == "no"})
|
||||
continue
|
||||
|
||||
if property.type in ["string", "boolean", "integer", "date", "long", "double"]:
|
||||
columns.append({
|
||||
"name": struct.join_field(split_field(path)[1::]),
|
||||
"type": property.type,
|
||||
"useSource": property.index == "no"
|
||||
})
|
||||
if property.index_name and name != property.index_name:
|
||||
columns.append({
|
||||
"name": property.index_name,
|
||||
"type": property.type,
|
||||
"useSource": property.index == "no"
|
||||
})
|
||||
elif property.enabled == False:
|
||||
columns.append({
|
||||
"name": struct.join_field(split_field(path)[1::]),
|
||||
"type": property.type,
|
||||
"useSource": "yes"
|
||||
})
|
||||
else:
|
||||
Log.warning("unknown type {{type}} for property {{path}}", {"type": property.type, "path": path})
|
||||
|
||||
# SPECIAL CASE FOR PROPERTIES THAT WILL CAUSE OutOfMemory EXCEPTIONS
|
||||
for c in columns:
|
||||
if name == "bugs" and (c.name == "dependson" or c.name == "blocked"):
|
||||
c.useSource = True
|
||||
|
||||
return columns
|
||||
|
||||
|
||||
def compileTime2Term(edge):
|
||||
"""
|
||||
RETURN MVEL CODE THAT MAPS TIME AND DURATION DOMAINS DOWN TO AN INTEGER,
|
||||
AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
|
||||
"""
|
||||
if edge.esscript:
|
||||
Log.error("edge script not supported yet")
|
||||
|
||||
# IS THERE A LIMIT ON THE DOMAIN?
|
||||
numPartitions = len(edge.domain.partitions)
|
||||
value = edge.value
|
||||
if MVEL.isKeyword(value):
|
||||
value = "doc[\"" + value + "\"].value"
|
||||
|
||||
nullTest = compileNullTest(edge)
|
||||
ref = nvl(edge.domain.min, edge.domain.max, datetime(2000, 1, 1))
|
||||
|
||||
if edge.domain.interval.month > 0:
|
||||
offset = ref.subtract(ref.floorMonth(), durations.DAY).milli
|
||||
if offset > durations.DAY.milli * 28:
|
||||
offset = ref.subtract(ref.ceilingMonth(), durations.DAY).milli
|
||||
partition2int = "milli2Month(" + value + ", " + MVEL.value2MVEL(offset) + ")"
|
||||
partition2int = "((" + nullTest + ") ? 0 : " + partition2int + ")"
|
||||
|
||||
def int2Partition(value):
|
||||
if Math.round(value) == 0:
|
||||
return edge.domain.NULL
|
||||
|
||||
d = datetime(int(str(value)[:4]), int(str(value)[-2:]), 1)
|
||||
d = d.addMilli(offset)
|
||||
return edge.domain.getPartByKey(d)
|
||||
else:
|
||||
partition2int = "Math.floor((" + value + "-" + MVEL.value2MVEL(ref) + ")/" + edge.domain.interval.milli + ")"
|
||||
partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")"
|
||||
|
||||
def int2Partition(value):
|
||||
if Math.round(value) == numPartitions:
|
||||
return edge.domain.NULL
|
||||
return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))
|
||||
|
||||
return Struct(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
|
||||
|
||||
|
||||
# RETURN MVEL CODE THAT MAPS DURATION DOMAINS DOWN TO AN INTEGER,
|
||||
# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
|
||||
def compileDuration2Term(edge):
|
||||
if edge.esscript:
|
||||
Log.error("edge script not supported yet")
|
||||
|
||||
# IS THERE A LIMIT ON THE DOMAIN?
|
||||
numPartitions = len(edge.domain.partitions)
|
||||
value = edge.value
|
||||
if MVEL.isKeyword(value):
|
||||
value = "doc[\"" + value + "\"].value"
|
||||
|
||||
ref = nvl(edge.domain.min, edge.domain.max, durations.ZERO)
|
||||
nullTest = compileNullTest(edge)
|
||||
|
||||
ms = edge.domain.interval.milli
|
||||
if edge.domain.interval.month > 0:
|
||||
ms = durations.YEAR.milli / 12 * edge.domain.interval.month
|
||||
|
||||
partition2int = "Math.floor((" + value + "-" + MVEL.value2MVEL(ref) + ")/" + ms + ")"
|
||||
partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")"
|
||||
|
||||
def int2Partition(value):
|
||||
if Math.round(value) == numPartitions:
|
||||
return edge.domain.NULL
|
||||
return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))
|
||||
|
||||
return Struct(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
|
||||
|
||||
|
||||
# RETURN MVEL CODE THAT MAPS THE numeric DOMAIN DOWN TO AN INTEGER,
|
||||
# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS)
|
||||
def compileNumeric2Term(edge):
|
||||
if edge.script:
|
||||
Log.error("edge script not supported yet")
|
||||
|
||||
if edge.domain.type != "numeric" and edge.domain.type != "count":
|
||||
Log.error("can only translate numeric domains")
|
||||
|
||||
numPartitions = len(edge.domain.partitions)
|
||||
value = edge.value
|
||||
if MVEL.isKeyword(value):
|
||||
value = "doc[\"" + value + "\"].value"
|
||||
|
||||
if not edge.domain.max:
|
||||
if not edge.domain.min:
|
||||
ref = 0
|
||||
partition2int = "Math.floor(" + value + ")/" + MVEL.value2MVEL(edge.domain.interval) + ")"
|
||||
nullTest = "false"
|
||||
else:
|
||||
ref = MVEL.value2MVEL(edge.domain.min)
|
||||
partition2int = "Math.floor((" + value + "-" + ref + ")/" + MVEL.value2MVEL(edge.domain.interval) + ")"
|
||||
nullTest = "" + value + "<" + ref
|
||||
elif not edge.domain.min:
|
||||
ref = MVEL.value2MVEL(edge.domain.max)
|
||||
partition2int = "Math.floor((" + value + "-" + ref + ")/" + MVEL.value2MVEL(edge.domain.interval) + ")"
|
||||
nullTest = "" + value + ">=" + ref
|
||||
else:
|
||||
top = MVEL.value2MVEL(edge.domain.max)
|
||||
ref = MVEL.value2MVEL(edge.domain.min)
|
||||
partition2int = "Math.floor((" + value + "-" + ref + ")/" + MVEL.value2MVEL(edge.domain.interval) + ")"
|
||||
nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"
|
||||
|
||||
partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")"
|
||||
offset = CNV.value2int(ref)
|
||||
|
||||
def int2Partition(value):
|
||||
if Math.round(value) == numPartitions:
|
||||
return edge.domain.NULL
|
||||
return edge.domain.getPartByKey((value * edge.domain.interval) + offset)
|
||||
|
||||
return Struct(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
|
||||
|
||||
|
||||
def compileString2Term(edge):
|
||||
if edge.esscript:
|
||||
Log.error("edge script not supported yet")
|
||||
|
||||
value = edge.value
|
||||
if MVEL.isKeyword(value):
|
||||
value = strings.expand_template("getDocValue({{path}})", {"path": CNV.string2quote(value)})
|
||||
else:
|
||||
Log.error("not handled")
|
||||
|
||||
def fromTerm(value):
|
||||
return edge.domain.getPartByKey(value)
|
||||
|
||||
return Struct(
|
||||
toTerm={"head": "", "body": value},
|
||||
fromTerm=fromTerm
|
||||
)
|
||||
|
||||
|
||||
def compileNullTest(edge):
|
||||
"""
|
||||
RETURN A MVEL EXPRESSION THAT WILL EVALUATE TO true FOR OUT-OF-BOUNDS
|
||||
"""
|
||||
if edge.domain.type not in domains.ALGEBRAIC:
|
||||
Log.error("can only translate time and duration domains")
|
||||
|
||||
# IS THERE A LIMIT ON THE DOMAIN?
|
||||
value = edge.value
|
||||
if MVEL.isKeyword(value):
|
||||
value = "doc[\"" + value + "\"].value"
|
||||
|
||||
if not edge.domain.max:
|
||||
if not edge.domain.min:
|
||||
return "false"  # NO BOUNDS MEANS NOTHING IS OUT-OF-RANGE
|
||||
bot = MVEL.value2MVEL(edge.domain.min)
|
||||
nullTest = "" + value + "<" + bot
|
||||
elif not edge.domain.min:
|
||||
top = MVEL.value2MVEL(edge.domain.max)
|
||||
nullTest = "" + value + ">=" + top
|
||||
else:
|
||||
top = MVEL.value2MVEL(edge.domain.max)
|
||||
bot = MVEL.value2MVEL(edge.domain.min)
|
||||
nullTest = "(" + value + "<" + bot + ") or (" + value + ">=" + top + ")"
|
||||
|
||||
return nullTest
|
||||
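# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit): for a
# numeric edge on field "x" with domain [0, 10), compileNullTest would emit
# the MVEL snippet
#     (doc["x"].value<0) or (doc["x"].value>=10)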
|
||||
|
||||
def compileEdges2Term(mvel_compiler, edges, constants):
|
||||
"""
|
||||
TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)
|
||||
|
||||
GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM
|
||||
GIVE LAMBDA THAT WILL CONVERT THE TERM BACK INTO THE TUPLE
|
||||
RETURNS A Struct WITH EITHER A "field" OR AN "expression" ATTRIBUTE,
|
||||
PLUS A term2parts FUNCTION THAT MAPS A TERM BACK TO ITS PARTITIONS
|
||||
CAN USE THE constants (name, value pairs)
|
||||
"""
|
||||
|
||||
# IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING
|
||||
edge0 = edges[0]
|
||||
|
||||
if len(edges) == 1 and edge0.domain.type in ["set", "default"]:
|
||||
# THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET
|
||||
def temp(term):
|
||||
return StructList([edge0.domain.getPartByKey(term)])
|
||||
|
||||
if edge0.value and MVEL.isKeyword(edge0.value):
|
||||
return Struct(
|
||||
field=edge0.value,
|
||||
term2parts=temp
|
||||
)
|
||||
elif COUNT(edge0.domain.dimension.fields) == 1:
|
||||
return Struct(
|
||||
field=edge0.domain.dimension.fields[0],
|
||||
term2parts=temp
|
||||
)
|
||||
elif not edge0.value and edge0.domain.partitions:
|
||||
script = mvel_compiler.Parts2TermScript(edge0.domain)
|
||||
return Struct(
|
||||
expression=script,
|
||||
term2parts=temp
|
||||
)
|
||||
else:
|
||||
return Struct(
|
||||
expression=mvel_compiler.compile_expression(edge0.value, constants),
|
||||
term2parts=temp
|
||||
)
|
||||
|
||||
mvel_terms = [] # FUNCTION TO PACK TERMS
|
||||
fromTerm2Part = [] # UNPACK TERMS BACK TO PARTS
|
||||
for e in edges:
|
||||
domain = e.domain
|
||||
fields = domain.dimension.fields
|
||||
|
||||
if not e.value and fields:
|
||||
code, decode = mvel_compiler.Parts2Term(e.domain)
|
||||
t = Struct(
|
||||
toTerm=code,
|
||||
fromTerm=decode
|
||||
)
|
||||
elif fields:
|
||||
Log.error("not expected")
|
||||
elif e.domain.type == "time":
|
||||
t = compileTime2Term(e)
|
||||
elif e.domain.type == "duration":
|
||||
t = compileDuration2Term(e)
|
||||
elif e.domain.type in domains.ALGEBRAIC:
|
||||
t = compileNumeric2Term(e)
|
||||
elif e.domain.type == "set" and not fields:
|
||||
def fromTerm(term):
|
||||
return e.domain.getPartByKey(term)
|
||||
|
||||
code, decode = mvel_compiler.Parts2Term(e.domain)
|
||||
t = Struct(
|
||||
toTerm=code,
|
||||
fromTerm=decode
|
||||
)
|
||||
else:
|
||||
t = compileString2Term(e)
|
||||
|
||||
if not t.toTerm.body:
|
||||
mvel_compiler.Parts2Term(e.domain)
|
||||
Log.error("")
|
||||
|
||||
fromTerm2Part.append(t.fromTerm)
|
||||
mvel_terms.append(t.toTerm.body)
|
||||
|
||||
# REGISTER THE DECODE FUNCTION
|
||||
def temp(term):
|
||||
terms = term.split('|')
|
||||
|
||||
output = StructList([t2p(t) for t, t2p in zip(terms, fromTerm2Part)])
|
||||
return output
|
||||
|
||||
return Struct(
|
||||
expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants),
|
||||
term2parts=temp
|
||||
)
|
||||
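# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit): with
# two edges, the compiled MVEL emits a packed facet term like "part0|part1",
# and term2parts("part0|part1") splits on the pipe and maps each piece back
# through its edge's fromTerm, returning the pair of partitions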
|
||||
|
||||
def fix_es_stats(s):
|
||||
"""
|
||||
ES RETURNS BAD DEFAULT VALUES FOR STATS
|
||||
"""
|
||||
s = wrap(s)
|
||||
if s.count == 0:
|
||||
return stats.zero
|
||||
return s
|
||||
|
||||
|
||||
#MAP NAME TO SQL FUNCTION
|
||||
aggregates = {
|
||||
"none": "none",
|
||||
"one": "count",
|
||||
"sum": "total",
|
||||
"add": "total",
|
||||
"count": "count",
|
||||
"maximum": "max",
|
||||
"minimum": "min",
|
||||
"max": "max",
|
||||
"min": "min",
|
||||
"mean": "mean",
|
||||
"average": "mean",
|
||||
"avg": "mean",
|
||||
"N": "count",
|
||||
"X0": "count",
|
||||
"X1": "total",
|
||||
"X2": "sum_of_squares",
|
||||
"std": "std_deviation",
|
||||
"stddev": "std_deviation",
|
||||
"var": "variance",
|
||||
"variance": "variance"
|
||||
}
|
||||
|
|
@@ -0,0 +1,148 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from ..structs.wraps import wrap
|
||||
|
||||
TRUE_FILTER = True
|
||||
FALSE_FILTER = False
|
||||
|
||||
|
||||
def simplify(esfilter):
|
||||
output = normalize(esfilter)
|
||||
if output is TRUE_FILTER:
|
||||
return {"match_all": {}}
|
||||
output.isNormal = None
|
||||
return output
|
||||
|
||||
|
||||
def removeOr(esfilter):
|
||||
if esfilter["not"]:
|
||||
return {"not": removeOr(esfilter["not"])}
|
||||
|
||||
if esfilter["and"]:
|
||||
return {"and": [removeOr(v) for v in esfilter["and"]]}
|
||||
|
||||
if esfilter["or"]: # CONVERT OR TO NOT.AND.NOT
|
||||
return {"not": {"and": [{"not": removeOr(v)} for v in esfilter["or"]]}}
|
||||
|
||||
return esfilter
|
||||
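# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
# removeOr rewrites "or" into not/and/not by De Morgan's law,
# a OR b == NOT(NOT a AND NOT b)
def _example_removeOr():
    before = wrap({"or": [{"term": {"a": 1}}, {"term": {"b": 2}}]})
    print removeOr(before)
    # EXPECTED: {"not": {"and": [{"not": {"term": {"a": 1}}},
    #                            {"not": {"term": {"b": 2}}}]}}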
|
||||
def normalize(esfilter):
|
||||
"""
|
||||
SIMPLIFY THE LOGIC EXPRESSION
|
||||
"""
|
||||
return wrap(_normalize(wrap(esfilter)))
|
||||
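# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
# cascading "and" filters collapse to a single level
def _example_normalize():
    nested = wrap({"and": [{"term": {"a": 1}}, {"and": [{"term": {"b": 2}}]}]})
    print normalize(nested)
    # EXPECTED: {"and": [{"term": {"a": 1}}, {"term": {"b": 2}}]}
    # (plus the internal isNormal flag)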
|
||||
|
||||
|
||||
def _normalize(esfilter):
|
||||
"""
|
||||
DO NOT USE Structs, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
|
||||
REALLY, WE JUST COLLAPSE CASCADING "and" AND "or" FILTERS
|
||||
"""
|
||||
if esfilter is TRUE_FILTER or esfilter is FALSE_FILTER or esfilter.isNormal:
|
||||
return esfilter
|
||||
|
||||
# Log.note("from: " + CNV.object2JSON(esfilter))
|
||||
isDiff = True
|
||||
|
||||
while isDiff:
|
||||
isDiff = False
|
||||
|
||||
if esfilter["and"]:
|
||||
output = []
|
||||
for a in esfilter["and"]:
|
||||
if isinstance(a, (list, set)):
|
||||
from dzAlerts.util.env.logs import Log
|
||||
Log.error("and clause is not allowed a list inside a list")
|
||||
a_ = _normalize(a)
|
||||
if a_ is not a:
|
||||
isDiff = True
|
||||
a = a_
|
||||
if a == TRUE_FILTER:
|
||||
isDiff = True
|
||||
continue
|
||||
if a == FALSE_FILTER:
|
||||
return FALSE_FILTER
|
||||
if a.get("and", None):
|
||||
isDiff = True
|
||||
a.isNormal = None
|
||||
output.extend(a.get("and", None))
|
||||
else:
|
||||
a.isNormal = None
|
||||
output.append(a)
|
||||
if not output:
|
||||
return TRUE_FILTER
|
||||
elif len(output) == 1:
|
||||
# output[0].isNormal = True
|
||||
esfilter = output[0]
|
||||
break
|
||||
elif isDiff:
|
||||
esfilter = wrap({"and": output})
|
||||
continue
|
||||
|
||||
if esfilter["or"]:
|
||||
output = []
|
||||
for a in esfilter["or"]:
|
||||
a_ = _normalize(a)
|
||||
if a_ is not a:
|
||||
isDiff = True
|
||||
a = a_
|
||||
|
||||
if a == TRUE_FILTER:
|
||||
return TRUE_FILTER
|
||||
if a == FALSE_FILTER:
|
||||
isDiff = True
|
||||
continue
|
||||
if a.get("or", None):
|
||||
a.isNormal = None
|
||||
isDiff = True
|
||||
output.extend(a["or"])
|
||||
else:
|
||||
a.isNormal = None
|
||||
output.append(a)
|
||||
if not output:
|
||||
return FALSE_FILTER
|
||||
elif len(output) == 1:
|
||||
esfilter = output[0]
|
||||
break
|
||||
elif isDiff:
|
||||
esfilter = wrap({"or": output})
|
||||
continue
|
||||
|
||||
if esfilter.term != None:
|
||||
if esfilter.term.keys():
|
||||
esfilter.isNormal = True
|
||||
return esfilter
|
||||
else:
|
||||
return TRUE_FILTER
|
||||
|
||||
if esfilter.terms != None:
|
||||
for k, v in esfilter.terms.items():
|
||||
if len(v) > 0:
|
||||
esfilter.isNormal = True
|
||||
return esfilter
|
||||
return FALSE_FILTER
|
||||
|
||||
if esfilter["not"] != None:
|
||||
_sub = esfilter["not"]
|
||||
sub = _normalize(_sub)
|
||||
if sub is FALSE_FILTER:
|
||||
return TRUE_FILTER
|
||||
elif sub is TRUE_FILTER:
|
||||
return FALSE_FILTER
|
||||
elif sub is not _sub:
|
||||
sub.isNormal = None
|
||||
return wrap({"not": sub, "isNormal": True})
|
||||
else:
|
||||
sub.isNormal = None
|
||||
|
||||
esfilter.isNormal = True
|
||||
return esfilter
|
|
@@ -0,0 +1,87 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ..collections import MIN
|
||||
from ..env.logs import Log
|
||||
from ..struct import nvl, split_field, StructList
|
||||
from ..structs.wraps import wrap
|
||||
|
||||
|
||||
class FlatList(list):
|
||||
"""
|
||||
FlatList IS A RESULT OF FILTERING SETS OF TREES
|
||||
WE SAVED OURSELVES FROM COPYING ALL OBJECTS IN ALL PATHS OF ALL TREES,
|
||||
BUT WE ARE LEFT WITH THIS LIST OF TUPLES THAT POINT TO THE SAME
|
||||
"""
|
||||
|
||||
def __init__(self, path, data):
|
||||
"""
|
||||
data IS A LIST OF TUPLES
|
||||
EACH TUPLE IS THE SEQUENCE OF OBJECTS FOUND ALONG A PATH IN A TREE
|
||||
IT IS EXPECTED len(data[i]) == len(path)+1 (data[i][0] IS THE ORIGINAL ROW OBJECT)
|
||||
"""
|
||||
list.__init__(self)
|
||||
self.data = data
|
||||
self.path = path
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
def __iter__(self):
|
||||
"""
|
||||
WE ARE NOW DOOMED TO COPY THE RECORDS (BECAUSE LISTS DOWN THE PATH ARE SPECIFIC ELEMENTS)
|
||||
"""
|
||||
for d in self.data:
|
||||
r = d[-1]
|
||||
for i in range(len(self.path)):
|
||||
temp = dict(d[-i - 2])
|
||||
temp[self.path[-i - 1]] = r
|
||||
r = temp
|
||||
yield r
|
||||
|
||||
def select(self, field_name):
|
||||
if isinstance(field_name, dict):
|
||||
field_name=field_name.value
|
||||
|
||||
if isinstance(field_name, basestring):
|
||||
# RETURN LIST OF VALUES
|
||||
if len(split_field(field_name)) == 1:
|
||||
if self.path[0] == field_name:
|
||||
return [d[1] for d in self.data]
|
||||
else:
|
||||
return [d[0][field_name] for d in self.data]
|
||||
else:
|
||||
keys = split_field(field_name)
|
||||
depth = nvl(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path)) # LENGTH OF COMMON PREFIX
|
||||
short_keys = keys[depth:]
|
||||
|
||||
output = StructList()
|
||||
_select1((wrap(d[depth]) for d in self.data), short_keys, 0, output)
|
||||
return output
|
||||
|
||||
Log.error("multiselect over FlatList not supported")
|
||||
|
||||
|
||||
def _select1(data, field, depth, output):
|
||||
"""
|
||||
SELECT A SINGLE FIELD
|
||||
"""
|
||||
for d in data:
|
||||
for i, f in enumerate(field[depth:]):
|
||||
d = d[f]
|
||||
if d == None:
|
||||
output.append(None)
|
||||
break
|
||||
elif isinstance(d, list):
|
||||
_select1(d, field, depth + i + 1, output)
|
||||
break
|
||||
else:
|
||||
output.append(d)
|
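# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit): a
# FlatList over path ["b"] re-nests each (row, child) tuple into a full
# record when iterated
def _example_flatlist():
    rows = [({"a": 1, "b": [{"c": 2}]}, {"c": 2})]
    for r in FlatList(["b"], rows):
        assert r == {"a": 1, "b": {"c": 2}}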
|
@@ -0,0 +1,169 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import sys
|
||||
from .cube import Cube
|
||||
from ..queries.index import value2key
|
||||
from ..struct import StructList, Struct
|
||||
from ..structs.wraps import listwrap, wrap
|
||||
from ..env.logs import Log
|
||||
from ..collections.multiset import Multiset
|
||||
|
||||
|
||||
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
|
||||
"""
|
||||
RETURN LIST OF (keys, values) PAIRS WHERE
|
||||
keys IS THE DISTINCT COMBINATION OF VALUES FOR THE GIVEN SET OF keys
|
||||
values IS THE LIST OF ALL data THAT HAS THOSE keys
|
||||
contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
|
||||
"""
|
||||
|
||||
if size != None or min_size != None or max_size != None:
|
||||
if size != None:
|
||||
max_size = size
|
||||
return groupby_min_max_size(data, min_size=min_size, max_size=max_size)
|
||||
|
||||
if isinstance(data, Cube):
|
||||
return data.groupby(keys)
|
||||
|
||||
def value2hash(x):
|
||||
return value2key(keys, x)
|
||||
|
||||
def get_keys(d):
|
||||
output = Struct()
|
||||
for k in keys:
|
||||
output[k] = d[k]
|
||||
return output
|
||||
|
||||
if contiguous:
|
||||
try:
|
||||
if not data:
|
||||
return wrap([])
|
||||
|
||||
agg = StructList()
|
||||
acc = StructList()
|
||||
curr_key = value2hash(data[0])
|
||||
for d in data:
|
||||
key = value2key(keys, d)
|
||||
if key != curr_key:
|
||||
agg.append((get_keys(acc[0]), acc))
|
||||
curr_key = key
|
||||
acc = [d]
|
||||
else:
|
||||
acc.append(d)
|
||||
agg.append((get_keys(acc[0]), acc))
|
||||
return wrap(agg)
|
||||
except Exception, e:
|
||||
Log.error("Problem grouping contiguous values", e)
|
||||
|
||||
try:
|
||||
agg = {}
|
||||
for d in data:
|
||||
key = value2key(keys, d)
|
||||
pair = agg.get(key, None)
|
||||
if pair is None:
|
||||
pair = (get_keys(d), StructList())
|
||||
agg[key] = pair
|
||||
pair[1].append(d)
|
||||
|
||||
return agg.values()
|
||||
except Exception, e:
|
||||
Log.error("Problem grouping", e)
|
||||
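# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit,
# assuming plain dicts for the rows): group on one key; each pair is
# (group key, matching rows)
def _example_groupby():
    data = [{"a": 1, "b": "x"}, {"a": 1, "b": "y"}, {"a": 2, "b": "z"}]
    for key, values in groupby(data, keys=["a"]):
        print key.a, len(values)
    # EXPECTED (in some order): "1 2" and "2 1"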
|
||||
|
||||
def groupby_size(data, size):
|
||||
if hasattr(data, "next"):
|
||||
iterator = data
|
||||
elif hasattr(data, "__iter__"):
|
||||
iterator = data.__iter__()
|
||||
else:
|
||||
Log.error("do not know how to handle this type")
|
||||
|
||||
done = StructList()
|
||||
def more():
|
||||
output = StructList()
|
||||
for i in range(size):
|
||||
try:
|
||||
output.append(iterator.next())
|
||||
except StopIteration:
|
||||
done.append(True)
|
||||
break
|
||||
return output
|
||||
|
||||
#THIS IS LAZY
|
||||
i = 0
|
||||
while True:
|
||||
output = more()
|
||||
yield (i, output)
|
||||
if len(done) > 0:
|
||||
break
|
||||
i += 1
|
||||
|
||||
|
||||
def groupby_Multiset(data, min_size, max_size):
|
||||
# GROUP multiset BASED ON POPULATION OF EACH KEY, TRYING TO STAY IN min/max LIMITS
|
||||
if min_size == None:
|
||||
min_size = 0
|
||||
|
||||
total = 0
|
||||
i = 0
|
||||
g = list()
|
||||
for k, c in data.items():
|
||||
if total < min_size or total + c < max_size:
|
||||
total += c
|
||||
g.append(k)
|
||||
elif total < max_size:
|
||||
yield (i, g)
|
||||
i += 1
|
||||
total = c
|
||||
g = [k]
|
||||
|
||||
if total >= max_size:
|
||||
Log.error("({{min}}, {{max}}) range is too strict given step of {{increment}}", {
|
||||
"min": min_size, "max": max_size, "increment": c
|
||||
})
|
||||
|
||||
if g:
|
||||
yield (i, g)
|
||||
|
||||
|
||||
def groupby_min_max_size(data, min_size=0, max_size=None):
|
||||
if max_size == None:
|
||||
max_size = sys.maxint
|
||||
|
||||
if isinstance(data, (bytearray, basestring, list)):
|
||||
def _iter():
|
||||
num = (len(data) - 1) / max_size + 1
|
||||
for i in range(0, num):
|
||||
output = (i, data[i * max_size:i * max_size + max_size:])
|
||||
yield output
|
||||
|
||||
return _iter()
|
||||
|
||||
elif hasattr(data, "__iter__"):
|
||||
def _iter():
|
||||
g = 0
|
||||
out = StructList()
|
||||
for i, d in enumerate(data):
|
||||
out.append(d)
|
||||
if (i + 1) % max_size == 0:
|
||||
yield g, out
|
||||
g += 1
|
||||
out = StructList()
|
||||
if out:
|
||||
yield g, out
|
||||
|
||||
return _iter()
|
||||
elif not isinstance(data, Multiset):
|
||||
return groupby_size(data, max_size)
|
||||
else:
|
||||
return groupby_Multiset(data, min_size, max_size)
|
||||
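# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
# a plain list is simply sliced into numbered chunks of max_size
def _example_chunking():
    chunks = list(groupby_min_max_size([1, 2, 3, 4, 5], max_size=2))
    assert chunks == [(0, [1, 2]), (1, [3, 4]), (2, [5])]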
|
|
@@ -0,0 +1,174 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ..queries.unique_index import UniqueIndex
|
||||
from ..env.logs import Log
|
||||
from ..structs.wraps import wrap, unwrap, tuplewrap
|
||||
|
||||
class Index(object):
|
||||
"""
|
||||
USING DATABASE TERMINOLOGY, THIS IS A NON-UNIQUE INDEX
|
||||
"""
|
||||
|
||||
def __init__(self, keys):
|
||||
self._data = {}
|
||||
self._keys = tuplewrap(keys)
|
||||
self.count = 0
|
||||
|
||||
def __getitem__(self, key):
|
||||
try:
|
||||
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
|
||||
# RETURN ANOTHER Index
|
||||
filter_key = tuple(self._keys[0:len(key):])
|
||||
key = value2key(filter_key, key)
|
||||
key = key[:len(filter_key)]
|
||||
d = self._data
|
||||
for k in key:
|
||||
d = d.get(k, {})
|
||||
output = Index(filter_key)
|
||||
output._data = d
|
||||
return output
|
||||
|
||||
key = value2key(self._keys, key)
|
||||
d = self._data
|
||||
for k in key:
|
||||
d = d.get(k, {})
|
||||
return wrap(list(d))
|
||||
except Exception, e:
|
||||
Log.error("something went wrong", e)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
Log.error("Not implemented")
|
||||
|
||||
|
||||
def add(self, val):
|
||||
key = value2key(self._keys, val)
|
||||
d = self._data
|
||||
for k in key[:-1]:
|
||||
e = d.get(k, None)
|
||||
if e is None:
|
||||
e = {}
|
||||
d[k] = e
|
||||
d = e
|
||||
k = key[-1]
|
||||
e = d.get(k, None)
|
||||
if e is None:
|
||||
e = []
|
||||
d[k] = e
|
||||
e.append(unwrap(val))
|
||||
self.count += 1
|
||||
|
||||
|
||||
def __contains__(self, key):
|
||||
expected = True if self[key] else False
|
||||
testing = self._test_contains(key)
|
||||
|
||||
if testing==expected:
|
||||
return testing
|
||||
else:
|
||||
Log.error("not expected")
|
||||
|
||||
def _test_contains(self, key):
|
||||
try:
|
||||
if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
|
||||
# RETURN ANOTHER Index
|
||||
length = len(key)
|
||||
key = value2key(self._keys[0:length:], key)
|
||||
d = self._data
|
||||
for k in key[:length]:
|
||||
try:
|
||||
d = d[k]
|
||||
except Exception, e:
|
||||
return False
|
||||
return True
|
||||
|
||||
key = value2key(self._keys, key)
|
||||
d = self._data
|
||||
for k in key:
|
||||
try:
|
||||
d = d[k]
|
||||
except Exception, e:
|
||||
return False
|
||||
return True
|
||||
except Exception, e:
|
||||
Log.error("something went wrong", e)
|
||||
|
||||
|
||||
|
||||
|
||||
def __nonzero__(self):
|
||||
if self._data.keys():
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
def iter(data, depth):
|
||||
if depth == 0:
|
||||
for v in data:
|
||||
yield wrap(v)
|
||||
return
|
||||
|
||||
for v in data.values():
|
||||
for v1 in iter(v, depth - 1):
|
||||
yield wrap(v1)
|
||||
|
||||
return iter(self._data, len(self._keys))
|
||||
|
||||
def __sub__(self, other):
|
||||
output = UniqueIndex(self._keys)
|
||||
for v in self:
|
||||
if v not in other:
|
||||
output.add(v)
|
||||
return output
|
||||
|
||||
def __and__(self, other):
|
||||
output = UniqueIndex(self._keys)
|
||||
for v in self:
|
||||
if v in other:
|
||||
output.add(v)
|
||||
return output
|
||||
|
||||
def __or__(self, other):
|
||||
output = UniqueIndex(self._keys)
|
||||
for v in self:
|
||||
output.add(v)
|
||||
for v in other:
|
||||
output.add(v)
|
||||
return output
|
||||
|
||||
def __len__(self):
|
||||
if self.count == 0:
|
||||
for d in self:
|
||||
self.count += 1
|
||||
return self.count
|
||||
|
||||
def subtract(self, other):
|
||||
return self.__sub__(other)
|
||||
|
||||
def intersect(self, other):
|
||||
return self.__and__(other)
|
||||
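# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
# a non-unique index keeps every row that shares the same key
def _example_index():
    i = Index(keys=["a"])
    i.add({"a": 1, "b": "x"})
    i.add({"a": 1, "b": "y"})
    assert len(i[1]) == 2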
|
||||
|
||||
def value2key(keys, val):
|
||||
if len(keys) == 1:
|
||||
if isinstance(val, dict):
|
||||
return val[keys[0]],
|
||||
elif isinstance(val, (list, tuple)):
|
||||
return val[0],
|
||||
return val,
|
||||
else:
|
||||
if isinstance(val, dict):
|
||||
return tuple(val[k] for k in keys)
|
||||
elif isinstance(val, (list, tuple)):
|
||||
return tuple(val)
|
||||
else:
|
||||
Log.error("do not know what to do here")
|
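# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
# value2key always returns a tuple, so it can key the nested dicts above
def _example_value2key():
    assert value2key(("a",), {"a": 1, "b": 2}) == (1,)
    assert value2key(("a", "b"), {"a": 1, "b": 2}) == (1, 2)
    assert value2key(("a", "b"), [1, 2]) == (1, 2)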
|
@@ -0,0 +1,365 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from .. import struct
|
||||
from .dimensions import Dimension
|
||||
from .domains import Domain
|
||||
from ..collections import AND, reverse
|
||||
from ..env.logs import Log
|
||||
from ..queries import MVEL
|
||||
from ..queries.filters import TRUE_FILTER, simplify
|
||||
from ..struct import nvl, Struct, EmptyList, split_field, join_field, StructList
|
||||
from ..structs.wraps import wrap, unwrap, listwrap
|
||||
from .es_query_util import INDEX_CACHE
|
||||
|
||||
|
||||
class Query(object):
|
||||
def __new__(cls, query, schema=None):
|
||||
if isinstance(query, Query):
|
||||
return query
|
||||
return object.__new__(cls)
|
||||
|
||||
def __init__(self, query, schema=None):
|
||||
"""
|
||||
NORMALIZE QUERY SO IT CAN STILL BE JSON
|
||||
"""
|
||||
if isinstance(query, Query):
|
||||
return
|
||||
|
||||
object.__init__(self)
|
||||
query = wrap(query)
|
||||
|
||||
self.name = query.name
|
||||
|
||||
select = query.select
|
||||
if isinstance(select, list):
|
||||
select = wrap([unwrap(_normalize_select(s, schema=schema)) for s in select])
|
||||
elif select:
|
||||
select = _normalize_select(select, schema=schema)
|
||||
else:
|
||||
select = StructList()
|
||||
self.select2index = {} # MAP FROM NAME TO data INDEX
|
||||
for i, s in enumerate(listwrap(select)):
|
||||
self.select2index[s.name] = i
|
||||
self.select = select
|
||||
|
||||
self.edges = _normalize_edges(query.edges, schema=schema)
|
||||
self.frum = _normalize_from(query["from"], schema=schema)
|
||||
self.where = _normalize_where(query.where, schema=schema)
|
||||
|
||||
self.window = [_normalize_window(w) for w in listwrap(query.window)]
|
||||
|
||||
self.sort = _normalize_sort(query.sort)
|
||||
self.limit = query.limit
|
||||
self.isLean = query.isLean
|
||||
|
||||
|
||||
@property
|
||||
def columns(self):
|
||||
return self.select + self.edges
|
||||
|
||||
def __getitem__(self, item):
|
||||
if item == "from":
|
||||
return self.frum
|
||||
return Struct.__getitem__(self, item)
|
||||
|
||||
def copy(self):
|
||||
output = object.__new__(Query)
|
||||
source = object.__getattribute__(self, "__dict__")
|
||||
dest = object.__getattribute__(output, "__dict__")
|
||||
struct.set_default(dest, source)
|
||||
return output
|
||||
|
||||
|
||||
def _normalize_selects(selects, schema=None):
|
||||
if isinstance(selects, list):
|
||||
return wrap([_normalize_select(s, schema=schema) for s in selects])
|
||||
else:
|
||||
return _normalize_select(selects, schema=schema)
|
||||
|
||||
|
||||
def _normalize_select(select, schema=None):
|
||||
if isinstance(select, basestring):
|
||||
if schema:
|
||||
s = schema[select]
|
||||
if s:
|
||||
return s.getSelect()
|
||||
return Struct(
|
||||
name=select.rstrip("."), # TRAILING DOT INDICATES THE VALUE, BUT IS INVALID FOR THE NAME
|
||||
value=select,
|
||||
aggregate="none"
|
||||
)
|
||||
else:
|
||||
if not select.name:
|
||||
select = select.copy()
|
||||
select.name = nvl(select.value, select.aggregate)
|
||||
|
||||
select.aggregate = nvl(select.aggregate, "none")
|
||||
return select
|
||||
|
||||
|
||||
def _normalize_edges(edges, schema=None):
|
||||
return [_normalize_edge(e, schema=schema) for e in listwrap(edges)]
|
||||
|
||||
|
||||
def _normalize_edge(edge, schema=None):
|
||||
if isinstance(edge, basestring):
|
||||
if schema:
|
||||
e = schema[edge]
|
||||
if e:
|
||||
return Struct(
|
||||
name=edge,
|
||||
domain=e.getDomain()
|
||||
)
|
||||
return Struct(
|
||||
name=edge,
|
||||
value=edge,
|
||||
domain=_normalize_domain(schema=schema)
|
||||
)
|
||||
else:
|
||||
return Struct(
|
||||
name=nvl(edge.name, edge.value),
|
||||
value=edge.value,
|
||||
range=edge.range,
|
||||
allowNulls=False if edge.allowNulls is False else True,
|
||||
domain=_normalize_domain(edge.domain, schema=schema)
|
||||
)
|
||||
|
||||
|
||||
def _normalize_from(frum, schema=None):
|
||||
frum = wrap(frum)
|
||||
|
||||
if isinstance(frum, basestring):
|
||||
return Struct(name=frum)
|
||||
elif isinstance(frum, dict) and (frum["from"] or isinstance(frum["from"], (list, set))):
|
||||
return Query(frum, schema=schema)
|
||||
else:
|
||||
return frum
|
||||
|
||||
|
||||
def _normalize_domain(domain=None, schema=None):
|
||||
if not domain:
|
||||
return Domain(type="default")
|
||||
elif isinstance(domain, Dimension):
|
||||
return domain.getDomain()
|
||||
elif schema and isinstance(domain, basestring) and schema[domain]:
|
||||
return schema[domain].getDomain()
|
||||
elif isinstance(domain, Domain):
|
||||
return domain
|
||||
|
||||
if not domain.name:
|
||||
domain = domain.copy()
|
||||
domain.name = domain.type
|
||||
return Domain(**struct.unwrap(domain))
|
||||
|
||||
|
||||
def _normalize_window(window, schema=None):
|
||||
return Struct(
|
||||
name=nvl(window.name, window.value),
|
||||
value=window.value,
|
||||
edges=[_normalize_edge(e, schema) for e in listwrap(window.edges)],
|
||||
sort=_normalize_sort(window.sort),
|
||||
aggregate=window.aggregate,
|
||||
range=_normalize_range(window.range),
|
||||
where=_normalize_where(window.where, schema=schema)
|
||||
)
|
||||
|
||||
|
||||
def _normalize_range(range):
|
||||
if range == None:
|
||||
return None
|
||||
|
||||
return Struct(
|
||||
min=range.min,
|
||||
max=range.max
|
||||
)
|
||||
|
||||
|
||||
def _normalize_where(where, schema=None):
|
||||
if where == None:
|
||||
return TRUE_FILTER
|
||||
if schema == None:
|
||||
return where
|
||||
where = simplify(_where_terms(where, where, schema))
|
||||
return where
|
||||
|
||||
|
||||
def _map_term_using_schema(master, path, term, schema_edges):
|
||||
"""
|
||||
IF THE WHERE CLAUSE REFERS TO FIELDS IN THE SCHEMA, THEN EXPAND THEM
|
||||
"""
|
||||
output = StructList()
|
||||
for k, v in term.items():
|
||||
dimension = schema_edges[k]
|
||||
if isinstance(dimension, Dimension):
|
||||
domain = dimension.getDomain()
|
||||
if dimension.fields:
|
||||
if isinstance(dimension.fields, dict):
|
||||
# EXPECTING A TUPLE
|
||||
for local_field, es_field in dimension.fields.items():
|
||||
local_value = v[local_field]
|
||||
if local_value == None:
|
||||
output.append({"missing": {"field": es_field}})
|
||||
else:
|
||||
output.append({"term": {es_field: local_value}})
|
||||
continue
|
||||
|
||||
if len(dimension.fields) == 1 and MVEL.isKeyword(dimension.fields[0]):
|
||||
# SIMPLE SINGLE-VALUED FIELD
|
||||
if domain.getPartByKey(v) is domain.NULL:
|
||||
output.append({"missing": {"field": dimension.fields[0]}})
|
||||
else:
|
||||
output.append({"term": {dimension.fields[0]: v}})
|
||||
continue
|
||||
|
||||
if AND(MVEL.isKeyword(f) for f in dimension.fields):
|
||||
# EXPECTING A TUPLE
|
||||
if not isinstance(v, tuple):
|
||||
Log.error("expecing {{name}}={{value}} to be a tuple", {"name": k, "value": v})
|
||||
for i, f in enumerate(dimension.fields):
|
||||
vv = v[i]
|
||||
if vv == None:
|
||||
output.append({"missing": {"field": f}})
|
||||
else:
|
||||
output.append({"term": {f: vv}})
|
||||
continue
|
||||
if len(dimension.fields) == 1 and MVEL.isKeyword(dimension.fields[0]):
|
||||
if domain.getPartByKey(v) is domain.NULL:
|
||||
output.append({"missing": {"field": dimension.fields[0]}})
|
||||
else:
|
||||
output.append({"term": {dimension.fields[0]: v}})
|
||||
continue
|
||||
if domain.partitions:
|
||||
part = domain.getPartByKey(v)
|
||||
if part is domain.NULL or not part.esfilter:
|
||||
Log.error("not expected to get NULL")
|
||||
output.append(part.esfilter)
|
||||
continue
|
||||
else:
|
||||
Log.error("not expected")
|
||||
elif isinstance(v, dict):
|
||||
sub = _map_term_using_schema(master, path + [k], v, schema_edges[k])
|
||||
output.append(sub)
|
||||
continue
|
||||
|
||||
output.append({"term": {k: v}})
|
||||
return {"and": output}
|
||||
|
||||
def _move_nested_term(master, where, schema):
|
||||
"""
|
||||
THE WHERE CLAUSE CAN CONTAIN NESTED PROPERTY REFERENCES, THESE MUST BE MOVED
|
||||
TO A NESTED FILTER
|
||||
"""
|
||||
items = where.term.items()
|
||||
if len(items) != 1:
|
||||
Log.error("Expecting only one term")
|
||||
k, v = items[0]
|
||||
nested_path = _get_nested_path(k, schema)
|
||||
if nested_path:
|
||||
return {"nested": {
|
||||
"path": nested_path,
|
||||
"query": {"filtered": {
|
||||
"query": {"match_all": {}},
|
||||
"filter": {"and": [
|
||||
{"term": {k: v}}
|
||||
]}
|
||||
}}
|
||||
}}
|
||||
return where
|
||||
|
||||
def _get_nested_path(field, schema):
|
||||
if MVEL.isKeyword(field):
|
||||
field = join_field([schema.es.alias]+split_field(field))
|
||||
for i, f in reverse(enumerate(split_field(field))):
|
||||
path = join_field(split_field(field)[0:i+1:])
|
||||
if path in INDEX_CACHE:
|
||||
return join_field(split_field(path)[1::])
|
||||
return None
|
||||
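# ILLUSTRATION (hedged, not part of the original commit): if INDEX_CACHE
# holds "bugs.attachments" and the schema alias is "bugs", then
# _get_nested_path("attachments.flags", schema) walks the prefixes
# longest-first and returns "attachments"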
|
||||
def _where_terms(master, where, schema):
|
||||
"""
|
||||
USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
|
||||
master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
|
||||
"""
|
||||
if isinstance(where, dict):
|
||||
if where.term:
|
||||
#MAP TERM
|
||||
try:
|
||||
output = _map_term_using_schema(master, [], where.term, schema.edges)
|
||||
return output
|
||||
except Exception, e:
|
||||
Log.error("programmer problem?", e)
|
||||
elif where.terms:
|
||||
#MAP TERM
|
||||
output = StructList()
|
||||
for k, v in where.terms.items():
|
||||
if not isinstance(v, (list, set)):
|
||||
Log.error("terms filter expects list of values")
|
||||
edge = schema.edges[k]
|
||||
if not edge:
|
||||
output.append({"terms": {k: v}})
|
||||
else:
|
||||
if isinstance(edge, basestring):
|
||||
#DIRECT FIELD REFERENCE
|
||||
return {"terms": {edge: v}}
|
||||
try:
|
||||
domain = edge.getDomain()
|
||||
except Exception, e:
|
||||
Log.error("programmer error", e)
|
||||
fields = domain.dimension.fields
|
||||
if isinstance(fields, dict):
|
||||
or_agg = []
|
||||
for vv in v:
|
||||
and_agg = []
|
||||
for local_field, es_field in fields.items():
|
||||
vvv = vv[local_field]
|
||||
if vvv != None:
|
||||
and_agg.append({"term": {es_field: vvv}})
|
||||
or_agg.append({"and": and_agg})
|
||||
output.append({"or": or_agg})
|
||||
elif isinstance(fields, list) and len(fields) == 1 and MVEL.isKeyword(fields[0]):
|
||||
output.append({"terms": {fields[0]: v}})
|
||||
elif domain.partitions:
|
||||
output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
|
||||
return {"and": output}
|
||||
elif where["or"]:
|
||||
return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
|
||||
elif where["and"]:
|
||||
return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
|
||||
elif where["not"]:
|
||||
return {"not": unwrap(_where_terms(master, where["not"], schema))}
|
||||
return where
|
||||
|
||||
|
||||
def _normalize_sort(sort=None):
|
||||
"""
|
||||
CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
|
||||
"""
|
||||
|
||||
if not sort:
|
||||
return EmptyList
|
||||
|
||||
output = StructList()
|
||||
for s in listwrap(sort):
|
||||
if isinstance(s, basestring):
|
||||
output.append({"field": s, "sort": 1})
|
||||
else:
|
||||
output.append({"field": nvl(s.field, s.value), "sort": nvl(sort_direction[s.sort], 1)})
|
||||
return wrap(output)
|
||||
|
||||
|
||||
sort_direction = {
|
||||
"asc": 1,
|
||||
"desc": -1,
|
||||
"none": 0,
|
||||
1: 1,
|
||||
0: 0,
|
||||
-1: -1
|
||||
}
|
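# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
# bare strings become ascending sorts; explicit directions map through
# sort_direction
def _example_normalize_sort():
    print _normalize_sort(["a", {"field": "b", "sort": "desc"}])
    # EXPECTED: [{"field": "a", "sort": 1}, {"field": "b", "sort": -1}]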
|
@@ -0,0 +1,29 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from ..structs.wraps import listwrap
|
||||
|
||||
|
||||
class Record(object):
|
||||
|
||||
def __init__(self, coord, cube):
|
||||
self.coord = coord
|
||||
self.cube = cube
|
||||
|
||||
def __getitem__(self, item):
|
||||
for s in listwrap(self.cube.select):
|
||||
if s.name == item:
|
||||
return self.cube.data[item]
|
||||
for i, e in enumerate(self.cube.edges):
|
||||
if e.name == item:
|
||||
return e.domain.partitions[self.coord[i]]
|
||||
|
||||
def __getattr__(self, item):
|
||||
return self[item]
|
|
@@ -0,0 +1,110 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ..env.logs import Log
|
||||
from ..structs.wraps import wrap, unwrap, tuplewrap
|
||||
|
||||
class UniqueIndex(object):
|
||||
"""
|
||||
DEFINE A SET OF ATTRIBUTES THAT UNIQUELY IDENTIFIES EACH OBJECT IN A list.
|
||||
THIS ALLOWS set-LIKE COMPARISONS (UNION, INTERSECTION, DIFFERENCE, ETC) WHILE
|
||||
STILL MAINTAINING list-LIKE FEATURES
|
||||
"""
|
||||
|
||||
def __init__(self, keys):
|
||||
self._data = {}
|
||||
self._keys = tuplewrap(keys)
|
||||
self.count = 0
|
||||
|
||||
def __getitem__(self, key):
|
||||
try:
|
||||
key = value2key(self._keys, key)
|
||||
d = self._data.get(key, None)
|
||||
return wrap(d)
|
||||
except Exception, e:
|
||||
Log.error("something went wrong", e)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
try:
|
||||
key = value2key(self._keys, key)
|
||||
d = self._data.get(key, None)
|
||||
if d != None:
|
||||
Log.error("key already filled")
|
||||
|
||||
self._data[key] = unwrap(value)
|
||||
self.count += 1
|
||||
|
||||
except Exception, e:
|
||||
Log.error("something went wrong", e)
|
||||
|
||||
|
||||
def add(self, val):
|
||||
key = value2key(self._keys, val)
|
||||
d = self._data.get(key, None)
|
||||
if d is None:
|
||||
self._data[key] = unwrap(val)
|
||||
self.count += 1
|
||||
elif d is not val:
|
||||
Log.error("key already filled")
|
||||
|
||||
|
||||
def __contains__(self, key):
|
||||
return self[key] != None
|
||||
|
||||
def __iter__(self):
|
||||
return (wrap(v) for v in self._data.itervalues())
|
||||
|
||||
def __sub__(self, other):
|
||||
output = UniqueIndex(self._keys)
|
||||
for v in self:
|
||||
if v not in other:
|
||||
output.add(v)
|
||||
return output
|
||||
|
||||
def __and__(self, other):
|
||||
output = UniqueIndex(self._keys)
|
||||
for v in self:
|
||||
if v in other: output.add(v)
|
||||
return output
|
||||
|
||||
def __or__(self, other):
|
||||
output = UniqueIndex(self._keys)
|
||||
for v in self: output.add(v)
|
||||
for v in other: output.add(v)
|
||||
return output
|
||||
|
||||
def __len__(self):
|
||||
if self.count == 0:
|
||||
for d in self:
|
||||
self.count += 1
|
||||
return self.count
|
||||
|
||||
def subtract(self, other):
|
||||
return self.__sub__(other)
|
||||
|
||||
def intersect(self, other):
|
||||
return self.__and__(other)
|
||||
|
||||
def value2key(keys, val):
|
||||
if len(keys)==1:
|
||||
if isinstance(val, dict):
|
||||
return val[keys[0]]
|
||||
elif isinstance(val, (list, tuple)):
|
||||
return val[0]
|
||||
else:
|
||||
return val
|
||||
else:
|
||||
if isinstance(val, dict):
|
||||
return wrap({k: val[k] for k in keys})
|
||||
elif isinstance(val, (list, tuple)):
|
||||
return wrap(dict(zip(keys, val)))
|
||||
else:
|
||||
Log.error("do not know what to do here")
|
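# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
# each object is identified by its key attribute, and lookup by key
# returns the stored object
def _example_unique_index():
    idx = UniqueIndex(keys=["id"])
    idx.add({"id": 1, "name": "x"})
    assert idx[1].name == "x"
    assert 1 in idx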
|
@@ -8,10 +8,17 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from ..logs import Log
|
||||
from __future__ import unicode_literals
|
||||
import functools
|
||||
from ..struct import StructList
|
||||
from ..maths import stats
|
||||
from ..collections import MIN, MAX
|
||||
from ..env.logs import Log
|
||||
from ..maths import Math
|
||||
from ..multiset import Multiset
|
||||
from ..stats import Z_moment, stats2z_moment, z_moment2stats
|
||||
from ..collections.multiset import Multiset
|
||||
from ..maths.stats import Z_moment, z_moment2stats
|
||||
|
||||
# A VARIETY OF SLIDING WINDOW FUNCTIONS
|
||||
|
||||
|
||||
class AggregationFunction(object):
|
||||
|
@@ -73,21 +80,63 @@ class WindowFunction(AggregationFunction):
|
|||
Log.error("not implemented yet")
|
||||
|
||||
|
||||
class Stats(WindowFunction):
|
||||
def __init__(self):
|
||||
object.__init__(self)
|
||||
self.total = Z_moment(0, 0, 0)
|
||||
def Stats(**kwargs):
|
||||
if not kwargs:
|
||||
return _SimpleStats
|
||||
else:
|
||||
return functools.partial(_Stats, *[], **kwargs)
|
||||
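# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit):
#     Stats()            # yields the plain _SimpleStats aggregator class
#     Stats(middle=0.80) # yields a constructor for _Stats that keeps only
#                        # the middle 80% of samples when computing stats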
|
||||
|
||||
class _Stats(WindowFunction):
|
||||
"""
|
||||
TRACK STATS, BUT IGNORE OUTLIERS
|
||||
"""
|
||||
|
||||
def __init__(self, middle=None):
|
||||
object.__init__(self)
|
||||
self.middle = middle
|
||||
self.samples = StructList()
|
||||
|
||||
def add(self, value):
|
||||
if value == None:
|
||||
return
|
||||
self.total += stats2z_moment(value)
|
||||
self.samples.append(value)
|
||||
|
||||
def sub(self, value):
|
||||
if value == None:
|
||||
return
|
||||
self.total -= stats2z_moment(value)
|
||||
self.samples.remove(value)
|
||||
|
||||
def merge(self, agg):
|
||||
Log.error("Do not know how to handle")
|
||||
|
||||
def end(self):
|
||||
ignore = Math.ceiling(len(self.samples) * (1 - self.middle) / 2)
|
||||
if ignore * 2 >= len(self.samples):
|
||||
return stats.Stats()
|
||||
output = stats.Stats(samples=sorted(self.samples)[ignore:len(self.samples) - ignore:])
|
||||
output.samples = list(self.samples)
|
||||
return output
|
||||
|
||||
|
||||
class _SimpleStats(WindowFunction):
|
||||
"""
|
||||
AGGREGATE Stats OBJECTS, NOT JUST VALUES
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
object.__init__(self)
|
||||
self.total = Z_moment(0, 0, 0)
|
||||
|
||||
def add(self, value):
|
||||
if value == None:
|
||||
return
|
||||
self.total += Z_moment.new_instance([value])
|
||||
|
||||
def sub(self, value):
|
||||
if value == None:
|
||||
return
|
||||
self.total -= Z_moment.new_instance([value])
|
||||
|
||||
def merge(self, agg):
|
||||
self.total += agg.total
|
||||
|
@@ -104,6 +153,7 @@ class Min(WindowFunction):
|
|||
|
||||
def add(self, value):
|
||||
if value == None:
|
||||
|
||||
return
|
||||
self.total.add(value)
|
||||
|
||||
|
@@ -113,7 +163,7 @@ class Min(WindowFunction):
|
|||
self.total.remove(value)
|
||||
|
||||
def end(self):
|
||||
return Math.min(self.total)
|
||||
return MIN(self.total)
|
||||
|
||||
|
||||
class Max(WindowFunction):
|
||||
|
@@ -133,7 +183,7 @@ class Max(WindowFunction):
|
|||
self.total.remove(value)
|
||||
|
||||
def end(self):
|
||||
return Math.max(self.total)
|
||||
return MAX(*self.total)
|
||||
|
||||
|
||||
class Count(WindowFunction):
|
||||
|
@@ -174,5 +224,3 @@ class Sum(WindowFunction):
|
|||
|
||||
def end(self):
|
||||
return self.total
|
||||
|
||||
|
||||
|
|
|
@@ -1,31 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
import random
|
||||
import string
|
||||
|
||||
|
||||
SIMPLE_ALPHABET=string.ascii_letters + string.digits
|
||||
SEED=random.Random()
|
||||
|
||||
|
||||
class Random(object):
|
||||
|
||||
@staticmethod
|
||||
def string(length, alphabet=SIMPLE_ALPHABET):
|
||||
result = ''
|
||||
for i in range(0, length):
|
||||
result += SEED.choice(alphabet)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def hex(length):
|
||||
return Random.string(length, string.digits + 'ABCDEF')
|
||||
|
||||
@staticmethod
|
||||
def int(*args):
|
||||
return random.randrange(*args)
|
||||
|
||||
@staticmethod
|
||||
def sample(data, count):
|
||||
num=len(data)
|
||||
return [data[Random.int(num)] for i in range(count)]
|
|
@@ -0,0 +1,7 @@
|
|||
import re
|
||||
|
||||
|
||||
def match(pattern, text):
|
||||
result = re.match(pattern, text)
|
||||
return result.groups() if result else None
|
||||
|
|
@@ -0,0 +1 @@
|
|||
__author__ = 'klahnakoski'
|
|
@@ -8,20 +8,24 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from datetime import datetime
|
||||
import json
|
||||
import subprocess
|
||||
from pymysql import connect
|
||||
from . import struct
|
||||
from .maths import Math
|
||||
from .strings import expand_template
|
||||
from .struct import nvl
|
||||
from .cnv import CNV
|
||||
from .logs import Log, Except
|
||||
from .queries import Q
|
||||
from .strings import indent
|
||||
from .strings import outdent
|
||||
from .files import File
|
||||
from pymysql import connect, InterfaceError
|
||||
from .. import struct
|
||||
from ..jsons import json_scrub
|
||||
from ..maths import Math
|
||||
from ..strings import expand_template
|
||||
from ..struct import nvl
|
||||
from ..structs.wraps import wrap, listwrap
|
||||
from ..cnv import CNV
|
||||
from ..env.logs import Log, Except
|
||||
from ..queries import Q
|
||||
from ..strings import indent
|
||||
from ..strings import outdent
|
||||
from ..env.files import File
|
||||
|
||||
|
||||
DEBUG = False
|
||||
|
@@ -32,16 +36,26 @@ all_db = []
|
|||
|
||||
class DB(object):
|
||||
"""
|
||||
|
||||
Parameterize SQL by name rather than by position. Return records as objects
|
||||
rather than tuples.
|
||||
"""
|
||||
|
||||
def __init__(self, settings, schema=None, preamble=None):
|
||||
def __init__(self, settings, schema=None, preamble=None, readonly=False):
|
||||
"""
|
||||
OVERRIDE THE settings.schema WITH THE schema PARAMETER
|
||||
preamble WILL BE USED TO ADD COMMENTS TO THE BEGINNING OF ALL SQL
|
||||
THE INTENT IS TO HELP ADMINISTRATORS ID THE SQL RUNNING ON THE DATABASE
|
||||
|
||||
schema - NAME OF DEFAULT database/schema IN QUERIES
|
||||
|
||||
preamble - A COMMENT TO BE ADDED TO EVERY SQL STATEMENT SENT
|
||||
|
||||
readonly - USED ONLY TO INDICATE IF A TRANSACTION WILL BE OPENED UPON
|
||||
USE IN with CLAUSE, YOU CAN STILL SEND UPDATES, BUT MUST OPEN A
|
||||
TRANSACTION BEFORE YOU DO
|
||||
"""
|
||||
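# ILLUSTRATION ONLY (a hedged sketch, not part of the original commit,
# assuming settings carries host/port/username/password):
#     with DB(settings, readonly=True) as db:   # no transaction is opened
#         result = db.query("SELECT 1")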
if settings == None:
|
||||
Log.warning("No settings provided")
|
||||
return
|
||||
|
||||
all_db.append(self)
|
||||
|
@@ -50,7 +64,7 @@ class DB(object):
|
|||
settings = settings.settings
|
||||
|
||||
self.settings = settings.copy()
|
||||
self.settings.schema = nvl(schema, self.settings.schema)
|
||||
self.settings.schema = nvl(schema, self.settings.schema, self.settings.database)
|
||||
|
||||
preamble = nvl(preamble, self.settings.preamble)
|
||||
if preamble == None:
|
||||
|
@@ -58,6 +72,7 @@ class DB(object):
|
|||
else:
|
||||
self.preamble = indent(preamble, "# ").strip() + "\n"
|
||||
|
||||
self.readonly = readonly
|
||||
self.debug = nvl(self.settings.debug, DEBUG)
|
||||
self._open()
|
||||
|
||||
|
@@ -74,7 +89,10 @@ class DB(object):
|
|||
use_unicode=True
|
||||
)
|
||||
except Exception, e:
|
||||
Log.error(u"Failure to connect", e)
|
||||
if self.settings.host.find("://") == -1:
|
||||
Log.error(u"Failure to connect", e)
|
||||
else:
|
||||
Log.error(u"Failure to connect. PROTOCOL PREFIX IS PROBABLY BAD", e)
|
||||
self.cursor = None
|
||||
self.partial_rollback = False
|
||||
self.transaction_level = 0
|
||||
|
@@ -82,17 +100,22 @@ class DB(object):
|
|||
|
||||
|
||||
def __enter__(self):
|
||||
self.begin()
|
||||
if not self.readonly:
|
||||
self.begin()
|
||||
return self
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
if self.readonly:
|
||||
self.close()
|
||||
return
|
||||
|
||||
if isinstance(value, BaseException):
|
||||
try:
|
||||
if self.cursor: self.cursor.close()
|
||||
self.cursor = None
|
||||
self.rollback()
|
||||
except Exception, e:
|
||||
Log.warning(u"can not rollback()", e)
|
||||
Log.warning(u"can not rollback()", [value, e])
|
||||
finally:
|
||||
self.close()
|
||||
return
|
||||
|
@@ -112,14 +135,15 @@ class DB(object):
|
|||
return Transaction(self)
|
||||
|
||||
def begin(self):
|
||||
if self.transaction_level == 0: self.cursor = self.db.cursor()
|
||||
if self.transaction_level == 0:
|
||||
self.cursor = self.db.cursor()
|
||||
self.transaction_level += 1
|
||||
self.execute("SET TIME_ZONE='+00:00'")
|
||||
|
||||
|
||||
def close(self):
|
||||
if self.transaction_level > 0:
|
||||
Log.error(u"expecting commit() or rollback() before close")
|
||||
Log.error("expecting commit() or rollback() before close")
|
||||
self.cursor = None #NOT NEEDED
|
||||
try:
|
||||
self.db.close()
|
||||
|
@ -127,7 +151,7 @@ class DB(object):
|
|||
if e.message.find("Already closed") >= 0:
|
||||
return
|
||||
|
||||
Log.warning(u"can not close()", e)
|
||||
Log.warning("can not close()", e)
|
||||
finally:
|
||||
all_db.remove(self)
|
||||
|
||||
|
@ -139,17 +163,17 @@ class DB(object):
|
|||
self.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
Log.error(u"Error while processing backlog", e)
|
||||
Log.error("Error while processing backlog", e)
|
||||
|
||||
if self.transaction_level == 0:
|
||||
Log.error(u"No transaction has begun")
|
||||
Log.error("No transaction has begun")
|
||||
elif self.transaction_level == 1:
|
||||
if self.partial_rollback:
|
||||
try:
|
||||
self.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
Log.error(u"Commit after nested rollback is not allowed")
|
||||
Log.error("Commit after nested rollback is not allowed")
|
||||
else:
|
||||
if self.cursor: self.cursor.close()
|
||||
self.cursor = None
|
||||
|
@ -161,18 +185,18 @@ class DB(object):
|
|||
try:
|
||||
self.commit()
|
||||
except Exception, e:
|
||||
Log.error(u"Can not flush", e)
|
||||
Log.error("Can not flush", e)
|
||||
|
||||
try:
|
||||
self.begin()
|
||||
except Exception, e:
|
||||
Log.error(u"Can not flush", e)
|
||||
Log.error("Can not flush", e)
|
||||
|
||||
|
||||
def rollback(self):
|
||||
self.backlog = [] #YAY! FREE!
|
||||
if self.transaction_level == 0:
|
||||
Log.error(u"No transaction has begun")
|
||||
Log.error("No transaction has begun")
|
||||
elif self.transaction_level == 1:
|
||||
self.transaction_level -= 1
|
||||
if self.cursor != None:
|
||||
|
@ -182,7 +206,7 @@ class DB(object):
|
|||
else:
|
||||
self.transaction_level -= 1
|
||||
self.partial_rollback = True
|
||||
Log.warning(u"Can not perform partial rollback!")
|
||||
Log.warning("Can not perform partial rollback!")
|
||||
|
||||
|
||||
def call(self, proc_name, params):
|
||||
|
@ -193,23 +217,29 @@ class DB(object):
|
|||
self.cursor.close()
|
||||
self.cursor = self.db.cursor()
|
||||
except Exception, e:
|
||||
Log.error(u"Problem calling procedure " + proc_name, e)
|
||||
Log.error("Problem calling procedure " + proc_name, e)
|
||||
|
||||
|
||||
def query(self, sql, param=None):
|
||||
"""
|
||||
RETURN RESULTS IN [row_num][column] GRID
|
||||
"""
|
||||
self._execute_backlog()
|
||||
try:
|
||||
old_cursor = self.cursor
|
||||
if not old_cursor: #ALLOW NON-TRANSACTIONAL READS
|
||||
self.cursor = self.db.cursor()
|
||||
self.cursor.execute("SET TIME_ZONE='+00:00'")
|
||||
self.cursor.close()
|
||||
self.cursor = self.db.cursor()
|
||||
|
||||
if param: sql = expand_template(sql, self.quote_param(param))
|
||||
if param:
|
||||
sql = expand_template(sql, self.quote_param(param))
|
||||
sql = self.preamble + outdent(sql)
|
||||
if self.debug:
|
||||
Log.note(u"Execute SQL:\n{{sql}}", {u"sql": indent(sql)})
|
||||
Log.note("Execute SQL:\n{{sql}}", {"sql": indent(sql)})
|
||||
|
||||
self.cursor.execute(sql)
|
||||
|
||||
columns = [utf8_to_unicode(d[0]) for d in nvl(self.cursor.description, [])]
|
||||
fixed = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
|
||||
result = CNV.table2list(columns, fixed)
|
||||
|
@ -220,9 +250,45 @@ class DB(object):
|
|||
|
||||
return result
|
||||
except Exception, e:
|
||||
if e.message.find("InterfaceError") >= 0:
|
||||
Log.error(u"Did you close the db connection?", e)
|
||||
Log.error(u"Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
|
||||
if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
|
||||
Log.error("Did you close the db connection?", e)
|
||||
Log.error("Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
|
||||
|
||||
def column_query(self, sql, param=None):
|
||||
"""
|
||||
RETURN RESULTS IN [column][row_num] GRID
|
||||
"""
|
||||
self._execute_backlog()
|
||||
try:
|
||||
old_cursor = self.cursor
|
||||
if not old_cursor: #ALLOW NON-TRANSACTIONAL READS
|
||||
self.cursor = self.db.cursor()
|
||||
self.cursor.execute("SET TIME_ZONE='+00:00'")
|
||||
self.cursor.close()
|
||||
self.cursor = self.db.cursor()
|
||||
|
||||
if param:
|
||||
sql = expand_template(sql, self.quote_param(param))
|
||||
sql = self.preamble + outdent(sql)
|
||||
if self.debug:
|
||||
Log.note("Execute SQL:\n{{sql}}", {"sql": indent(sql)})
|
||||
|
||||
self.cursor.execute(sql)
|
||||
grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
|
||||
# columns = [utf8_to_unicode(d[0]) for d in nvl(self.cursor.description, [])]
|
||||
result = zip(*grid)
|
||||
|
||||
if not old_cursor: #CLEANUP AFTER NON-TRANSACTIONAL READS
|
||||
self.cursor.close()
|
||||
self.cursor = None
|
||||
|
||||
return result
|
||||
except Exception, e:
|
||||
if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
|
||||
Log.error("Did you close the db connection?", e)
|
||||
Log.error("Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)


# EXECUTE GIVEN METHOD FOR ALL ROWS RETURNED
|
||||
|
@ -236,30 +302,31 @@ class DB(object):
|
|||
if not old_cursor: #ALLOW NON-TRANSACTIONAL READS
|
||||
self.cursor = self.db.cursor()
|
||||
|
||||
if param: sql = expand_template(sql, self.quote_param(param))
|
||||
if param:
|
||||
sql = expand_template(sql, self.quote_param(param))
|
||||
sql = self.preamble + outdent(sql)
|
||||
if self.debug:
|
||||
Log.note(u"Execute SQL:\n{{sql}}", {u"sql": indent(sql)})
|
||||
Log.note("Execute SQL:\n{{sql}}", {"sql": indent(sql)})
|
||||
self.cursor.execute(sql)
|
||||
|
||||
columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
|
||||
for r in self.cursor:
|
||||
num += 1
|
||||
_execute(struct.wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))
|
||||
_execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))
|
||||
|
||||
if not old_cursor: #CLEANUP AFTER NON-TRANSACTIONAL READS
|
||||
self.cursor.close()
|
||||
self.cursor = None
|
||||
|
||||
except Exception, e:
|
||||
Log.error(u"Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
|
||||
Log.error("Problem executing SQL:\n" + indent(sql.strip()), e, offset=1)
|
||||
|
||||
return num
|
||||
|
||||
|
||||
def execute(self, sql, param=None):
|
||||
if self.transaction_level == 0:
|
||||
Log.error(u"Expecting transaction to be started before issuing queries")
|
||||
Log.error("Expecting transaction to be started before issuing queries")
|
||||
|
||||
if param:
|
||||
sql = expand_template(sql, self.quote_param(param))
|
||||
|
@ -284,29 +351,33 @@ class DB(object):
|
|||
# We have no way to execute an entire SQL file in bulk, so we
|
||||
# have to shell out to the commandline client.
|
||||
args = [
|
||||
u"mysql",
|
||||
u"-h{0}".format(settings.host),
|
||||
u"-u{0}".format(settings.username),
|
||||
u"-p{0}".format(settings.password),
|
||||
u"{0}".format(settings.schema)
|
||||
"mysql",
|
||||
"-h{0}".format(settings.host),
|
||||
"-u{0}".format(settings.username),
|
||||
"-p{0}".format(settings.password),
|
||||
"{0}".format(settings.schema)
|
||||
]
|
||||
|
||||
proc = subprocess.Popen(
|
||||
args,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
bufsize=-1
|
||||
)
|
||||
(output, _) = proc.communicate(sql)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
args,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
bufsize=-1
|
||||
)
|
||||
if isinstance(sql, unicode):
|
||||
sql = sql.encode("utf8")
|
||||
(output, _) = proc.communicate(sql)
|
||||
except Exception, e:
|
||||
Log.error("Can not call \"mysql\"", e)
|
||||
|
||||
if proc.returncode:
|
||||
if len(sql) > 10000:
|
||||
sql = u"<" + unicode(len(sql)) + u" bytes of sql>"
|
||||
Log.error(u"Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n", {
|
||||
u"sql": indent(sql),
|
||||
u"return_code": proc.returncode,
|
||||
u"output": output
|
||||
sql = "<" + unicode(len(sql)) + " bytes of sql>"
|
||||
Log.error("Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n", {
|
||||
"sql": indent(sql),
|
||||
"return_code": proc.returncode,
|
||||
"output": output
|
||||
})
|
||||
|
||||
@staticmethod
|
||||
|
@ -321,31 +392,31 @@ class DB(object):
|
|||
if not self.backlog: return
|
||||
|
||||
(backlog, self.backlog) = (self.backlog, [])
|
||||
if self.db.__module__.startswith(u"pymysql"):
|
||||
if self.db.__module__.startswith("pymysql"):
|
||||
# BUG IN PYMYSQL: CAN NOT HANDLE MULTIPLE STATEMENTS
|
||||
# https://github.com/PyMySQL/PyMySQL/issues/157
|
||||
for b in backlog:
|
||||
sql = self.preamble + b
|
||||
try:
|
||||
if self.debug:
|
||||
Log.note(u"Execute SQL:\n{{sql|indent}}", {u"sql": sql})
|
||||
Log.note("Execute SQL:\n{{sql|indent}}", {"sql": sql})
|
||||
self.cursor.execute(b)
|
||||
except Exception, e:
|
||||
Log.error(u"Can not execute sql:\n{{sql}}", {u"sql": sql}, e)
|
||||
Log.error("Can not execute sql:\n{{sql}}", {"sql": sql}, e)
|
||||
|
||||
self.cursor.close()
|
||||
self.cursor = self.db.cursor()
|
||||
else:
|
||||
for i, g in Q.groupby(backlog, size=MAX_BATCH_SIZE):
|
||||
sql = self.preamble + u";\n".join(g)
|
||||
sql = self.preamble + ";\n".join(g)
|
||||
try:
|
||||
if self.debug:
|
||||
Log.note(u"Execute block of SQL:\n{{sql|indent}}", {u"sql": sql})
|
||||
Log.note("Execute block of SQL:\n{{sql|indent}}", {"sql": sql})
|
||||
self.cursor.execute(sql)
|
||||
self.cursor.close()
|
||||
self.cursor = self.db.cursor()
|
||||
except Exception, e:
|
||||
Log.error(u"Problem executing SQL:\n{{sql}}", {u"sql": indent(sql.strip())}, e, offset=1)
|
||||
Log.error("Problem executing SQL:\n{{sql}}", {"sql": indent(sql.strip())}, e, offset=1)
|
||||
|
||||
|
||||
## Insert dictionary of values into table
|
||||
|
@ -353,28 +424,28 @@ class DB(object):
|
|||
keys = record.keys()
|
||||
|
||||
try:
|
||||
command = u"INSERT INTO " + self.quote_column(table_name) + u"(" + \
|
||||
u",".join([self.quote_column(k) for k in keys]) + \
|
||||
u") VALUES (" + \
|
||||
u",".join([self.quote_value(record[k]) for k in keys]) + \
|
||||
u")"
|
||||
command = "INSERT INTO " + self.quote_column(table_name) + "(" + \
|
||||
",".join([self.quote_column(k) for k in keys]) + \
|
||||
") VALUES (" + \
|
||||
",".join([self.quote_value(record[k]) for k in keys]) + \
|
||||
")"
|
||||
|
||||
self.execute(command)
|
||||
except Exception, e:
|
||||
Log.error(u"problem with record: {{record}}", {u"record": record}, e)
|
||||
Log.error("problem with record: {{record}}", {"record": record}, e)
|
||||
|
||||
# candidate_key IS LIST OF COLUMNS THAT CAN BE USED AS UID (USUALLY PRIMARY KEY)
|
||||
# ONLY INSERT IF THE candidate_key DOES NOT EXIST YET
|
||||
def insert_new(self, table_name, candidate_key, new_record):
|
||||
candidate_key = struct.listwrap(candidate_key)
|
||||
candidate_key = listwrap(candidate_key)
|
||||
|
||||
condition = u" AND\n".join([self.quote_column(k) + u"=" + self.quote_value(new_record[k]) if new_record[k] != None else self.quote_column(k) + u" IS Null" for k in candidate_key])
|
||||
command = u"INSERT INTO " + self.quote_column(table_name) + u" (" + \
|
||||
u",".join([self.quote_column(k) for k in new_record.keys()]) + \
|
||||
u")\n" + \
|
||||
u"SELECT a.* FROM (SELECT " + u",".join([self.quote_value(v) + u" " + self.quote_column(k) for k, v in new_record.items()]) + u" FROM DUAL) a\n" + \
|
||||
u"LEFT JOIN " + \
|
||||
u"(SELECT 'dummy' exist FROM " + self.quote_column(table_name) + u" WHERE " + condition + u" LIMIT 1) b ON 1=1 WHERE exist IS Null"
|
||||
condition = " AND\n".join([self.quote_column(k) + "=" + self.quote_value(new_record[k]) if new_record[k] != None else self.quote_column(k) + " IS Null" for k in candidate_key])
|
||||
command = "INSERT INTO " + self.quote_column(table_name) + " (" + \
|
||||
",".join([self.quote_column(k) for k in new_record.keys()]) + \
|
||||
")\n" + \
|
||||
"SELECT a.* FROM (SELECT " + ",".join([self.quote_value(v) + " " + self.quote_column(k) for k, v in new_record.items()]) + " FROM DUAL) a\n" + \
|
||||
"LEFT JOIN " + \
|
||||
"(SELECT 'dummy' exist FROM " + self.quote_column(table_name) + " WHERE " + condition + " LIMIT 1) b ON 1=1 WHERE exist IS Null"
|
||||
self.execute(command, {})
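
For reference, a sketch of the SQL that insert_new() builds for candidate_key=["id"] (table and values are illustrative; the real command backtick-quotes every identifier): the LEFT JOIN probe makes the INSERT a no-op when a row with the same candidate key already exists.

    db.insert_new("person", ["id"], {"id": 1, "name": "kyle"})
    # INSERT INTO `person` (`id`,`name`)
    # SELECT a.* FROM (SELECT 1 `id`, 'kyle' `name` FROM DUAL) a
    # LEFT JOIN (SELECT 'dummy' exist FROM `person` WHERE `id`=1 LIMIT 1) b ON 1=1
    # WHERE exist IS Null
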
|
||||
|
||||
|
||||
|
@ -395,15 +466,15 @@ class DB(object):
|
|||
|
||||
try:
|
||||
command = \
|
||||
u"INSERT INTO " + self.quote_column(table_name) + u"(" + \
|
||||
u",".join([self.quote_column(k) for k in keys]) + \
|
||||
u") VALUES " + ",".join([
|
||||
"(" + u",".join([self.quote_value(r[k]) for k in keys]) + u")"
|
||||
"INSERT INTO " + self.quote_column(table_name) + "(" + \
|
||||
",".join([self.quote_column(k) for k in keys]) + \
|
||||
") VALUES " + ",\n".join([
|
||||
"(" + ",".join([self.quote_value(r[k]) for k in keys]) + ")"
|
||||
for r in records
|
||||
])
|
||||
self.execute(command)
|
||||
except Exception, e:
|
||||
Log.error(u"problem with record: {{record}}", {u"record": records}, e)
|
||||
Log.error("problem with record: {{record}}", {"record": records}, e)
|
||||
|
||||
|
||||
def update(self, table_name, where_slice, new_values):
|
||||
|
@ -412,15 +483,15 @@ class DB(object):
|
|||
"""
|
||||
new_values = self.quote_param(new_values)
|
||||
|
||||
where_clause = u" AND\n".join([
|
||||
self.quote_column(k) + u"=" + self.quote_value(v) if v != None else self.quote_column(k) + " IS NULL"
|
||||
where_clause = " AND\n".join([
|
||||
self.quote_column(k) + "=" + self.quote_value(v) if v != None else self.quote_column(k) + " IS NULL"
|
||||
for k, v in where_slice.items()]
|
||||
)
|
||||
|
||||
command = u"UPDATE " + self.quote_column(table_name) + u"\n" + \
|
||||
u"SET " + \
|
||||
u",\n".join([self.quote_column(k) + u"=" + v for k, v in new_values.items()]) + u"\n" + \
|
||||
u"WHERE " + \
|
||||
command = "UPDATE " + self.quote_column(table_name) + "\n" + \
|
||||
"SET " + \
|
||||
",\n".join([self.quote_column(k) + "=" + v for k, v in new_values.items()]) + "\n" + \
|
||||
"WHERE " + \
|
||||
where_clause
|
||||
self.execute(command, {})
|
||||
|
||||
|
@ -445,17 +516,17 @@ class DB(object):
|
|||
elif isinstance(value, basestring):
|
||||
return self.db.literal(value)
|
||||
elif isinstance(value, datetime):
|
||||
return u"str_to_date('" + value.strftime(u"%Y%m%d%H%M%S") + u"', '%Y%m%d%H%i%s')"
|
||||
return "str_to_date('" + value.strftime("%Y%m%d%H%M%S") + "', '%Y%m%d%H%i%s')"
|
||||
elif hasattr(value, '__iter__'):
|
||||
return self.db.literal(CNV.object2JSON(value))
|
||||
return self.db.literal(json_encode(value))
|
||||
elif isinstance(value, dict):
|
||||
return self.db.literal(CNV.object2JSON(value))
|
||||
return self.db.literal(json_encode(value))
|
||||
elif Math.is_number(value):
|
||||
return unicode(value)
|
||||
else:
|
||||
return self.db.literal(value)
|
||||
except Exception, e:
|
||||
Log.error(u"problem quoting SQL", e)
|
||||
Log.error("problem quoting SQL", e)
|
||||
|
||||
|
||||
def quote_sql(self, value, param=None):
|
||||
|
@ -471,121 +542,42 @@ class DB(object):
|
|||
elif isinstance(value, basestring):
|
||||
return value
|
||||
elif isinstance(value, dict):
|
||||
return self.db.literal(CNV.object2JSON(value))
|
||||
return self.db.literal(json_encode(value))
|
||||
elif hasattr(value, '__iter__'):
|
||||
return u"(" + u",".join([self.quote_sql(vv) for vv in value]) + u")"
|
||||
return "(" + ",".join([self.quote_sql(vv) for vv in value]) + ")"
|
||||
else:
|
||||
return unicode(value)
|
||||
except Exception, e:
|
||||
Log.error(u"problem quoting SQL", e)
|
||||
Log.error("problem quoting SQL", e)
|
||||
|
||||
def quote_column(self, column_name, table=None):
|
||||
if isinstance(column_name, basestring):
|
||||
if table:
|
||||
column_name = table + "." + column_name
|
||||
return SQL(u"`" + column_name.replace(u".", u"`.`") + u"`") #MY SQL QUOTE OF COLUMN NAMES
|
||||
return SQL("`" + column_name.replace(".", "`.`") + "`") #MY SQL QUOTE OF COLUMN NAMES
|
||||
elif isinstance(column_name, list):
|
||||
if table:
|
||||
return SQL(u", ".join([self.quote_column(table + "." + c) for c in column_name]))
|
||||
return SQL(u", ".join([self.quote_column(c) for c in column_name]))
|
||||
return SQL(", ".join([self.quote_column(table + "." + c) for c in column_name]))
|
||||
return SQL(", ".join([self.quote_column(c) for c in column_name]))
|
||||
else:
|
||||
#ASSUME {u"name":name, u"value":value} FORM
|
||||
return SQL(column_name.value + u" AS " + self.quote_column(column_name.name))
|
||||
#ASSUME {"name":name, "value":value} FORM
|
||||
return SQL(column_name.value + " AS " + self.quote_column(column_name.name))
|
||||
|
||||
def sort2sqlorderby(self, sort):
|
||||
sort = Q.normalize_sort(sort)
|
||||
return u",\n".join([self.quote_column(s.field) + (" DESC" if s.sort == -1 else " ASC") for s in sort])
|
||||
sort = Q.normalize_sort_parameters(sort)
|
||||
return ",\n".join([self.quote_column(s.field) + (" DESC" if s.sort == -1 else " ASC") for s in sort])
|
||||
|
||||
def esfilter2sqlwhere(self, esfilter):
|
||||
return SQL(self._filter2where(esfilter))
|
||||
|
||||
def isolate(self, separator, list):
|
||||
if len(list) > 1:
|
||||
return u"(\n" + indent((" " + separator + "\n").join(list)) + u"\n)"
|
||||
else:
|
||||
return list[0]
|
||||
|
||||
def _filter2where(self, esfilter):
|
||||
esfilter = struct.wrap(esfilter)
|
||||
|
||||
if esfilter[u"and"]:
|
||||
return self.isolate("AND", [self._filter2where(a) for a in esfilter[u"and"]])
|
||||
elif esfilter[u"or"]:
|
||||
return self.isolate("OR", [self._filter2where(a) for a in esfilter[u"or"]])
|
||||
elif esfilter[u"not"]:
|
||||
return u"NOT (" + self._filter2where(esfilter[u"not"]) + u")"
|
||||
elif esfilter.term:
|
||||
return self.isolate("AND", [self.quote_column(col) + u"=" + self.quote_value(val) for col, val in esfilter.term.items()])
|
||||
elif esfilter.terms:
|
||||
for col, v in esfilter.terms.items():
|
||||
try:
|
||||
int_list = CNV.value2intlist(v)
|
||||
has_null = False
|
||||
for vv in v:
|
||||
if vv == None:
|
||||
has_null = True
|
||||
break
|
||||
if int_list:
|
||||
filter = int_list_packer(col, int_list)
|
||||
if has_null:
|
||||
return self._filter2where({"or": [{"missing": col}, filter]})
|
||||
else:
|
||||
return self._filter2where(filter)
|
||||
else:
|
||||
if has_null:
|
||||
return self._filter2where({"missing": col})
|
||||
else:
|
||||
return "false"
|
||||
except Exception, e:
|
||||
if not hasattr(e, "contains") or not e.contains("no packing possible"):
|
||||
Log.warning("Not an int-list: {{list}}", {"list": v}, e)
|
||||
return self.quote_column(col) + u" in (" + ", ".join([self.quote_value(val) for val in v]) + ")"
|
||||
elif esfilter.script:
|
||||
return u"(" + esfilter.script + u")"
|
||||
elif esfilter.range:
|
||||
name2sign = {
|
||||
u"gt": u">",
|
||||
u"gte": u">=",
|
||||
u"lte": u"<=",
|
||||
u"lt": u"<"
|
||||
}
|
||||
|
||||
def single(col, r):
|
||||
min = nvl(r["gte"], r[">="])
|
||||
max = nvl(r["lte"], r["<="])
|
||||
if min and max:
|
||||
#SPECIAL CASE (BETWEEN)
|
||||
return self.quote_column(col) + u" BETWEEN " + self.quote_value(min) + u" AND " + self.quote_value(max)
|
||||
else:
|
||||
return " AND ".join(
|
||||
self.quote_column(col) + name2sign[sign] + self.quote_value(value)
|
||||
for sign, value in r.items()
|
||||
)
|
||||
|
||||
output = self.isolate("AND", [single(col, ranges) for col, ranges in esfilter.range.items()])
|
||||
return output
|
||||
elif esfilter.missing:
|
||||
if isinstance(esfilter.missing, basestring):
|
||||
return u"(" + self.quote_column(esfilter.missing) + u" IS Null)"
|
||||
else:
|
||||
return u"(" + self.quote_column(esfilter.missing.field) + u" IS Null)"
|
||||
elif esfilter.exists:
|
||||
if isinstance(esfilter.exists, basestring):
|
||||
return u"(" + self.quote_column(esfilter.exists) + u" IS NOT Null)"
|
||||
else:
|
||||
return u"(" + self.quote_column(esfilter.exists.field) + u" IS NOT Null)"
|
||||
else:
|
||||
Log.error(u"Can not convert esfilter to SQL: {{esfilter}}", {u"esfilter": esfilter})
|
||||
|
||||
|
||||
def utf8_to_unicode(v):
|
||||
try:
|
||||
if isinstance(v, str):
|
||||
return v.decode(u"utf8")
|
||||
return v.decode("utf8")
|
||||
else:
|
||||
return v
|
||||
except Exception, e:
|
||||
Log.error(u"not expected", e)
|
||||
Log.error("not expected", e)


#ACTUAL SQL, DO NOT QUOTE THIS STRING
|
||||
|
@ -595,8 +587,12 @@ class SQL(unicode):
|
|||
self.template = template
|
||||
self.param = param
|
||||
|
||||
@property
|
||||
def sql(self):
|
||||
return expand_template(self.template, self.param)
|
||||
|
||||
def __str__(self):
|
||||
Log.error(u"do not do this")
|
||||
Log.error("do not do this")
|
||||
|
||||
|
||||
def int_list_packer(term, values):
|
||||
|
@ -616,7 +612,7 @@ def int_list_packer(term, values):
|
|||
curr_start = last
|
||||
curr_excl = set()
|
||||
|
||||
for v in sorted[1:]:
|
||||
for v in sorted[1::]:
|
||||
if v <= last + 1:
|
||||
pass
|
||||
elif v - last > 3:
|
||||
|
@ -693,3 +689,25 @@ class Transaction(object):
|
|||
self.db.rollback()
|
||||
else:
|
||||
self.db.commit()
|
||||
|
||||
|
||||
json_encoder = json.JSONEncoder(
|
||||
skipkeys=False,
|
||||
ensure_ascii=False, # DIFF FROM DEFAULTS
|
||||
check_circular=True,
|
||||
allow_nan=True,
|
||||
indent=None,
|
||||
separators=None,
|
||||
encoding='utf-8',
|
||||
default=None,
|
||||
sort_keys=True # <-- SEE?! sort_keys==True
|
||||
)
|
||||
|
||||
|
||||
def json_encode(value):
|
||||
"""
|
||||
FOR PUTTING JSON INTO DATABASE (sort_keys=True)
|
||||
dicts CAN BE USED AS KEYS
|
||||
"""
|
||||
return unicode(json_encoder.encode(json_scrub(value)))
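
Because sort_keys=True, dicts with equal content always serialize to the same string, which is what makes them usable as keys:

    key1 = json_encode({"b": 2, "a": 1})
    key2 = json_encode({"a": 1, "b": 2})
    key1 == key2    # True, so either can index the same cache entry
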
|
||||
|
|
@ -8,26 +8,29 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
|
||||
from dzAlerts.util import struct
|
||||
from __future__ import unicode_literals
|
||||
from ..queries.db_query import esfilter2sqlwhere
|
||||
from ..structs.wraps import wrap
|
||||
|
||||
|
||||
def find_holes(db, table_name, column_name, filter, _range):
|
||||
def find_holes(db, table_name, column_name, _range, filter=None):
|
||||
"""
|
||||
FIND HOLES IN A DENSE COLUMN OF INTEGERS
|
||||
RETURNS A LIST OF {"min": min, "max": max} OBJECTS
|
||||
"""
|
||||
if not filter:
|
||||
filter = {"match_all": {}}
|
||||
|
||||
_range = struct.wrap(_range)
|
||||
_range = wrap(_range)
|
||||
params = {
|
||||
"min": _range.min,
|
||||
"max": _range.max - 1,
|
||||
"column_name": db.quote_column(column_name),
|
||||
"table_name": db.quote_column(table_name),
|
||||
"filter": db.esfilter2sqlwhere(filter)
|
||||
"filter": esfilter2sqlwhere(db, filter)
|
||||
}
|
||||
|
||||
min_max=db.query("""
|
||||
min_max = db.query("""
|
||||
SELECT
|
||||
min({{column_name}}) `min`,
|
||||
max({{column_name}})+1 `max`
|
||||
|
@ -38,7 +41,6 @@ def find_holes(db, table_name, column_name, filter, _range):
|
|||
{{filter}}
|
||||
""", params)[0]
|
||||
|
||||
|
||||
db.execute("SET @last={{min}}-1", {"min": _range.min})
|
||||
ranges = db.query("""
|
||||
SELECT
|
||||
|
@ -61,7 +63,6 @@ def find_holes(db, table_name, column_name, filter, _range):
|
|||
diff>1
|
||||
""", params)
|
||||
|
||||
|
||||
if ranges:
|
||||
ranges.append({"min": min_max.max, "max": _range.max})
|
||||
else:
|
||||
|
@ -71,5 +72,4 @@ def find_holes(db, table_name, column_name, filter, _range):
|
|||
else:
|
||||
ranges.append(_range)
|
||||
|
||||
|
||||
return ranges
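
A usage sketch (table and column names are hypothetical): report the gaps in a dense integer column over a half-open range.

    holes = find_holes(db, "person", "id", {"min": 0, "max": 1000})
    # e.g. [{"min": 17, "max": 23}, {"min": 980, "max": 1000}]
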
|
|
@ -1,77 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
import argparse
|
||||
import struct
|
||||
from .struct import listwrap
|
||||
from .cnv import CNV
|
||||
from .logs import Log
|
||||
from .files import File
|
||||
|
||||
|
||||
#PARAMETERS MATCH argparse.ArgumentParser.add_argument()
|
||||
#http://docs.python.org/dev/library/argparse.html#the-add-argument-method
|
||||
#name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
|
||||
#action - The basic type of action to be taken when this argument is encountered at the command line.
|
||||
#nargs - The number of command-line arguments that should be consumed.
|
||||
#const - A constant value required by some action and nargs selections.
|
||||
#default - The value produced if the argument is absent from the command line.
|
||||
#type - The type to which the command-line argument should be converted.
|
||||
#choices - A container of the allowable values for the argument.
|
||||
#required - Whether or not the command-line option may be omitted (optionals only).
|
||||
#help - A brief description of what the argument does.
|
||||
#metavar - A name for the argument in usage messages.
|
||||
#dest - The name of the attribute to be added to the object returned by parse_args().
|
||||
|
||||
def _argparse(defs):
|
||||
parser = argparse.ArgumentParser()
|
||||
for d in listwrap(defs):
|
||||
args = d.copy()
|
||||
name = args.name
|
||||
args.name = None
|
||||
parser.add_argument(*listwrap(name).list, **args.dict)
|
||||
namespace = parser.parse_args()
|
||||
output = {k: getattr(namespace, k) for k in vars(namespace)}
|
||||
return struct.wrap(output)
|
||||
|
||||
|
||||
def read_settings(filename=None, defs=None):
|
||||
# READ SETTINGS
|
||||
if filename:
|
||||
settings_file = File(filename)
|
||||
if not settings_file.exists:
|
||||
Log.error("Can not file settings file {{filename}}", {
|
||||
"filename": settings_file.abspath
|
||||
})
|
||||
json = settings_file.read()
|
||||
settings = CNV.JSON2object(json, flexible=True)
|
||||
if defs:
|
||||
settings.args = _argparse(defs)
|
||||
return settings
|
||||
else:
|
||||
defs = listwrap(defs)
|
||||
defs.append({
|
||||
"name": ["--settings", "--settings-file", "--settings_file"],
|
||||
"help": "path to JSON file with settings",
|
||||
"type": str,
|
||||
"dest": "filename",
|
||||
"default": "./settings.json",
|
||||
"required": False
|
||||
})
|
||||
args = _argparse(defs)
|
||||
settings_file = File(args.filename)
|
||||
if not settings_file.exists:
|
||||
Log.error("Can not file settings file {{filename}}", {
|
||||
"filename": settings_file.abspath
|
||||
})
|
||||
json = settings_file.read()
|
||||
settings = CNV.JSON2object(json, flexible=True)
|
||||
settings.args = args
|
||||
return settings
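
A usage sketch of this (now removed) helper, with an illustrative extra argument definition: the JSON file provides the bulk of the settings, and argparse results land on settings.args.

    settings = read_settings(defs=[{
        "name": ["--restart"],
        "help": "force a full restart",
        "action": "store_true",
        "dest": "restart"
    }])
    settings.args.restart       # from the command line
    settings.elasticsearch      # from settings.json
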
|
|
@ -1,185 +0,0 @@
|
|||
# encoding: utf-8
|
||||
#
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from math import sqrt
|
||||
from .cnv import CNV
|
||||
from .struct import nvl, Struct, Null
|
||||
from .logs import Log
|
||||
|
||||
|
||||
DEBUG=True
|
||||
EPSILON=0.000001
|
||||
|
||||
def stats2z_moment(stats):
|
||||
# MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
|
||||
# ADDED count
|
||||
# FIXED ERROR IN COEFFICIENTS
|
||||
mc0, mc1, mc2, skew, kurt = (stats.count, stats.mean, stats.variance, stats.skew, stats.kurtosis)
|
||||
|
||||
mz0 = mc0
|
||||
mz1 = mc1 * mc0
|
||||
mz2 = (mc2 + mc1*mc1)*mc0
|
||||
mc3 = skew*(mc2**1.5) # 3rd central moment
|
||||
mz3 = (mc3 + 3*mc1*mc2 - mc1**3)*mc0 # 3rd non-central moment
|
||||
mc4 = (kurt+3.0)*(mc2**2.0) # 4th central moment
|
||||
mz4 = (mc4 + 4*mc1*mc3 + 6*mc1*mc1*mc2 + mc1**4) * mc0
|
||||
|
||||
m=Z_moment(stats.count, mz1, mz2, mz3, mz4)
|
||||
if DEBUG:
|
||||
v = z_moment2stats(m, unbiased=False)
|
||||
if not closeEnough(v.count, stats.count): Log.error("conversion error")
|
||||
if not closeEnough(v.mean, stats.mean): Log.error("conversion error")
|
||||
if not closeEnough(v.variance, stats.variance):
|
||||
Log.error("convertion error")
|
||||
|
||||
return m
|
||||
|
||||
def closeEnough(a, b):
|
||||
if abs(a-b)<=EPSILON*(abs(a)+abs(b)+1): return True
|
||||
return False
|
||||
|
||||
|
||||
def z_moment2stats(z_moment, unbiased=True):
|
||||
free=0
|
||||
if unbiased: free=1
|
||||
N=z_moment.S[0]
|
||||
|
||||
if N==0: return Stats()
|
||||
|
||||
return Stats(
|
||||
count=N,
|
||||
mean=z_moment.S[1] / N if N > 0 else float('nan'),
|
||||
variance=(z_moment.S[2] - (z_moment.S[1] ** 2) / N) / (N - free) if N - free > 0 else float('nan'),
|
||||
unbiased=unbiased
|
||||
)
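
A round-trip sketch: because the S tuple holds sums of powers, partitions aggregate by plain addition before converting back to summary statistics.

    m = Z_moment.new_instance([1.0, 2.0, 3.0]) + Z_moment.new_instance([4.0, 5.0])
    s = z_moment2stats(m)
    s.count      # 5
    s.mean       # 3.0
    s.variance   # 2.5 (unbiased)
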


class Stats(Struct):
|
||||
|
||||
def __init__(self, **args):
|
||||
Struct.__init__(self)
|
||||
if "count" not in args:
|
||||
self.count=0
|
||||
self.mean=0
|
||||
self.variance=0
|
||||
self.skew=0
|
||||
self.kurtosis=0
|
||||
elif "mean" not in args:
|
||||
self.count=args["count"]
|
||||
self.mean=0
|
||||
self.variance=0
|
||||
self.skew=0
|
||||
self.kurtosis=0
|
||||
elif "variance" not in args and "std" not in args:
|
||||
self.count=args["count"]
|
||||
self.mean=args["mean"]
|
||||
self.variance=0
|
||||
self.skew=0
|
||||
self.kurtosis=0
|
||||
elif "skew" not in args:
|
||||
self.count=args["count"]
|
||||
self.mean=args["mean"]
|
||||
self.variance=args["variance"] if "variance" in args else args["std"]**2
|
||||
self.skew=0
|
||||
self.kurtosis=0
|
||||
elif "kurtosis" not in args:
|
||||
self.count=args["count"]
|
||||
self.mean=args["mean"]
|
||||
self.variance=args["variance"] if "variance" in args else args["std"]**2
|
||||
self.skew=args["skew"]
|
||||
self.kurtosis=0
|
||||
else:
|
||||
self.count=args["count"]
|
||||
self.mean=args["mean"]
|
||||
self.variance=args["variance"] if "variance" in args else args["std"]**2
|
||||
self.skew=args["skew"]
|
||||
self.kurtosis=args["kurtosis"]
|
||||
|
||||
self.unbiased=\
|
||||
args["unbiased"] if "unbiased" in args else \
|
||||
not args["biased"] if "biased" in args else \
|
||||
False
|
||||
|
||||
|
||||
@property
|
||||
def std(self):
|
||||
return sqrt(self.variance)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class Z_moment(object):
|
||||
"""
|
||||
ZERO-CENTERED MOMENTS
|
||||
"""
|
||||
def __init__(self, *args):
|
||||
self.S=tuple(args)
|
||||
|
||||
def __add__(self, other):
|
||||
return Z_moment(*map(add, self.S, other.S))
|
||||
|
||||
def __sub__(self, other):
|
||||
return Z_moment(*map(sub, self.S, other.S))
|
||||
|
||||
@property
|
||||
def tuple(self):
|
||||
#RETURN AS ORDERED TUPLE
|
||||
return self.S
|
||||
|
||||
@property
|
||||
def dict(self):
|
||||
#RETURN HASH OF SUMS
|
||||
return {"s"+unicode(i): m for i, m in enumerate(self.S)}
|
||||
|
||||
|
||||
@staticmethod
|
||||
def new_instance(values=None):
|
||||
if values == None: return Z_moment()
|
||||
values=[float(v) for v in values if v != None]
|
||||
|
||||
return Z_moment(
|
||||
len(values),
|
||||
sum([n for n in values]),
|
||||
sum([pow(n, 2) for n in values]),
|
||||
sum([pow(n, 3) for n in values]),
|
||||
sum([pow(n, 4) for n in values])
|
||||
)
|
||||
|
||||
|
||||
def add(a,b):
|
||||
return nvl(a, 0)+nvl(b,0)
|
||||
|
||||
def sub(a,b):
|
||||
return nvl(a, 0)-nvl(b,0)
|
||||
|
||||
|
||||
def z_moment2dict(z):
|
||||
#RETURN HASH OF SUMS
|
||||
return {"s" + unicode(i): m for i, m in enumerate(z.S)}
|
||||
|
||||
|
||||
setattr(CNV, "z_moment2dict", staticmethod(z_moment2dict))
|
||||
|
||||
|
||||
def median(values):
|
||||
try:
|
||||
if not values:
|
||||
return Null
|
||||
|
||||
l = len(values)
|
||||
_sorted = sorted(values)
|
||||
if l % 2 == 0:
|
||||
return (_sorted[l / 2 - 1] + _sorted[l / 2]) / 2
|
||||
else:
|
||||
return _sorted[l / 2]
|
||||
except Exception, e:
|
||||
Log.error("problem with median", e)
|
|
@ -8,106 +8,235 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from datetime import timedelta, date
|
||||
from datetime import datetime as builtin_datetime
|
||||
import re
|
||||
from .jsons import json_encoder
|
||||
import struct
|
||||
from .struct import Struct
|
||||
|
||||
from . import struct
|
||||
import math
|
||||
import __builtin__
|
||||
from .structs.wraps import unwrap, wrap
|
||||
|
||||
|
||||
def datetime(value):
|
||||
from .cnv import CNV
|
||||
|
||||
if isinstance(value, (date, builtin_datetime)):
|
||||
pass
|
||||
elif value < 10000000000:
|
||||
value = CNV.unix2datetime(value)
|
||||
else:
|
||||
value = CNV.milli2datetime(value)
|
||||
|
||||
return CNV.datetime2string(value, "%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
def unix(value):
|
||||
from .cnv import CNV
|
||||
|
||||
if isinstance(value, (date, builtin_datetime)):
|
||||
pass
|
||||
elif value < 10000000000:
|
||||
value = CNV.unix2datetime(value)
|
||||
else:
|
||||
value = CNV.milli2datetime(value)
|
||||
|
||||
return str(CNV.datetime2unix(value))
|
||||
|
||||
def upper(value):
|
||||
return value.upper()
|
||||
|
||||
def lower(value):
|
||||
return value.lower()
|
||||
|
||||
|
||||
def newline(value):
|
||||
"""
|
||||
ADD NEWLINE, IF SOMETHING
|
||||
"""
|
||||
return "\n"+value.lstrip("\n")
|
||||
return "\n" + toString(value).lstrip("\n")
|
||||
|
||||
def replace(value, find, replace):
|
||||
return value.replace(find, replace)
|
||||
|
||||
def json(value):
|
||||
from .cnv import CNV
|
||||
|
||||
return CNV.object2JSON(value)
|
||||
|
||||
|
||||
def indent(value, prefix=u"\t", indent=None):
|
||||
if indent != None:
|
||||
prefix=prefix*indent
|
||||
prefix = prefix * indent
|
||||
|
||||
value = toString(value)
|
||||
try:
|
||||
content=value.rstrip()
|
||||
suffix=value[len(content):]
|
||||
lines=content.splitlines()
|
||||
return prefix+(u"\n"+prefix).join(lines)+suffix
|
||||
content = value.rstrip()
|
||||
suffix = value[len(content):]
|
||||
lines = content.splitlines()
|
||||
return prefix + (u"\n" + prefix).join(lines) + suffix
|
||||
except Exception, e:
|
||||
raise Exception(u"Problem with indent of value ("+e.message+u")\n"+unicode(value))
|
||||
raise Exception(u"Problem with indent of value (" + e.message + u")\n" + unicode(toString(value)))
|
||||
|
||||
|
||||
def outdent(value):
|
||||
try:
|
||||
num=100
|
||||
lines=value.splitlines()
|
||||
num = 100
|
||||
lines = toString(value).splitlines()
|
||||
for l in lines:
|
||||
trim=len(l.lstrip())
|
||||
if trim>0: num=min(num, len(l)-len(l.lstrip()))
|
||||
trim = len(l.lstrip())
|
||||
if trim > 0:
|
||||
num = min(num, len(l) - len(l.lstrip()))
|
||||
return u"\n".join([l[num:] for l in lines])
|
||||
except Exception, e:
|
||||
from .logs import Log
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("can not outdent value", e)
|
||||
|
||||
def between(value, prefix, suffix):
|
||||
s = value.find(prefix)
|
||||
if s==-1: return None
|
||||
s+=len(prefix)
|
||||
def round(value, decimal=None, digits=None):
|
||||
if digits != None:
|
||||
m = pow(10, math.ceil(math.log10(abs(value))))
|
||||
return __builtin__.round(value / m, digits) * m
|
||||
|
||||
e=value.find(suffix, s)
|
||||
if e==-1:
|
||||
return __builtin__.round(value, decimal)
|
||||
|
||||
def percent(value, decimal=None, digits=None):
|
||||
per = round(value*100, decimal, digits)
|
||||
return str(per)+"%"
|
||||
|
||||
def between(value, prefix, suffix):
|
||||
value = toString(value)
|
||||
s = value.find(prefix)
|
||||
if s == -1: return None
|
||||
s += len(prefix)
|
||||
|
||||
e = value.find(suffix, s)
|
||||
if e == -1:
|
||||
return None
|
||||
|
||||
s=value.rfind(prefix, 0, e)+len(prefix) #WE KNOW THIS EXISTS, BUT THERE MAY BE A RIGHT-MORE ONE
|
||||
s = value.rfind(prefix, 0, e) + len(prefix) #WE KNOW THIS EXISTS, BUT THERE MAY BE A RIGHT-MORE ONE
|
||||
return value[s:e]
|
||||
|
||||
|
||||
def right(value, len):
|
||||
if len<=0: return u""
|
||||
if len <= 0:
|
||||
return u""
|
||||
return value[-len:]
|
||||
|
||||
def left(value, len):
|
||||
if len <= 0:
|
||||
return u""
|
||||
return value[0:len]
|
||||
|
||||
|
||||
def find_first(value, find_arr, start=0):
|
||||
i=len(value)
|
||||
i = len(value)
|
||||
for f in find_arr:
|
||||
temp=value.find(f, start)
|
||||
if temp==-1: continue
|
||||
i=min(i, temp)
|
||||
if i==len(value): return -1
|
||||
temp = value.find(f, start)
|
||||
if temp == -1: continue
|
||||
i = min(i, temp)
|
||||
if i == len(value): return -1
|
||||
return i
|
||||
|
||||
|
||||
pattern = re.compile(r"\{\{([\w_\.]+(\|[^\}^\|]+)*)\}\}")
|
||||
|
||||
def expand_template(template, value):
|
||||
"""
|
||||
template IS A STRING WITH {{variable_name}} INSTANCES, WHICH WILL
|
||||
BE EXPANDED TO WHAT IS IN THE value dict
|
||||
"""
|
||||
value = wrap(value)
|
||||
if isinstance(template, basestring):
|
||||
return _simple_expand(template, (value,))
|
||||
|
||||
return _expand(template, (value,))
|
||||
|
||||
|
||||
pattern=re.compile(r"\{\{([\w_\.]+(\|[\w_]+)*)\}\}")
|
||||
def expand_template(template, values):
|
||||
values=struct.wrap(values)
|
||||
def _expand(template, seq):
|
||||
"""
|
||||
seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE
|
||||
"""
|
||||
if isinstance(template, basestring):
|
||||
return _simple_expand(template, seq)
|
||||
elif isinstance(template, dict):
|
||||
template = wrap(template)
|
||||
assert template["from"], "Expecting template to have 'from' attribute"
|
||||
assert template.template, "Expecting template to have 'template' attribute"
|
||||
|
||||
data = seq[-1][template["from"]]
|
||||
output = []
|
||||
for d in data:
|
||||
s = seq + (d,)
|
||||
output.append(_expand(template.template, s))
|
||||
return struct.nvl(template.separator, "").join(output)
|
||||
elif isinstance(template, list):
|
||||
return "".join(_expand(t, seq) for t in template)
|
||||
else:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("can not handle")
|
||||
|
||||
|
||||
def _simple_expand(template, seq):
|
||||
"""
|
||||
seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE
|
||||
seq[-1] IS THE CURRENT CONTEXT
|
||||
"""
|
||||
|
||||
def replacer(found):
|
||||
seq=found.group(1).split("|")
|
||||
ops = found.group(1).split("|")
|
||||
|
||||
var=seq[0]
|
||||
path = ops[0]
|
||||
var = path.lstrip(".")
|
||||
depth = min(len(seq), max(1, len(path) - len(var)))
|
||||
try:
|
||||
val=values[var]
|
||||
val=toString(val)
|
||||
for filter in seq[1:]:
|
||||
val=eval(filter+"(val)")
|
||||
val = seq[-depth][var]
|
||||
for filter in ops[1:]:
|
||||
parts = filter.split('(')
|
||||
if len(parts) > 1:
|
||||
val = eval(parts[0] + "(val, " + ("(".join(parts[1::])))
|
||||
else:
|
||||
val = eval(filter + "(val)")
|
||||
val = toString(val)
|
||||
return val
|
||||
except Exception, e:
|
||||
try:
|
||||
if e.message.find(u"is not JSON serializable"):
|
||||
if e.message.find("is not JSON serializable"):
|
||||
#WORK HARDER
|
||||
val=toString(val)
|
||||
val = toString(val)
|
||||
return val
|
||||
except Exception:
|
||||
raise Exception(u"Can not expand "+"|".join(seq)+u" in template:\n"+indent(template), e)
|
||||
except Exception, f:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.warning("Can not expand " + "|".join(ops) + " in template: {{template|json}}", {
|
||||
"template": template
|
||||
}, e)
|
||||
return "[template expansion error: ("+str(e.message)+")]"
|
||||
|
||||
return pattern.sub(replacer, template)
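
A sketch of expansion with a pipe filter (eval() looks the filter up among the helpers in this module):

    expand_template("hello {{name|upper}}", {"name": "kyle"})
    # -> u"hello KYLE"
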
|
||||
|
||||
|
||||
def toString(val):
|
||||
if isinstance(val, Struct):
|
||||
return json_encoder.encode(val.dict, pretty=True)
|
||||
elif isinstance(val, dict) or isinstance(val, list) or isinstance(val, set):
|
||||
val=json_encoder.encode(val, pretty=True)
|
||||
return val
|
||||
return unicode(val)
|
||||
if val == None:
|
||||
return ""
|
||||
elif isinstance(val, (dict, list, set)):
|
||||
from .jsons import json_encoder
|
||||
|
||||
return json_encoder(val, pretty=True)
|
||||
elif hasattr(val, "__json__"):
|
||||
return val.__json__()
|
||||
elif isinstance(val, timedelta):
|
||||
duration = val.total_seconds()
|
||||
return unicode(round(duration, 3))+" seconds"
|
||||
|
||||
try:
|
||||
return unicode(val)
|
||||
except Exception, e:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error(str(type(val))+" type can not be converted to unicode", e)
|
||||
|
||||
|
||||
def edit_distance(s1, s2):
|
||||
|
@ -132,4 +261,68 @@ def edit_distance(s1, s2):
|
|||
current_row.append(min(insertions, deletions, substitutions))
|
||||
previous_row = current_row
|
||||
|
||||
return float(previous_row[-1])/len(s1)
|
||||
return float(previous_row[-1]) / len(s1)
|
||||
|
||||
|
||||
DIFF_PREFIX = re.compile(r"@@ -(\d+(?:\s*,\d+)?) \+(\d+(?:\s*,\d+)?) @@")
|
||||
def apply_diff(text, diff, reverse=False):
|
||||
"""
|
||||
SOME EXAMPLES OF diff
|
||||
#@@ -1 +1 @@
|
||||
#-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
|
||||
#+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
|
||||
@@ -0,0 +1,3 @@
|
||||
+before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
|
||||
+
|
||||
+kward has the details.
|
||||
@@ -1 +1 @@
|
||||
-before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
|
||||
+before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
|
||||
@@ -3 +3 ,6 @@
|
||||
-kward has the details.+kward has the details.
|
||||
+
|
||||
+Target Release Dates :
|
||||
+https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
|
||||
+
|
||||
+Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
|
||||
"""
|
||||
if not diff:
|
||||
return text
|
||||
if diff[0].strip() == "":
|
||||
return text
|
||||
|
||||
matches = DIFF_PREFIX.match(diff[0].strip())
|
||||
if not matches:
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("Can not handle {{diff}}\n", {"diff": diff[0]})
|
||||
|
||||
remove = [int(i.strip()) for i in matches.group(1).split(",")]
|
||||
if len(remove) == 1:
|
||||
remove = [remove[0], 1] # DEFAULT 1
|
||||
add = [int(i.strip()) for i in matches.group(2).split(",")]
|
||||
if len(add) == 1:
|
||||
add = [add[0], 1]
|
||||
|
||||
# UNUSUAL CASE WHERE @@ -x +x, n @@ AND FIRST LINE HAS NOT CHANGED
|
||||
half = len(diff[1]) / 2
|
||||
first_half = diff[1][:half]
|
||||
last_half = diff[1][half:half * 2]
|
||||
if remove[1] == 1 and add[0] == remove[0] and first_half[1:] == last_half[1:]:
|
||||
diff[1] = first_half
|
||||
diff.insert(2, last_half)
|
||||
|
||||
if not reverse:
|
||||
if remove[1] != 0:
|
||||
text = text[:remove[0] - 1] + text[remove[0] + remove[1] - 1:]
|
||||
text = text[:add[0] - 1] + [d[1:] for d in diff[1 + remove[1]:1 + remove[1] + add[1]]] + text[add[0] - 1:]
|
||||
text = apply_diff(text, diff[add[1]+remove[1]+1:], reverse=reverse)
|
||||
else:
|
||||
text = apply_diff(text, diff[add[1]+remove[1]+1:], reverse=reverse)
|
||||
if add[1] != 0:
|
||||
text = text[:add[0] - 1] + text[add[0] + add[1] - 1:]
|
||||
text = text[:remove[0] - 1] + [d[1:] for d in diff[1:1 + remove[1]]] + text[remove[0] - 1:]
|
||||
|
||||
return text
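
A sketch with text as a list of lines (matching the recursive list slicing above):

    text = ["line one", "line two"]
    diff = ["@@ -1 +1 @@",
            "-line one",
            "+line 1"]
    apply_diff(text, diff)                         # ["line 1", "line two"]
    apply_diff(["line 1", "line two"], diff,
               reverse=True)                       # ["line one", "line two"]
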


@@ -7,7 +7,12 @@
|
|||
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||
#
|
||||
|
||||
SPECIAL = ["keys", "values", "items", "iteritems", "dict", "copy"]
|
||||
from __future__ import unicode_literals
|
||||
|
||||
_get = object.__getattribute__
|
||||
_set = object.__setattr__
|
||||
|
||||
DEBUG = False
|
||||
|
||||
|
||||
class Struct(dict):
|
||||
|
@ -15,65 +20,102 @@ class Struct(dict):
|
|||
Struct is an anonymous class with some properties good for manipulating JSON
|
||||
|
||||
0) a.b==a["b"]
|
||||
1) the IDE does tab completion, so my spelling mistakes get found at "compile time"
|
||||
2) it deals with missing keys gracefully, so I can put it into set operations (database operations) without choking
|
||||
1) the IDE does tab completion, and my spelling mistakes get found at "compile time"
|
||||
2) it deals with missing keys gracefully, so I can put it into set operations (database
|
||||
operations) without choking
|
||||
a = wrap({})
|
||||
> a == {}
|
||||
a.b is Null
|
||||
> True
|
||||
a.b.c == None
|
||||
> True
|
||||
2b) missing keys is important when dealing with JSON, which is often almost anything
|
||||
3) also, which I hardly use, is storing JSON paths in a variable, so : a["b.c"]==a.b.c
|
||||
3) you can access paths as a variable: a["b.c"]==a.b.c
|
||||
4) you can set paths to values, missing objects along the path are created:
|
||||
a = wrap({})
|
||||
> a == {}
|
||||
a["b.c"] = 42
|
||||
> a == {"b": {"c": 42}}
|
||||
5) attribute names (keys) are corrected to unicode - it appears Python object.getattribute()
|
||||
is called with str() even when using from __future__ import unicode_literals
|
||||
|
||||
MORE ON MISSING VALUES: http://www.numpy.org/NA-overview.html
|
||||
IT ONLY CONSIDERS THE LEGITIMATE-FIELD-WITH-MISSING-VALUE (Statistical Null)
|
||||
AND DOES NOT LOOK AT FIELD-DOES-NOT-EXIST-IN-THIS-CONTEXT (Database Null)
|
||||
|
||||
|
||||
This is a common pattern in many frameworks (I am still working on this list)
|
||||
The Struct is a common pattern in many frameworks (I am still working on this list)
|
||||
|
||||
jinja2.environment.Environment.getattr()
|
||||
argparse.Environment() - code performs setattr(e, name, value) on instances of Environment
|
||||
collections.namedtuple() - gives attribute names to tuple indicies
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, **map):
|
||||
"""
|
||||
THIS WILL MAKE A COPY, WHICH IS UNLIKELY TO BE USEFUL
|
||||
USE struct.wrap() INSTEAD
|
||||
CALLING Struct(**something) WILL RESULT IN A COPY OF something, WHICH IS UNLIKELY TO BE USEFUL
|
||||
USE wrap() INSTEAD
|
||||
"""
|
||||
dict.__init__(self)
|
||||
object.__setattr__(self, "__dict__", map) #map IS A COPY OF THE PARAMETERS
|
||||
if DEBUG:
|
||||
d = _get(self, "__dict__")
|
||||
for k, v in map.items():
|
||||
d[literal_field(k)] = unwrap(v)
|
||||
else:
|
||||
if map:
|
||||
_set(self, "__dict__", map)
|
||||
|
||||
def __bool__(self):
|
||||
return True
|
||||
|
||||
def __nonzero__(self):
|
||||
return True
|
||||
d = _get(self, "__dict__")
|
||||
return True if d else False
|
||||
|
||||
def __str__(self):
|
||||
return dict.__str__(object.__getattribute__(self, "__dict__"))
|
||||
try:
|
||||
return "Struct("+dict.__str__(_get(self, "__dict__"))+")"
|
||||
except Exception, e:
|
||||
return "{}"
|
||||
|
||||
def __repr__(self):
|
||||
try:
|
||||
return "Struct("+dict.__repr__(_get(self, "__dict__"))+")"
|
||||
except Exception, e:
|
||||
return "Struct{}"
|
||||
|
||||
def __contains__(self, item):
|
||||
if Struct.__getitem__(self, item):
|
||||
return True
|
||||
return False
|
||||
|
||||
def __getitem__(self, key):
|
||||
if not isinstance(key, str):
|
||||
key = key.encode("utf-8")
|
||||
if isinstance(key, str):
|
||||
key = key.decode("utf8")
|
||||
|
||||
d = object.__getattribute__(self, "__dict__")
|
||||
d = _get(self, "__dict__")
|
||||
|
||||
if key.find(".") >= 0:
|
||||
key = key.replace("\.", "\a")
|
||||
seq = [k.replace("\a", ".") for k in key.split(".")]
|
||||
seq = split_field(key)
|
||||
for n in seq:
|
||||
d = getdefault(d, n)
|
||||
d = _getdefault(d, n)
|
||||
return wrap(d)
|
||||
|
||||
return wrap(getdefault(d, key))
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
Struct.__setitem__(self, key, value)
|
||||
o = d.get(key, None)
|
||||
if o == None:
|
||||
return NullType(d, key)
|
||||
return wrap(o)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
if not isinstance(key, str):
|
||||
key = key.encode("utf-8")
|
||||
if key == "":
|
||||
from .env.logs import Log
|
||||
|
||||
Log.error("key is empty string. Probably a bad idea")
|
||||
if isinstance(key, str):
|
||||
key = key.decode("utf8")
|
||||
|
||||
try:
|
||||
d = object.__getattribute__(self, "__dict__")
|
||||
d = _get(self, "__dict__")
|
||||
value = unwrap(value)
|
||||
if key.find(".") == -1:
|
||||
if value is None:
|
||||
|
@ -82,10 +124,9 @@ class Struct(dict):
|
|||
d[key] = value
|
||||
return self
|
||||
|
||||
key = key.replace("\.", "\a")
|
||||
seq = [k.replace("\a", ".") for k in key.split(".")]
|
||||
seq = split_field(key)
|
||||
for k in seq[:-1]:
|
||||
d = getdefault(d, k)
|
||||
d = _getdefault(d, k)
|
||||
if value == None:
|
||||
d.pop(seq[-1], None)
|
||||
else:
|
||||
|
@ -95,71 +136,148 @@ class Struct(dict):
|
|||
raise e
|
||||
|
||||
def __getattribute__(self, key):
|
||||
if not isinstance(key, str):
|
||||
key = key.encode("utf-8")
|
||||
try:
|
||||
output = _get(self, key)
|
||||
return wrap(output)
|
||||
except Exception:
|
||||
d = _get(self, "__dict__")
|
||||
if isinstance(key, str):
|
||||
key = key.decode("utf8")
|
||||
|
||||
d = object.__getattribute__(self, "__dict__")
|
||||
if key not in SPECIAL:
|
||||
return wrap(getdefault(d, key))
|
||||
return NullType(d, key)
|
||||
|
||||
#SOME dict FUNCTIONS
|
||||
if key == "items":
|
||||
def temp():
|
||||
_is = dict.__getattribute__(d, "items")
|
||||
return [(k, wrap(v)) for k, v in _is()]
|
||||
def __setattr__(self, key, value):
|
||||
if isinstance(key, str):
|
||||
ukey = key.decode("utf8")
|
||||
else:
|
||||
ukey = key
|
||||
|
||||
return temp
|
||||
if key == "iteritems":
|
||||
#LOW LEVEL ITERATION
|
||||
return d.iteritems
|
||||
if key == "keys":
|
||||
def temp():
|
||||
k = dict.__getattribute__(d, "keys")
|
||||
return set(k())
|
||||
value = unwrap(value)
|
||||
if value is None:
|
||||
d = _get(self, "__dict__")
|
||||
d.pop(key, None)
|
||||
else:
|
||||
_set(self, ukey, value)
|
||||
return self
|
||||
|
||||
return temp
|
||||
if key == "values":
|
||||
def temp():
|
||||
vs = dict.__getattribute__(d, "values")
|
||||
return [wrap(v) for v in vs()]
|
||||
def __hash__(self):
|
||||
d = _get(self, "__dict__")
|
||||
return hash_value(d)
|
||||
|
||||
return temp
|
||||
if key == "dict":
|
||||
return d
|
||||
if key == "copy":
|
||||
o = wrap({k: v for k, v in d.items()})
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, dict):
|
||||
return False
|
||||
e = unwrap(other)
|
||||
d = _get(self, "__dict__")
|
||||
for k, v in d.items():
|
||||
if e.get(k, None) != v:
|
||||
return False
|
||||
for k, v in e.items():
|
||||
if d.get(k, None) != v:
|
||||
return False
|
||||
return True
|
||||
|
||||
def output():
|
||||
return o
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
return output
|
||||
def get(self, key, default):
|
||||
d = _get(self, "__dict__")
|
||||
return d.get(key, default)
|
||||
|
||||
def items(self):
|
||||
d = _get(self, "__dict__")
|
||||
return ((k, wrap(v)) for k, v in d.items())
|
||||
|
||||
def all_items(self):
|
||||
"""
|
||||
GET ALL KEY-VALUES OF LEAF NODES IN Struct
|
||||
"""
|
||||
d = _get(self, "__dict__")
|
||||
output = []
|
||||
for k, v in d.items():
|
||||
if isinstance(v, dict):
|
||||
_all_items(output, k, v)
|
||||
else:
|
||||
output.append((k, v))
|
||||
return output
|
||||
|
||||
def iteritems(self):
|
||||
#LOW LEVEL ITERATION, NO WRAPPING
|
||||
d = _get(self, "__dict__")
|
||||
return d.iteritems()
|
||||
|
||||
def keys(self):
|
||||
d = _get(self, "__dict__")
|
||||
return set(d.keys())
|
||||
|
||||
def values(self):
|
||||
d = _get(self, "__dict__")
|
||||
return (wrap(v) for v in d.values())
|
||||
|
||||
def copy(self):
|
||||
d = _get(self, "__dict__")
|
||||
return Struct(**d)
|
||||
|
||||
def __delitem__(self, key):
|
||||
if not isinstance(key, str):
|
||||
key = key.encode("utf-8")
|
||||
|
||||
d = object.__getattribute__(self, "__dict__")
|
||||
if isinstance(key, str):
|
||||
key = key.decode("utf8")
|
||||
|
||||
if key.find(".") == -1:
|
||||
d = _get(self, "__dict__")
|
||||
d.pop(key, None)
|
||||
return
|
||||
|
||||
key = key.replace("\.", "\a")
|
||||
seq = [k.replace("\a", ".") for k in key.split(".")]
|
||||
d = _get(self, "__dict__")
|
||||
seq = split_field(key)
|
||||
for k in seq[:-1]:
|
||||
d = d[k]
|
||||
d.pop(seq[-1], None)
|
||||
|
||||
def __delattr__(self, key):
|
||||
if isinstance(key, str):
|
||||
key = key.decode("utf8")
|
||||
|
||||
d = _get(self, "__dict__")
|
||||
d.pop(key, None)
|
||||
|
||||
def keys(self):
|
||||
d = object.__getattribute__(self, "__dict__")
|
||||
d = _get(self, "__dict__")
|
||||
return d.keys()
|
||||
|
||||
def setdefault(self, k, d=None):
|
||||
if self[k] == None:
|
||||
self[k] = d
|
||||
|
||||
# KEEP TRACK OF WHAT ATTRIBUTES ARE REQUESTED, MAYBE SOME (BUILTIN) ARE STILL USEFUL
|
||||
requested = set()
|
||||
|
||||
|
||||
def setdefault(obj, key, value):
|
||||
def _all_items(output, key, d):
|
||||
for k, v in d:
|
||||
if isinstance(v, dict):
|
||||
_all_items(output, key+"."+k, v)
|
||||
else:
|
||||
output.append((key+"."+k, v))
|
||||
|
||||
|
||||
def _str(value, depth):
|
||||
"""
|
||||
FOR DEBUGGING POSSIBLY RECURSIVE STRUCTURES
|
||||
"""
|
||||
output = []
|
||||
if depth >0 and isinstance(value, dict):
|
||||
for k, v in value.items():
|
||||
output.append(str(k) + "=" + _str(v, depth - 1))
|
||||
return "{" + ",\n".join(output) + "}"
|
||||
elif depth >0 and isinstance(value, list):
|
||||
for v in value:
|
||||
output.append(_str(v, depth-1))
|
||||
return "[" + ",\n".join(output) + "]"
|
||||
else:
|
||||
return str(type(value))
|
||||
|
||||
|
||||
def _setdefault(obj, key, value):
|
||||
"""
|
||||
DO NOT USE __dict__.setdefault(obj, key, value), IT DOES NOT CHECK FOR obj[key] == None
|
||||
"""
|
||||
|
@ -170,39 +288,91 @@ def setdefault(obj, key, value):
|
|||
return v
|
||||
|
||||
|
||||
def getdefault(obj, key):
|
||||
o = obj.get(key, None)
|
||||
if o == None:
|
||||
return NullStruct(obj, key)
|
||||
return unwrap(o)
|
||||
|
||||
|
||||
def _assign(null, key, value, force=True):
|
||||
def set_default(*params):
|
||||
"""
|
||||
value IS ONLY ASSIGNED IF self.obj[self.path][key] DOES NOT EXIST
|
||||
INPUT dicts IN PRIORITY ORDER
|
||||
UPDATES FIRST dict WITH THE MERGE RESULT, WHERE MERGE RESULT IS DEFINED AS:
|
||||
FOR EACH LEAF, RETURN THE HIGHEST PRIORITY LEAF VALUE
|
||||
"""
|
||||
d = object.__getattribute__(null, "__dict__")
|
||||
o = d["obj"]
|
||||
if isinstance(o, NullStruct):
|
||||
o = _assign(o, d["path"], {}, False)
|
||||
else:
|
||||
o = setdefault(o, d["path"], {})
|
||||
agg = params[0] if params[0] != None else {}
|
||||
for p in params[1:]:
|
||||
p = unwrap(p)
|
||||
if p is None:
|
||||
continue
|
||||
_all_default(agg, p)
|
||||
return wrap(agg)
|
||||
|
||||
if force:
|
||||
o[key] = value
|
||||
else:
|
||||
value = setdefault(o, key, value)
|
||||
return value
|
||||
|
||||
class NullStruct(object):
|
||||
def _all_default(d, default):
|
||||
"""
|
||||
ANY VALUE NOT SET WILL BE SET BY THE default
|
||||
THIS IS RECURSIVE
|
||||
"""
|
||||
if default is None:
|
||||
return
|
||||
for k, default_value in default.items():
|
||||
existing_value = d.get(k, None)
|
||||
if existing_value is None:
|
||||
d[k] = default_value
|
||||
elif isinstance(existing_value, dict) and isinstance(default_value, dict):
|
||||
_all_default(existing_value, default_value)
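
A sketch of set_default() (dict contents illustrative): earlier parameters win, and nested dicts merge recursively.

    a = {"x": 1, "sub": {"p": 1}}
    b = {"x": 2, "y": 3, "sub": {"p": 2, "q": 4}}
    set_default(a, b)
    # a -> {"x": 1, "y": 3, "sub": {"p": 1, "q": 4}}
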
|
||||
|
||||
|
||||
def _getdefault(obj, key):
|
||||
"""
|
||||
TRY BOTH ATTRIBUTE AND ITEM ACCESS, OR RETURN Null
|
||||
"""
|
||||
try:
|
||||
return obj.__getattribute__(key)
|
||||
except Exception, e:
|
||||
try:
|
||||
return obj[key]
|
||||
except Exception, f:
|
||||
return NullType(obj, key)
|
||||
|
||||
|
||||
def _assign(obj, path, value, force=True):
|
||||
"""
|
||||
value IS ASSIGNED TO obj[self.path][key]
|
||||
force=False IF YOU PREFER TO use setDefault()
|
||||
"""
|
||||
if isinstance(obj, NullType):
|
||||
d = _get(obj, "__dict__")
|
||||
o = d["obj"]
|
||||
p = d["path"]
|
||||
s = split_field(p)+path
|
||||
return _assign(o, s, value)
|
||||
|
||||
path0 = path[0]
|
||||
|
||||
if len(path) == 1:
|
||||
if force:
|
||||
obj[path0] = value
|
||||
else:
|
||||
_setdefault(obj, path0, value)
|
||||
return
|
||||
|
||||
old_value = obj.get(path0, None)
|
||||
if old_value == None:
|
||||
if value == None:
|
||||
return
|
||||
else:
|
||||
old_value = {}
|
||||
obj[path0] = old_value
|
||||
_assign(old_value, path[1:], value)
|
||||
|
||||
|
||||
class NullType(object):
|
||||
"""
|
||||
Structural Null provides closure under the dot (.) operator
|
||||
Null[x] == Null
|
||||
Null.x == Null
|
||||
|
||||
Null INSTANCES WILL TRACK THE obj AND path THAT MADE THEM, SO A LATER ASSIGNMENT CAN CREATE THE MISSING PATH
|
||||
"""
|
||||
|
||||
def __init__(self, obj=None, path=None):
|
||||
d = object.__getattribute__(self, "__dict__")
|
||||
d = _get(self, "__dict__")
|
||||
d["obj"] = obj
|
||||
d["path"] = path
|
||||
|
||||
|
@@ -212,6 +382,33 @@ class NullStruct(object):
def __nonzero__(self):
return False

def __add__(self, other):
return Null

def __radd__(self, other):
return Null

def __sub__(self, other):
return Null

def __rsub__(self, other):
return Null

def __neg__(self):
return Null

def __mul__(self, other):
return Null

def __rmul__(self, other):
return Null

def __div__(self, other):
return Null

def __rdiv__(self, other):
return Null

def __gt__(self, other):
return False

@@ -225,13 +422,13 @@ class NullStruct(object):
return False

def __eq__(self, other):
return other is None or isinstance(other, NullStruct)
return other is None or isinstance(other, NullType)

def __ne__(self, other):
return other is not None and not isinstance(other, NullStruct)
return other is not None and not isinstance(other, NullType)

def __getitem__(self, key):
return NullStruct(self, key)
return NullType(self, key)

def __len__(self):
return 0
@@ -239,79 +436,73 @@ class NullStruct(object):
def __iter__(self):
return ZeroList.__iter__()

def last(self):
"""
IN CASE self IS INTERPRETED AS A list
"""
return Null

def right(self, num=None):
return EmptyList

def __getattribute__(self, key):
if key not in SPECIAL:
return NullStruct(self, key)

#SOME dict FUNCTIONS
if key == "items":
def temp():
return ZeroList

return temp
if key == "iteritems":
#LOW LEVEL ITERATION
return self.__iter__()
if key == "keys":
def temp():
return ZeroList

return temp
if key == "values":
def temp():
return ZeroList

return temp
if key == "dict":
return Null
if key == "copy":
#THE INTENT IS USUALLY TO PREPARE FOR UPDATES
def output():
return Struct()

try:
output = _get(self, key)
return output
except Exception, e:
return NullType(self, key)

def __setattr__(self, key, value):
NullStruct.__setitem__(self, key, value)
NullType.__setitem__(self, key, value)

def __setitem__(self, key, value):
try:
value = unwrap(value)
if key.find(".") == -1:
_assign(self, key, value)
return self
d = _get(self, "__dict__")
o = d["obj"]
path = d["path"]
seq = split_field(path)+split_field(key)

key = key.replace("\.", "\a")
seq = [k.replace("\a", ".") for k in key.split(".")]
d = _assign(self, seq[0], {}, False)
for k in seq[1:-1]:
o = {}
d[k] = o
d = o
d[seq[-1]] = value
return self
_assign(o, seq, value)
except Exception, e:
raise e

def keys(self):
return set()

def items(self):
return []

def pop(self, key, default=None):
return None
return Null

def __str__(self):
return "None"

def __repr__(self):
return "Null"

Null = NullStruct()

Null = NullType()
EmptyList = Null

ZeroList = []
def return_zero_list():
return []

def return_zero_set():
return set()

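# --- Illustrative sketch (editor's addition, not part of the commit) ---
# How the structural Null above is meant to behave: every dereference of a
# missing value yields another NullType, so deep paths can be read without
# guards. Assumes this module is importable as util.struct (the package path
# used elsewhere in this diff).
from util.struct import Null

deep = Null["a"]["b"]["c"]   # closure: indexing Null yields Null
assert deep == None          # __eq__ treats None and Null alike
assert len(deep) == 0        # Null acts as an empty container
assert not deep              # and is falsey (__nonzero__)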

class StructList(list):
"""
ENCAPSULATES HANDLING OF Nulls BY wrap()ING ALL MEMBERS AS NEEDED
ENCAPSULATES FLAT SLICES ([::]) FOR USE IN WINDOW FUNCTIONS
"""
EMPTY = None

def __init__(self, vals=None):
""" USE THE vals, NOT A COPY """
list.__init__(self)
# list.__init__(self)
if vals == None:
self.list = []
elif isinstance(vals, StructList):
@@ -320,88 +511,145 @@ class StructList(list):
self.list = vals

def __getitem__(self, index):
if index < 0 or len(self.list) <= index:
if isinstance(index, slice):
# IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None)
if index.step is not None:
from .env.logs import Log
Log.error("slice step must be None, do not know how to deal with values")
length = len(_get(self, "list"))

i = index.start
i = min(max(i, 0), length)
j = index.stop
if j is None:
j = length
else:
j = max(min(j, length), 0)
return StructList(_get(self, "list")[i:j])

if index < 0 or len(_get(self, "list")) <= index:
return Null
return wrap(self.list[index])
return wrap(_get(self, "list")[index])

def __setitem__(self, i, y):
self.list[i] = unwrap(y)
_get(self, "list")[i] = unwrap(y)

def __getattribute__(self, key):
try:
if key != "index": # WE DO NOT WANT TO IMPLEMENT THE index METHOD
output = _get(self, key)
return output
except Exception, e:
if key[0:2] == "__": # SYSTEM LEVEL ATTRIBUTES CAN NOT BE USED FOR SELECT
raise e
return StructList.select(self, key)

def select(self, key):
output = []
for v in _get(self, "list"):
try:
output.append(v.__getattribute__(key))
except Exception, e:
try:
output.append(v.__getitem__(key))
except Exception, f:
output.append(None)

return StructList(output)

def __iter__(self):
return (wrap(v) for v in self.list)
return (wrap(v) for v in _get(self, "list"))

def __contains__(self, item):
return list.__contains__(_get(self, "list"), item)

def append(self, val):
self.list.append(unwrap(val))
_get(self, "list").append(unwrap(val))
return self

def __str__(self):
return self.list.__str__()
return _get(self, "list").__str__()

def __len__(self):
return self.list.__len__()
return _get(self, "list").__len__()

def __getslice__(self, i, j):
return wrap(self.list[i:j])
from .env.logs import Log

Log.error("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step]")

def copy(self):
return StructList(list(_get(self, "list")))

def remove(self, x):
self.list.remove(x)
_get(self, "list").remove(x)
return self

def extend(self, values):
for v in values:
self.list.append(unwrap(v))
_get(self, "list").append(unwrap(v))
return self

def pop(self):
return self.list.pop()
return wrap(_get(self, "list").pop())

def __add__(self, value):
output = list(self.list)
output = list(_get(self, "list"))
output.extend(value)
return StructList(vals=output)

def __or__(self, value):
output = list(self.list)
output = list(_get(self, "list"))
output.append(value)
return StructList(vals=output)

def right(self, num=None):
if num == None:
return StructList(vals=[self.list[-1]])
if num == 0:
return StructList()
return StructList(vals=self.list[-num])
def __radd__(self, other):
output = list(other)
output.extend(_get(self, "list"))
return StructList(vals=output)

def right(self, num=None):
"""
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:]
"""
if num == None:
return StructList([_get(self, "list")[-1]])
if num <= 0:
return EmptyList

return StructList(_get(self, "list")[-num:])

def leftBut(self, num):
"""
WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
"""
if num == None:
return StructList([_get(self, "list")[:-1:]])
if num <= 0:
return EmptyList

return StructList(_get(self, "list")[:-num:])

def last(self):
"""
RETURN LAST ELEMENT IN StructList
RETURN LAST ELEMENT IN StructList [-1]
"""
return self.list[-1]


def wrap(v):
if v is None:
lst = _get(self, "list")
if lst:
return wrap(lst[-1])
return Null
if isinstance(v, (Struct, NullStruct, StructList)):
return v
if isinstance(v, dict):
m = Struct()
object.__setattr__(m, "__dict__", v) #INJECT m.__dict__=v SO THERE IS NO COPY
return m
if isinstance(v, list):
return StructList(v)
return v

def map(self, oper, includeNone=True):
if includeNone:
return StructList([oper(v) for v in _get(self, "list")])
else:
return StructList([oper(v) for v in _get(self, "list") if v != None])


StructList.EMPTY = StructList()



def unwrap(v):
if isinstance(v, Struct):
return object.__getattribute__(v, "__dict__")
if isinstance(v, StructList):
return v.list
if v == None:
return None
return v


def inverse(d):

@@ -419,36 +667,88 @@ def nvl(*args):
# pick the first non-null value
for a in args:
if a != None:
return a
return wrap(a)
return Null

def zip(keys, values):
output = Struct()
for i, k in enumerate(keys):
output[k] = values[i]
return output

def listwrap(value):


def literal_field(field):
"""
OFTEN IT IS NICE TO ALLOW FUNCTION PARAMETERS TO BE ASSIGNED A VALUE,
OR A list-OF-VALUES, OR NULL. CHECKING FOR THIS IS TEDIOUS AND WE WANT TO CAST
FROM THOSE THREE CASES TO THE SINGLE CASE OF A LIST

Null -> []
value -> [value]
[...] -> [...] (unchanged list)

#BEFORE
if a is not None:
if not isinstance(a, list):
a=[a]
for x in a:
#do something


#AFTER
for x in listwrap(a):
#do something

RETURN SAME WITH . ESCAPED
"""
if value == None:
return []
elif isinstance(value, list):
return wrap(value)
try:
return field.replace(".", "\.")
except Exception, e:
from .env.logs import Log

Log.error("bad literal", e)

def cpython_split_field(field):
"""
RETURN field AS ARRAY OF DOT-SEPARATED FIELDS
"""
if field.find(".") >= 0:
field = field.replace("\.", "\a")
return [k.replace("\a", ".") for k in field.split(".")]
else:
return wrap([value])
return [field]

def pypy_split_field(field):
"""
RETURN field AS ARRAY OF DOT-SEPARATED FIELDS
"""
from .jsons import UnicodeBuilder

if not field:
return []

output = []
curr = UnicodeBuilder()
i = 0
while i < len(field):
c = field[i]
i += 1
if c == "\\":
c = field[i]
i += 1
if c == ".":
curr.append(".")
else:
curr.append("\\")
curr.append(c)
elif c == ".":
output.append(curr.build())
curr = UnicodeBuilder()
output.append(curr.build())
return output

# try:
# import __pypy__
# split_field = pypy_split_field
# except ImportError:
split_field = cpython_split_field


def join_field(field):
"""
RETURN field SEQUENCE AS STRING
"""
return ".".join([f.replace(".", "\.") for f in field])

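# --- Illustrative sketch (editor's addition, not part of the commit) ---
# Round-tripping a dotted path with the escaping convention above: a literal
# "." inside a key is escaped as "\.", so split_field()/join_field() invert
# each other even when segments contain dots.
path = ["bugs", "status.whiteboard"]
encoded = join_field(path)                    # 'bugs.status\.whiteboard'
assert split_field(encoded) == path
assert literal_field("status.whiteboard") == "status\\.whiteboard"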

def hash_value(v):
if isinstance(v, (set, tuple, list)):
return hash(tuple(hash_value(vv) for vv in v))
elif not isinstance(v, dict):
return hash(v)
else:
return hash(tuple(sorted(hash_value(vv) for vv in v.values())))

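# --- Illustrative sketch (editor's addition, not part of the commit) ---
# hash_value() gives nested structures a stable hash so they can serve as set
# members or dict keys. Note the dict branch hashes sorted values only, so key
# names do not influence the result:
assert hash_value({"x": 1, "y": 2}) == hash_value({"y": 1, "x": 2})
assert hash_value((1, 2)) == hash_value([1, 2])  # tuples and lists hash alike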

from .structs.wraps import unwrap, wrap
@@ -0,0 +1 @@
__author__ = 'klahnakoski'
@@ -0,0 +1,36 @@
from types import GeneratorType
from ..struct import StructList, Struct


_get = object.__getattribute__
_set = object.__setattr__


def slow_wrap(v):
return wrapper.get(_get(v, "__class__"), _no_wrap)(v)


def _wrap_dict(v):
m = Struct()
_set(m, "__dict__", v) # INJECT m.__dict__=v SO THERE IS NO COPY
return m


def _wrap_list(v):
return StructList(v)


def _wrap_generator(v):
return (slow_wrap(vv) for vv in v)


def _no_wrap(v):
return v


wrapper = {
dict: _wrap_dict,
list: _wrap_list,
GeneratorType: _wrap_generator
}
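# --- Illustrative sketch (editor's addition, not part of the commit) ---
# The wrapper dict above replaces an if/elif chain with a single dict lookup on
# the exact class; anything unlisted (including subclasses) falls through to
# _no_wrap. A quick check of the dispatch:
assert isinstance(slow_wrap({"a": 1}), Struct)
assert isinstance(slow_wrap([1, 2]), StructList)
assert slow_wrap(42) == 42  # unknown types pass through unchanged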
@@ -0,0 +1,139 @@
from types import NoneType, GeneratorType
from ..struct import Null, StructList, Struct


_get = object.__getattribute__
_set = object.__setattr__


def wrap(v):
"""
THIS IS THE CANDIDATE WE ARE TESTING TO WRAP FASTER, BUT IT DOES NOT SEEM TO BE FASTER
"""
type_ = _get(v, "__class__")

if type_ is dict:
m = Struct()
_set(m, "__dict__", v) # INJECT m.__dict__=v SO THERE IS NO COPY
return m
elif type_ is list:
return StructList(v)
elif type_ is GeneratorType:
return (wrap(vv) for vv in v)
elif type_ is NoneType:
return Null
else:
return v


def wrap_dot(value):
"""
dict WITH DOTS IN KEYS IS INTERPRETED AS A PATH
"""
return wrap(_wrap_dot(value))


def _wrap_dot(value):
if value == None:
return None
if isinstance(value, (basestring, int, float)):
return value
if isinstance(value, dict):
if isinstance(value, Struct):
value = unwrap(value)

output = {}
for key, value in value.iteritems():
value = _wrap_dot(value)

if key == "":
from ..env.logs import Log

Log.error("key is empty string. Probably a bad idea")
if isinstance(key, str):
key = key.decode("utf8")

d = output
if key.find(".") == -1:
if value is None:
d.pop(key, None)
else:
d[key] = value
else:
seq = split_field(key)
for k in seq[:-1]:
e = d.get(k, None)
if e is None:
d[k] = {}
e = d[k]
d = e
if value == None:
d.pop(seq[-1], None)
else:
d[seq[-1]] = value
return output
if hasattr(value, '__iter__'):
output = []
for v in value:
v = wrap_dot(v)
output.append(v)
return output
return value

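# --- Illustrative sketch (editor's addition, not part of the commit) ---
# wrap_dot() above expands dotted keys into nested structure; the data here is
# hypothetical, and attribute access relies on Struct (defined in ..struct):
deep = wrap_dot({"build.platform": "linux64", "build.type": "opt"})
assert deep.build.platform == "linux64"
assert deep.build.type == "opt"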

def unwrap(v):
_type = _get(v, "__class__")
if _type is Struct:
d = _get(v, "__dict__")
return d
elif _type is StructList:
return v.list
elif _type is NullType:
return None
elif _type is GeneratorType:
return (unwrap(vv) for vv in v)
else:
return v


def listwrap(value):
"""
OFTEN IT IS NICE TO ALLOW FUNCTION PARAMETERS TO BE ASSIGNED A VALUE,
OR A list-OF-VALUES, OR NULL. CHECKING FOR THIS IS TEDIOUS AND WE WANT TO CAST
FROM THOSE THREE CASES TO THE SINGLE CASE OF A LIST

Null -> []
value -> [value]
[...] -> [...] (unchanged list)

#BEFORE
if a is not None:
if not isinstance(a, list):
a=[a]
for x in a:
#do something


#AFTER
for x in listwrap(a):
#do something

"""
if value == None:
return []
elif isinstance(value, list):
return wrap(value)
else:
return wrap([unwrap(value)])

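# --- Illustrative sketch (editor's addition, not part of the commit) ---
# listwrap() normalizes the three accepted shapes to the single list case, so
# callers iterate without None-checks:
assert list(listwrap(None)) == []
assert list(listwrap("x")) == ["x"]
assert list(listwrap(["x", "y"])) == ["x", "y"]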

def tuplewrap(value):
"""
INTENDED TO TURN lists INTO tuples FOR USE AS KEYS
"""
if isinstance(value, (list, set, tuple, GeneratorType)):
return tuple(tuplewrap(v) if isinstance(v, (list, tuple, GeneratorType)) else v for v in value)
return unwrap(value),


from ..struct import StructList, Struct, split_field, NullType
@@ -0,0 +1 @@
__author__ = 'klahnakoski'
@@ -0,0 +1,83 @@
# encoding: utf-8
#
from .util import struct
from .util.cnv import CNV
from .util.env.elasticsearch import ElasticSearch
from .util.env.logs import Log
from .util.env.files import File
from .util.queries import Q
from .util.struct import Struct
from .util.structs.wraps import unwrap, wrap

def make_test_instance(name, settings):
if settings.filename:
File(settings.filename).delete()
return open_test_instance(name, settings)


def open_test_instance(name, settings):
if settings.filename:
Log.note("Using {{filename}} as {{type}}", {
"filename": settings.filename,
"type": name
})
return Fake_ES(settings)
else:
Log.note("Using ES cluster at {{host}} as {{type}}", {
"host": settings.host,
"type": name
})

ElasticSearch.delete_index(settings)

schema = CNV.JSON2object(File(settings.schema_file).read(), flexible=True, paths=True)
es = ElasticSearch.create_index(settings, schema, limit_replicas=True)
return es




class Fake_ES():
def __init__(self, settings):
self.settings = wrap({"host":"fake", "index":"fake"})
self.filename = settings.filename
try:
self.data = CNV.JSON2object(File(self.filename).read())
except IOError:
self.data = Struct()


def search(self, query):
query=wrap(query)
f = CNV.esfilter2where(query.query.filtered.filter)
filtered=wrap([{"_id": i, "_source": d} for i, d in self.data.items() if f(d)])
if query.fields:
return wrap({"hits": {"total":len(filtered), "hits": [{"_id":d._id, "fields":unwrap(Q.select([unwrap(d._source)], query.fields)[0])} for d in filtered]}})
else:
return wrap({"hits": {"total":len(filtered), "hits": filtered}})

def extend(self, records):
"""
JUST SO WE MODEL A Queue
"""
records = {v["id"]: v["value"] for v in records}

struct.unwrap(self.data).update(records)

data_as_json = CNV.object2JSON(self.data, pretty=True)

File(self.filename).write(data_as_json)
Log.note("{{num}} items added", {"num": len(records)})

def add(self, record):
if isinstance(record, list):
Log.error("no longer accepting lists, use extend()")
return self.extend([record])

def delete_record(self, filter):
f = CNV.esfilter2where(filter)
self.data = wrap({k: v for k, v in self.data.items() if not f(v)})

def set_refresh_interval(self, seconds):
pass
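# --- Illustrative sketch (editor's addition, not part of the commit) ---
# How the factory above is used: a settings struct carrying a filename yields
# the file-backed Fake_ES; the field names (filename, host, schema_file) are
# the ones read above. The path here is hypothetical.
settings = wrap({"filename": "./tests/results/fake_es.json"})
es = make_test_instance("candidate", settings)  # deletes any old file, then opens
es.extend([{"id": 1, "value": {"bug_id": 1, "bug_status": "NEW"}}])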
@@ -0,0 +1 @@
__author__ = 'klahnakoski'
@@ -6,8 +6,9 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals
from multiprocessing.queues import Queue
from .logs import Log
from ..env.logs import Log


class worker(object):
@@ -0,0 +1,173 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

from __future__ import unicode_literals
from collections import Iterable
from types import GeneratorType
from ..struct import nvl
from ..env.logs import Log
from ..thread.threads import Queue, Thread

DEBUG = False


class Multithread(object):
"""
SIMPLE SEMANTICS FOR SYMMETRIC MULTITHREADING
PASS A SET OF functions TO BE EXECUTED (ONE PER THREAD)
SET outbound==False TO SIMPLY THROW AWAY RETURN VALUES, IF ANY
threads - IF functions IS NOT AN ARRAY, THEN threads IS USED TO MAKE AN ARRAY
THE inbound QUEUE IS EXPECTING dicts, EACH dict IS USED AS kwargs TO GIVEN functions
"""

def __init__(self, functions, threads=None, outbound=None, silent_queues=None):
if outbound is None:
self.outbound = Queue(silent=silent_queues)
elif outbound is False:
self.outbound = None
else:
self.outbound = outbound

self.inbound = Queue(silent=silent_queues)

#MAKE THREADS
if isinstance(functions, Iterable):
if threads:
Log.error("do not know how to handle an array of functions AND a thread multiplier")
self.threads = []
for t, f in enumerate(functions):
thread = worker_thread("worker " + unicode(t), self.inbound, self.outbound, f)
self.threads.append(thread)
else:
#ASSUME functions IS A SINGLE FUNCTION
self.threads = []
for t in range(nvl(threads, 1)):
thread = worker_thread("worker " + unicode(t), self.inbound, self.outbound, functions)
self.threads.append(thread)

def __enter__(self):
return self

#WAIT FOR ALL QUEUED WORK TO BE DONE BEFORE RETURNING
def __exit__(self, type, value, traceback):
try:
if isinstance(value, Exception):
self.inbound.close()
self.inbound.add(Thread.STOP)
self.join()
except Exception, e:
Log.warning("Problem sending stops", e)


#IF YOU SENT A stop(), OR Thread.STOP, YOU MAY WAIT FOR SHUTDOWN
def join(self):
try:
#WAIT FOR FINISH
for t in self.threads:
t.join()
except (KeyboardInterrupt, SystemExit):
Log.note("Shutdown started, please be patient")
except Exception, e:
Log.error("Unusual shutdown!", e)
finally:
for t in self.threads:
t.keep_running = False
self.inbound.close()
if self.outbound: self.outbound.close()
for t in self.threads:
t.join()


def execute(self, requests):
"""
RETURN A GENERATOR THAT HAS len(requests) RESULTS (ANY ORDER)
EXPECTING requests TO BE A list OF dicts, EACH dict IS USED AS kwargs TO GIVEN functions
"""
if not isinstance(requests, (list, tuple, GeneratorType)):
Log.error("Expecting requests to be a list or generator", offset=1)

#FILL QUEUE WITH WORK
self.inbound.extend(requests)

num = len(requests)

def output():
for i in xrange(num):
result = self.outbound.pop()
if "exception" in result:
raise result["exception"]
else:
yield result["response"]

if self.outbound is not None:
return output()
else:
return

#EXTERNAL COMMAND THAT RETURNS IMMEDIATELY
def stop(self):
self.inbound.close() #SEND STOPS TO WAKE UP THE WORKERS WAITING ON inbound.pop()
for t in self.threads:
t.keep_running = False


class worker_thread(Thread):
#in_queue MUST CONTAIN HASH OF PARAMETERS FOR load()
def __init__(self, name, in_queue, out_queue, function):
Thread.__init__(self, name, self.event_loop)
self.in_queue = in_queue
self.out_queue = out_queue
self.function = function
self.num_runs = 0
self.start()

def event_loop(self, please_stop):
got_stop = False
while not please_stop.is_go():
request = self.in_queue.pop()
if request == Thread.STOP:
got_stop = True
if self.in_queue.queue:
Log.warning("programmer error, queue not empty. {{num}} requests lost:\n{{requests}}", {
"num": len(self.in_queue.queue),
"requests": self.in_queue.queue[:5:] + self.in_queue.queue[-5::]
})
break
if please_stop.is_go():
break

try:
if DEBUG and hasattr(self.function, "func_name"):
Log.note("run {{function}}", {"function": self.function.func_name})
result = self.function(**request)
if self.out_queue != None:
self.out_queue.add({"response": result})
except Exception, e:
Log.warning("Can not execute with params={{params}}", {"params": request}, e)
if self.out_queue != None:
self.out_queue.add({"exception": e})
finally:
self.num_runs += 1

please_stop.go()
del self.function

if self.num_runs == 0:
Log.warning("{{name}} thread did no work", {"name": self.name})
if DEBUG and self.num_runs != 1:
Log.note("{{name}} thread did {{num}} units of work", {
"name": self.name,
"num": self.num_runs
})
if got_stop and self.in_queue.queue:
Log.warning("multithread programmer error, queue not empty. {{num}} requests lost", {"num": len(self.in_queue.queue)})
if DEBUG:
Log.note("{{thread}} DONE", {"thread": self.name})
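# --- Illustrative sketch (editor's addition, not part of the commit) ---
# Intended calling pattern for Multithread: one worker function, a thread
# multiplier, and one dict of kwargs per request; results come back in any order.
def word_length(word):
    return len(word)

with Multithread(word_length, threads=4) as pool:
    requests = [{"word": w} for w in ["a", "bb", "ccc"]]
    results = list(pool.execute(requests))  # 3 results, any order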
@@ -7,13 +7,18 @@
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from datetime import datetime, timedelta

import threading
import thread
import threading
import time
from .struct import nvl
import sys
from ..struct import nvl, Struct

# THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL.
# THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS
# FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS

DEBUG = True

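# --- Illustrative sketch (editor's addition, not part of the commit) ---
# The please_stop convention in practice: Thread.run() (below) refuses any
# target without a please_stop parameter, and a well-behaved worker polls it so
# shutdown is prompt and predictable.
def heartbeat(please_stop):
    while not please_stop.is_go():
        Thread.sleep(1.0)

t = Thread.run("heartbeat", heartbeat)
t.stop()   # fires please_stop; the loop exits
t.join()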
@@ -21,9 +26,10 @@ class Lock(object):
"""
SIMPLE LOCK (ACTUALLY, A PYTHON threading.Condition() WITH notify() BEFORE EVERY RELEASE)
"""

def __init__(self, name=""):
self.monitor=threading.Condition()
self.name=name
self.monitor = threading.Condition()
self.name = name

def __enter__(self):
self.monitor.acquire()
@@ -35,10 +41,10 @@ class Lock(object):

def wait(self, timeout=None, till=None):
if till:
timeout=(datetime.utcnow()-till).total_seconds()
if timeout<0:
timeout = (datetime.utcnow() - till).total_seconds()
if timeout < 0:
return
self.monitor.wait(timeout=timeout)
self.monitor.wait(timeout=float(timeout) if timeout else None)

def notify_all(self):
self.monitor.notify_all()
@@ -48,39 +54,70 @@ class Queue(object):
"""
SIMPLE MESSAGE QUEUE, multiprocessing.Queue REQUIRES SERIALIZATION, WHICH IS HARD TO USE JUST BETWEEN THREADS
"""
def __init__(self, max=None):

def __init__(self, max=None, silent=False):
"""
max - LIMIT THE NUMBER IN THE QUEUE, IF TOO MANY add() AND extend() WILL BLOCK
silent - IF True, DO NOT COMPLAIN WHEN THE READERS ARE TOO SLOW
"""
self.max = nvl(max, 2**30)
self.max = nvl(max, 2 ** 10)
self.silent = silent
self.keep_running = True
self.lock = Lock("lock for queue")
self.queue = []
self.next_warning=datetime.utcnow() # FOR DEBUGGING

def __iter__(self):
while self.keep_running:
try:
value=self.pop()
if value!=Thread.STOP:
value = self.pop()
if value is not Thread.STOP:
yield value
except Exception, e:
from .logs import Log
from ..env.logs import Log

Log.warning("Tell me about what happened here", e)

def add(self, value):
with self.lock:
self.wait_for_queue_space()
if self.keep_running:
self.queue.append(value)
while self.keep_running and len(self.queue) > self.max:
self.lock.wait()
return self

def extend(self, values):
with self.lock:
# ONCE THE queue IS BELOW LIMIT, ALLOW ADDING MORE
self.wait_for_queue_space()
if self.keep_running:
self.queue.extend(values)
while self.keep_running and len(self.queue) > self.max:
return self

def wait_for_queue_space(self):
"""
EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE
"""
wait_time = 5

now = datetime.utcnow()
if self.next_warning < now:
self.next_warning = now + timedelta(seconds=wait_time)

while self.keep_running and len(self.queue) > self.max:
if self.silent:
self.lock.wait()
else:
self.lock.wait(wait_time)
if len(self.queue) > self.max:
now = datetime.utcnow()
if self.next_warning < now:
self.next_warning = now + timedelta(seconds=wait_time)
from ..env.logs import Log

Log.warning("Queue is full ({{num}} items), thread(s) have been waiting {{wait_time}} sec", {
"num": len(self.queue),
"wait_time": wait_time
})

def __len__(self):
with self.lock:
@@ -90,9 +127,9 @@ class Queue(object):
with self.lock:
while self.keep_running:
if self.queue:
value=self.queue.pop(0)
if value==Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
self.keep_running=False
value = self.queue.pop(0)
if value is Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
self.keep_running = False
return value
self.lock.wait()
return Thread.STOP
@@ -108,7 +145,7 @@ class Queue(object):
return []

for v in self.queue:
if v == Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
if v is Thread.STOP: #SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
self.keep_running = False

output = list(self.queue)
@@ -117,8 +154,7 @@ class Queue(object):

def close(self):
with self.lock:
self.keep_running=False

self.keep_running = False


class AllThread(object):
@@ -127,7 +163,7 @@ class AllThread(object):
"""

def __init__(self):
self.threads=[]
self.threads = []

def __enter__(self):
return self
@@ -137,31 +173,35 @@ class AllThread(object):
self.join()

def join(self):
exceptions=[]
exceptions = []
try:
for t in self.threads:
response=t.join()
response = t.join()
if "exception" in response:
exceptions.append(response["exception"])
except Exception, e:
from .logs import Log
from ..env.logs import Log

Log.warning("Problem joining", e)

if exceptions:
from .logs import Log
Log.error("Problem in child threads", exceptions)
from ..env.logs import Log

Log.error("Problem in child threads", exceptions)


def add(self, target, *args, **kwargs):
"""
target IS THE FUNCTION TO EXECUTE IN THE THREAD
"""
t=Thread.run(target.__name__, target, *args, **kwargs)
t = Thread.run(target.__name__, target, *args, **kwargs)
self.threads.append(t)



ALL_LOCK = Lock()
MAIN_THREAD = Struct(name="Main Thread", id=thread.get_ident())
ALL = dict()
ALL[thread.get_ident()] = MAIN_THREAD


class Thread(object):

@@ -170,22 +210,22 @@ class Thread(object):
run() ENHANCED TO CAPTURE EXCEPTIONS
"""

num_threads=0
STOP="stop"
TIMEOUT="TIMEOUT"

num_threads = 0
STOP = "stop"
TIMEOUT = "TIMEOUT"


def __init__(self, name, target, *args, **kwargs):
self.id = -1
self.name = name
self.target = target
self.response = None
self.synch_lock=Lock()
self.synch_lock = Lock()
self.args = args

#ENSURE THERE IS A SHARED please_stop SIGNAL
self.kwargs = kwargs.copy()
self.kwargs["please_stop"]=self.kwargs.get("please_stop", Signal())
self.kwargs["please_stop"] = self.kwargs.get("please_stop", Signal())
self.please_stop = self.kwargs["please_stop"]

self.stopped = Signal()
@@ -206,38 +246,49 @@ class Thread(object):

def start(self):
try:
self.thread=thread.start_new_thread(Thread._run, (self, ))
thread.start_new_thread(Thread._run, (self, ))
except Exception, e:
from .logs import Log
from ..env.logs import Log

Log.error("Can not start thread", e)

def stop(self):
self.please_stop.go()

def _run(self):
self.id = thread.get_ident()
with ALL_LOCK:
ALL[self.id] = self

try:
if self.target is not None:
response=self.target(*self.args, **self.kwargs)
response = self.target(*self.args, **self.kwargs)
with self.synch_lock:
self.response={"response":response}
self.response = Struct(response=response)
except Exception, e:
with self.synch_lock:
self.response={"exception":e}
from .logs import Log
Log.error("Problem in thread", e)
self.response = Struct(exception=e)
try:
from ..env.logs import Log

Log.fatal("Problem in thread {{name}}", {"name": self.name}, e)
except Exception, f:
sys.stderr.write("ERROR in thread: " + str(self.name) + " " + str(e) + "\n")
finally:
self.stopped.go()
del self.target, self.args, self.kwargs
with ALL_LOCK:
del ALL[self.id]

def is_alive(self):
return not self.stopped

def join(self, timeout=None, till=None):
"""
RETURN THE RESULT OF THE THREAD EXECUTION (INCLUDING EXCEPTION)
RETURN THE RESULT {"response":r, "exception":e} OF THE THREAD EXECUTION (INCLUDING EXCEPTION, IF EXISTS)
"""
if not till and timeout:
till=datetime.utcnow()+timedelta(seconds=timeout)
till = datetime.utcnow() + timedelta(seconds=timeout)

if till is None:
while True:
@@ -247,22 +298,25 @@ class Thread(object):
return self.response
self.synch_lock.wait(0.5)

from .logs import Log
if DEBUG:
Log.note("Waiting on thread {{thread}}", {"thread":self.name})
from ..env.logs import Log

Log.note("Waiting on thread {{thread|json}}", {"thread": self.name})
else:
self.stopped.wait_for_go(till=till)
if self.stopped:
return self.response
else:
from logs import Except
from ..env.logs import Except

raise Except(type=Thread.TIMEOUT)

@staticmethod
def run(name, target, *args, **kwargs):
#ENSURE target HAS please_stop ARGUMENT
if "please_stop" not in target.__code__.co_varnames:
from logs import Log
from ..env.logs import Log

Log.error("function must have please_stop argument for signalling emergency shutdown")

Thread.num_threads += 1
@@ -280,6 +334,14 @@ class Thread(object):
if duration > 0:
time.sleep(duration)

@staticmethod
def current():
id = thread.get_ident()
with ALL_LOCK:
try:
return ALL[id]
except KeyError, e:
return MAIN_THREAD


class Signal(object):
@@ -290,7 +352,7 @@ class Signal(object):
def __init__(self):
self.lock = Lock()
self._go = False
self.job_queue=[]
self.job_queue = []


def __bool__(self):
@@ -315,8 +377,8 @@ class Signal(object):
return

self._go = True
jobs=self.job_queue
self.job_queue=[]
jobs = self.job_queue
self.job_queue = []
self.lock.notify_all()

for j in jobs:
@@ -337,37 +399,39 @@ class Signal(object):
self.job_queue.append(target)




class ThreadedQueue(Queue):
"""
TODO: Check that this queue is not dropping items at shutdown
DISPATCH TO ANOTHER (SLOWER) queue IN BATCHES OF GIVEN size
"""
def __init__(self, queue, size, max=None):

def __init__(self, queue, size=None, max=None, period=None, silent=False):
if max == None:
#REASONABLE DEFAULT
max = size*2
max = size * 2

Queue.__init__(self, max=max)
Queue.__init__(self, max=max, silent=silent)

def size_pusher(please_stop):
please_stop.on_go(lambda: self.add(Thread.STOP))

#queue IS A MULTI-THREADED QUEUE, SO THIS WILL BLOCK UNTIL THE size ARE READY
from .queries import Q
from ..queries import Q

for i, g in Q.groupby(self, size=size):
try:
queue.extend(g)
if please_stop:
from logs import Log
from ..env.logs import Log

Log.warning("ThreadedQueue stopped early, with {{num}} items left in queue", {
"num":len(self)
"num": len(self)
})
return
except Exception, e:
from logs import Log
Log.warning("Can not push data to given queue", e)
from ..env.logs import Log

Log.warning("Problem with pushing {{num}} items to data sink", {"num": len(g)}, e)

self.thread = Thread.run("threaded queue", size_pusher)
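# --- Illustrative sketch (editor's addition, not part of the commit) ---
# ThreadedQueue decouples many fast producers from one slow consumer by
# batching: items accumulate here and are pushed to the destination queue
# size-at-a-time by the pusher thread above.
slow_sink = Queue()                 # any object with extend(), per size_pusher
batched = ThreadedQueue(slow_sink, size=100)
for record in range(1000):          # hypothetical records
    batched.add(record)
batched.add(Thread.STOP)            # a STOP in the queue ends the pusher
batched.thread.join()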
@@ -1,48 +0,0 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
import time
from .strings import expand_template
from .logs import Log


class Timer:
"""
USAGE:
with Timer("doing hard time"):
something_that_takes_long()
OUTPUT:
doing hard time took 45.468 sec
"""

def __init__(self, description, param=None):
self.description=expand_template(description, param) #WE WOULD LIKE TO KEEP THIS TEMPLATE, AND PASS IT TO THE LOGGER ON __exit__(), WE FAKE IT FOR NOW

def __enter__(self):
Log.note("Timer start: {{description}}", {
"description":self.description
})


self.start = time.clock()
return self

def __exit__(self, type, value, traceback):
self.end = time.clock()
self.interval = self.end - self.start
Log.note("Timer end : {{description}} (took {{duration}} sec)", {
"description":self.description,
"duration":round(self.interval, 3)
})



@property
def duration(self):
return self.interval
@@ -0,0 +1 @@
__author__ = 'klahnakoski'
@@ -0,0 +1,299 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from __future__ import unicode_literals

from .. import regex
from ..cnv import CNV
from ..collections import MIN
from ..env.logs import Log
from ..maths import Math
from ..structs.wraps import unwrap, wrap


class Duration(object):

def __new__(cls, obj=None):
output = object.__new__(cls)
if obj == None:
return None
if Math.is_number(obj):
output.milli = obj
output.month = 0
return output
elif isinstance(obj, basestring):
return parse(obj)
elif isinstance(obj, Duration):
output.milli = obj.milli
output.month = obj.month
return output
elif Math.is_nan(obj):
return None
else:
Log.error("Do not know type of object (" + CNV.object2JSON(obj) + ") to make a Duration")


def __add__(self, other):
output = Duration(0)
output.milli = self.milli + other.milli
output.month = self.month + other.month
return output

def __mul__(self, amount):
output = Duration(0)
output.milli = self.milli * amount
output.month = self.month * amount
return output

def __rmul__(self, amount):
output = Duration(0)
output.milli = self.milli * amount
output.month = self.month * amount
return output

def __div__(self, amount):
if isinstance(amount, Duration) and amount.month:
m = self.month
r = self.milli

# DO NOT CONSIDER TIME OF DAY
tod = r % MILLI_VALUES.day
r = r - tod

if m == 0 and r > (MILLI_VALUES.year / 3):
m = Math.floor(12 * self.milli / MILLI_VALUES.year)
r -= (m / 12) * MILLI_VALUES.year
else:
r = r - (self.month * MILLI_VALUES.month)
if r >= MILLI_VALUES.day * 31:
Log.error("Do not know how to handle")
r = MIN(29 / 30, (r + tod) / (MILLI_VALUES.day * 30))

output = Math.floor(m / amount.month) + r
return output
elif Math.is_number(amount):
output = Duration(0)
output.milli = self.milli / amount
output.month = self.month / amount
return output
else:
return self.milli / amount.milli


def __sub__(self, duration):
output = Duration(0)
output.milli = self.milli - duration.milli
output.month = self.month - duration.month
return output

def floor(self, interval=None):
if not isinstance(interval, Duration):
Log.error("Expecting an interval as a Duration object")

output = Duration(0)
if interval.month:
if self.month:
output.month = Math.floor(self.month / interval.month) * interval.month
output.milli = output.month * MILLI_VALUES.month
return output

# A MONTH OF DURATION IS BIGGER THAN A CANONICAL MONTH
output.month = Math.floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month) * interval.month
output.milli = output.month * MILLI_VALUES.month
else:
output.milli = Math.floor(self.milli / (interval.milli)) * (interval.milli)
return output

def __str__(self):
if not self.milli:
return "zero"

output = ""
rest = (self.milli - (MILLI_VALUES.month * self.month)) # DO NOT INCLUDE THE MONTH'S MILLIS
isNegative = (rest < 0)
rest = Math.abs(rest)

# MILLI
rem = rest % 1000
if rem != 0:
output = "+" + unicode(rem) + "milli" + output
rest = Math.floor(rest / 1000)

# SECOND
rem = rest % 60
if rem != 0:
output = "+" + unicode(rem) + "second" + output
rest = Math.floor(rest / 60)

# MINUTE
rem = rest % 60
if rem != 0:
output = "+" + unicode(rem) + "minute" + output
rest = Math.floor(rest / 60)

# HOUR
rem = rest % 24
if rem != 0:
output = "+" + unicode(rem) + "hour" + output
rest = Math.floor(rest / 24)

# DAY
if rest < 11 and rest != 7:
rem = rest
rest = 0
else:
rem = rest % 7
rest = Math.floor(rest / 7)

if rem != 0:
output = "+" + unicode(rem) + "day" + output

# WEEK
if rest != 0:
output = "+" + unicode(rest) + "week" + output

if isNegative:
output = output.replace("+", "-")

# MONTH AND YEAR
if self.month:
sign = "-" if self.month < 0 else "+"
month = Math.abs(self.month)

if month <= 18 and month != 12:
output = sign + unicode(month) + "month" + output
else:
m = month % 12
if m != 0:
output = sign + unicode(m) + "month" + output
y = Math.floor(month / 12)
output = sign + unicode(y) + "year" + output

if output[0] == "+":
output = output[1::]
if output[0] == '1' and not Math.is_number(output[1]):
output = output[1::]
return output


def format(self, interval, rounding):
return self.round(Duration.newInstance(interval), rounding) + interval

def round(self, interval, rounding=0):
output = self / interval
output = Math.round(output, rounding)
return output


def _string2Duration(text):
"""
CONVERT SIMPLE <float><type> TO A DURATION OBJECT
"""
if text == "" or text == "zero":
return Duration(0)

amount, interval = regex.match(r"([\d\.]*)(.*)", text)
amount = CNV.value2int(amount) if amount else 0

if MILLI_VALUES[interval] == None:
Log.error(interval + " is not a recognized duration type (did you use the plural form by mistake?)")

output = Duration(0)
if MONTH_VALUES[interval] == 0:
output.milli = amount * MILLI_VALUES[interval]
else:
output.milli = amount * MONTH_VALUES[interval] * MILLI_VALUES.month
output.month = amount * MONTH_VALUES[interval]

return output


def parse(value):
output = Duration(0)

# EXPECTING CONCAT OF <sign><integer><type>
plist = value.split("+")
for p, pplist in enumerate(plist):
mlist = pplist.split("-")
output = output + _string2Duration(mlist[0])
for m in mlist[1::]:
output = output - _string2Duration(m)
return output


MILLI_VALUES = wrap({
"year": 52 * 7 * 24 * 60 * 60 * 1000, # 52weeks
"quarter": 13 * 7 * 24 * 60 * 60 * 1000, # 13weeks
"month": 28 * 24 * 60 * 60 * 1000, # 4weeks
"week": 7 * 24 * 60 * 60 * 1000,
"day": 24 * 60 * 60 * 1000,
"hour": 60 * 60 * 1000,
"minute": 60 * 1000,
"second": 1000,
"milli": 1
})

MONTH_VALUES = wrap({
"year": 12,
"quarter": 3,
"month": 1,
"week": 0,
"day": 0,
"hour": 0,
"minute": 0,
"second": 0,
"milli": 0
})

# A REAL MONTH IS LARGER THAN THE CANONICAL MONTH
MONTH_SKEW = MILLI_VALUES["year"] / 12 - MILLI_VALUES.month


def compare(a, b):
return a.milli - b.milli


DOMAIN = {
"type": "duration",
"compare": compare
}

ZERO = Duration(0)
SECOND = Duration("second")
MINUTE = Duration("minute")
HOUR = Duration("hour")
DAY = Duration("day")
WEEK = Duration("week")
MONTH = Duration("month")
QUARTER = Duration("quarter")
YEAR = Duration("year")

COMMON_INTERVALS = [
Duration("second"),
Duration("15second"),
Duration("30second"),
Duration("minute"),
Duration("5minute"),
Duration("15minute"),
Duration("30minute"),
Duration("hour"),
Duration("2hour"),
Duration("3hour"),
Duration("6hour"),
Duration("12hour"),
Duration("day"),
Duration("2day"),
Duration("week"),
Duration("2week"),
Duration("month"),
Duration("2month"),
Duration("quarter"),
Duration("6month"),
Duration("year")
]
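# --- Illustrative sketch (editor's addition, not part of the commit) ---
# Durations are a (milli, month) pair because a calendar month has no fixed
# millisecond length: MONTH_SKEW above works out to
#   31449600000 / 12 - 2419200000 = 201600000 milli (2 days 8 hours).
# parse() accepts concatenated <sign><integer><type> terms:
d = parse("2hour+30minute")
assert d.milli == (2 * 60 + 30) * 60 * 1000
assert d.month == 0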
@@ -0,0 +1,49 @@
# encoding: utf-8
#
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from datetime import timedelta
from time import clock

from ..struct import nvl, Struct
from ..structs.wraps import wrap
from ..env.logs import Log


class Timer:
"""
USAGE:
with Timer("doing hard time"):
something_that_takes_long()
OUTPUT:
doing hard time took 45.468 sec
"""

def __init__(self, description, param=None, debug=True):
self.template = description
self.param = nvl(wrap(param), Struct())
self.debug = debug

def __enter__(self):
if self.debug:
Log.note("Timer start: " + self.template, self.param, stack_depth=1)
self.start = clock()

return self

def __exit__(self, type, value, traceback):
if self.debug:
self.end = clock()
self.interval = self.end - self.start
param = wrap(self.param)
param.duration = timedelta(seconds=self.interval)
Log.note("Timer end : " + self.template + " (took {{duration}})", self.param, stack_depth=1)

@property
def duration(self):
return self.interval
@@ -0,0 +1 @@
__author__ = 'klahnakoski'
@@ -0,0 +1,9 @@

About
-----

**DO NOT USE THIS LIBRARY IF REAL ENCRYPTION IS REQUIRED**, THIS VENDOR LIBRARY HAS NOT BEEN VETTED FOR CORRECTNESS.

* Snagged from [https://github.com/caller9/pythonaes](https://github.com/caller9/pythonaes)
* Licensed under the MIT license [http://www.opensource.org/licenses/mit-license.php](http://www.opensource.org/licenses/mit-license.php)

@@ -0,0 +1,151 @@
#!/usr/bin/env python
"""
AES Block Cipher.

Performs single block cipher decipher operations on a 16 element list of integers.
These integers represent 8 bit bytes in a 128 bit block.
The result of cipher or decipher operations is the transformed 16 element list of integers.

Running this file as __main__ will result in a self-test of the algorithm.

Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf

Thanks to serprex for many optimizations in this code. For even more, see his github fork of this project.

Copyright (c) 2010, Adam Newman http://www.caller9.com/
Demur Rumed https://github.com/serprex
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"

#Normally use relative import. In test mode use local import.
try:
from . import aes_tables
except ValueError:
import aes_tables

class AESCipher:
"""Perform single block AES cipher/decipher"""

def __init__ (self, expanded_key):
#Store expanded key
self._expanded_key = expanded_key

#Number of rounds determined by expanded key length
self._Nr = int(len(expanded_key) / 16) - 1

def _sub_bytes (self, state):
#Run state through sbox
for i,s in enumerate(state):state[i]=aes_tables.sbox[s]

def _i_sub_bytes (self, state):
#Run state through inverted sbox
for i,s in enumerate(state):state[i]=aes_tables.i_sbox[s]

def _shift_row (self, row, shift):
#Circular shift row left by shift amount
row+=row[:shift]
del row[:shift]
return row

def _i_shift_row (self, row, shift):
#Circular shift row left by shift amount
row+=row[:shift]
del row[:4+shift]
return row

def _shift_rows (self, state):
#Extract rows as every 4th item starting at [1..3]
#Replace row with shift_row operation
for i in 1,2,3:
state[i::4] = self._shift_row(state[i::4],i)

def _i_shift_rows (self, state):
#Extract rows as every 4th item starting at [1..3]
#Replace row with inverse shift_row operation
for i in 1,2,3:
state[i::4] = self._i_shift_row(state[i::4],-i)

def _mix_column (self, column, inverse):
#Use galois lookup tables instead of performing complicated operations
#If inverse, use matrix with inverse values
g0,g1,g2,g3=aes_tables.galI if inverse else aes_tables.galNI
c0,c1,c2,c3=column
return (
g0[c0]^g1[c1]^g2[c2]^g3[c3],
g3[c0]^g0[c1]^g1[c2]^g2[c3],
g2[c0]^g3[c1]^g0[c2]^g1[c3],
g1[c0]^g2[c1]^g3[c2]^g0[c3])

def _mix_columns (self, state, inverse):
#Perform mix_column for each column in the state
for i,j in (0,4),(4,8),(8,12),(12,16):
state[i:j] = self._mix_column(state[i:j], inverse)

def _add_round_key (self, state, round):
#XOR the state with the current round key
for k,(i,j) in enumerate(zip(state, self._expanded_key[round*16:(round+1)*16])):state[k]=i^j

def cipher_block (self, state):
"""Perform AES block cipher on input"""
#PKCS7 Padding
state=state+[16-len(state)]*(16-len(state))#Fails test if it changes the input with +=

self._add_round_key(state, 0)

for i in range(1, self._Nr):
self._sub_bytes(state)
self._shift_rows(state)
self._mix_columns(state, False)
self._add_round_key(state, i)

self._sub_bytes(state)
self._shift_rows(state)
self._add_round_key(state, self._Nr)
return state

def decipher_block (self, state):
"""Perform AES block decipher on input"""
#null padding. Padding actually should not be needed here with valid input.
state=state+[0]*(16-len(state))

self._add_round_key(state, self._Nr)

for i in range(self._Nr - 1, 0, -1):
self._i_shift_rows(state)
self._i_sub_bytes(state)
self._add_round_key(state, i)
self._mix_columns(state, True)

self._i_shift_rows(state)
self._i_sub_bytes(state)
self._add_round_key(state, 0)
return state

import unittest
class TestCipher(unittest.TestCase):
def test_cipher(self):
"""Test AES cipher with all key lengths"""
try:
from . import test_keys, key_expander
except:
import test_keys, key_expander

test_data = test_keys.TestKeys()

for key_size in 128, 192, 256:
test_key_expander = key_expander.KeyExpander(key_size)
test_expanded_key = test_key_expander.expand(test_data.test_key[key_size])
test_cipher = AESCipher(test_expanded_key)
test_result_ciphertext = test_cipher.cipher_block(test_data.test_block_plaintext)
self.assertEquals(len([i for i, j in zip(test_result_ciphertext, test_data.test_block_ciphertext_validated[key_size]) if i == j]),
16,
msg='Test %d bit cipher'%key_size)

test_result_plaintext = test_cipher.decipher_block(test_data.test_block_ciphertext_validated[key_size])
self.assertEquals(len([i for i, j in zip(test_result_plaintext, test_data.test_block_plaintext) if i == j]),
16,
msg='Test %d bit decipher'%key_size)

if __name__ == "__main__":
unittest.main()
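# --- Illustrative sketch (editor's addition, not part of the commit) ---
# Single-block roundtrip, mirroring the unit test above; KeyExpander comes from
# the sibling key_expander module that the test imports. The all-zero 128-bit
# key here is hypothetical.
import key_expander

expanded = key_expander.KeyExpander(128).expand([0] * 16)
cipher = AESCipher(expanded)
block = list(range(16))   # one 16-byte block
assert cipher.decipher_block(cipher.cipher_block(list(block))) == block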
@ -0,0 +1,167 @@
|
|||
"""
|
||||
Instantiate AES tables for rcon,sbox,i_sbox,and galois_lookup.
|
||||
|
||||
Copyright (c) 2010,Adam Newman http://www.caller9.com/
|
||||
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
|
||||
"""
|
||||
__author__ = "Adam Newman"
|
||||
|
||||
rcon=(
|
||||
0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,
|
||||
0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,
|
||||
0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,
|
||||
0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,
|
||||
0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,
|
||||
0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,
|
||||
0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,
|
||||
0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,
|
||||
0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,
|
||||
0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,
|
||||
0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,
|
||||
0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,
|
||||
0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,
|
||||
0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,
|
||||
0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,
|
||||
0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb)
|
||||
|
||||
sbox=(
|
||||
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
|
||||
0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
|
||||
0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
|
||||
0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
|
||||
0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
|
||||
0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
|
||||
0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
|
||||
0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
|
||||
0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
|
||||
0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
|
||||
0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
|
||||
0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
|
||||
0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
|
||||
0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
|
||||
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
|
||||
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16)
|
||||
|
||||
i_sbox=(
|
||||
0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
|
||||
0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
|
||||
0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
|
||||
0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
|
||||
0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
|
||||
0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
|
||||
0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
|
||||
0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
|
||||
0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
|
||||
0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
|
||||
0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
|
||||
0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
|
||||
0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
|
||||
0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
|
||||
0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
|
||||
0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d)
|
||||
|
||||
gal1=tuple(range(256))
|
||||
gal2=(
|
||||
0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
|
||||
0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
|
||||
0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
|
||||
0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
|
||||
0x80,0x82,0x84,0x86,0x88,0x8a,0x8c,0x8e,0x90,0x92,0x94,0x96,0x98,0x9a,0x9c,0x9e,
|
||||
0xa0,0xa2,0xa4,0xa6,0xa8,0xaa,0xac,0xae,0xb0,0xb2,0xb4,0xb6,0xb8,0xba,0xbc,0xbe,
|
||||
0xc0,0xc2,0xc4,0xc6,0xc8,0xca,0xcc,0xce,0xd0,0xd2,0xd4,0xd6,0xd8,0xda,0xdc,0xde,
|
||||
0xe0,0xe2,0xe4,0xe6,0xe8,0xea,0xec,0xee,0xf0,0xf2,0xf4,0xf6,0xf8,0xfa,0xfc,0xfe,
|
||||
0x1b,0x19,0x1f,0x1d,0x13,0x11,0x17,0x15,0x0b,0x09,0x0f,0x0d,0x03,0x01,0x07,0x05,
|
||||
0x3b,0x39,0x3f,0x3d,0x33,0x31,0x37,0x35,0x2b,0x29,0x2f,0x2d,0x23,0x21,0x27,0x25,
|
||||
0x5b,0x59,0x5f,0x5d,0x53,0x51,0x57,0x55,0x4b,0x49,0x4f,0x4d,0x43,0x41,0x47,0x45,
|
||||
0x7b,0x79,0x7f,0x7d,0x73,0x71,0x77,0x75,0x6b,0x69,0x6f,0x6d,0x63,0x61,0x67,0x65,
|
||||
0x9b,0x99,0x9f,0x9d,0x93,0x91,0x97,0x95,0x8b,0x89,0x8f,0x8d,0x83,0x81,0x87,0x85,
|
||||
0xbb,0xb9,0xbf,0xbd,0xb3,0xb1,0xb7,0xb5,0xab,0xa9,0xaf,0xad,0xa3,0xa1,0xa7,0xa5,
|
||||
0xdb,0xd9,0xdf,0xdd,0xd3,0xd1,0xd7,0xd5,0xcb,0xc9,0xcf,0xcd,0xc3,0xc1,0xc7,0xc5,
|
||||
0xfb,0xf9,0xff,0xfd,0xf3,0xf1,0xf7,0xf5,0xeb,0xe9,0xef,0xed,0xe3,0xe1,0xe7,0xe5)
|
||||
gal3=(
|
||||
0x00,0x03,0x06,0x05,0x0c,0x0f,0x0a,0x09,0x18,0x1b,0x1e,0x1d,0x14,0x17,0x12,0x11,
|
||||
0x30,0x33,0x36,0x35,0x3c,0x3f,0x3a,0x39,0x28,0x2b,0x2e,0x2d,0x24,0x27,0x22,0x21,
|
||||
0x60,0x63,0x66,0x65,0x6c,0x6f,0x6a,0x69,0x78,0x7b,0x7e,0x7d,0x74,0x77,0x72,0x71,
|
||||
0x50,0x53,0x56,0x55,0x5c,0x5f,0x5a,0x59,0x48,0x4b,0x4e,0x4d,0x44,0x47,0x42,0x41,
|
||||
0xc0,0xc3,0xc6,0xc5,0xcc,0xcf,0xca,0xc9,0xd8,0xdb,0xde,0xdd,0xd4,0xd7,0xd2,0xd1,
|
||||
0xf0,0xf3,0xf6,0xf5,0xfc,0xff,0xfa,0xf9,0xe8,0xeb,0xee,0xed,0xe4,0xe7,0xe2,0xe1,
|
||||
0xa0,0xa3,0xa6,0xa5,0xac,0xaf,0xaa,0xa9,0xb8,0xbb,0xbe,0xbd,0xb4,0xb7,0xb2,0xb1,
|
||||
0x90,0x93,0x96,0x95,0x9c,0x9f,0x9a,0x99,0x88,0x8b,0x8e,0x8d,0x84,0x87,0x82,0x81,
|
||||
0x9b,0x98,0x9d,0x9e,0x97,0x94,0x91,0x92,0x83,0x80,0x85,0x86,0x8f,0x8c,0x89,0x8a,
|
||||
0xab,0xa8,0xad,0xae,0xa7,0xa4,0xa1,0xa2,0xb3,0xb0,0xb5,0xb6,0xbf,0xbc,0xb9,0xba,
|
||||
0xfb,0xf8,0xfd,0xfe,0xf7,0xf4,0xf1,0xf2,0xe3,0xe0,0xe5,0xe6,0xef,0xec,0xe9,0xea,
|
||||
0xcb,0xc8,0xcd,0xce,0xc7,0xc4,0xc1,0xc2,0xd3,0xd0,0xd5,0xd6,0xdf,0xdc,0xd9,0xda,
|
||||
0x5b,0x58,0x5d,0x5e,0x57,0x54,0x51,0x52,0x43,0x40,0x45,0x46,0x4f,0x4c,0x49,0x4a,
|
||||
0x6b,0x68,0x6d,0x6e,0x67,0x64,0x61,0x62,0x73,0x70,0x75,0x76,0x7f,0x7c,0x79,0x7a,
|
||||
0x3b,0x38,0x3d,0x3e,0x37,0x34,0x31,0x32,0x23,0x20,0x25,0x26,0x2f,0x2c,0x29,0x2a,
|
||||
0x0b,0x08,0x0d,0x0e,0x07,0x04,0x01,0x02,0x13,0x10,0x15,0x16,0x1f,0x1c,0x19,0x1a)
|
||||
gal9=(
|
||||
0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f,0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77,
|
||||
0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf,0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7,
|
||||
0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04,0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c,
|
||||
0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94,0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc,
|
||||
0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49,0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01,
|
||||
0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9,0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91,
|
||||
0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72,0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a,
|
||||
0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2,0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa,
|
||||
0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3,0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b,
|
||||
0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43,0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b,
|
||||
0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8,0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0,
|
||||
0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78,0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30,
|
||||
0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5,0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed,
|
||||
0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35,0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d,
|
||||
0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e,0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6,
|
||||
0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e,0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46)
|
||||
gal11=(
|
||||
0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31,0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69,
|
||||
0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81,0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9,
|
||||
0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a,0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12,
|
||||
0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa,0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2,
|
||||
0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7,0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f,
|
||||
0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77,0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f,
|
||||
0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc,0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4,
|
||||
0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c,0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54,
|
||||
0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6,0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e,
|
||||
0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76,0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e,
|
||||
0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd,0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5,
|
||||
0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d,0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55,
|
||||
0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30,0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68,
|
||||
0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80,0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8,
|
||||
0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b,0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13,
|
||||
0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb,0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3)
|
||||
gal13=(
|
||||
0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23,0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b,
|
||||
0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3,0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b,
|
||||
0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98,0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0,
|
||||
0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48,0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20,
|
||||
0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e,0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26,
|
||||
0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e,0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6,
|
||||
0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5,0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d,
|
||||
0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25,0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d,
|
||||
0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9,0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91,
|
||||
0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29,0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41,
|
||||
0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42,0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a,
|
||||
0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92,0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa,
|
||||
0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94,0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc,
|
||||
0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44,0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c,
|
||||
0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f,0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47,
|
||||
0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff,0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97)
|
||||
gal14=(
|
||||
0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a,0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a,
|
||||
0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca,0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba,
|
||||
0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1,0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81,
|
||||
0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11,0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61,
|
||||
0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87,0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7,
|
||||
0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67,0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17,
|
||||
0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c,0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c,
|
||||
0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc,0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc,
|
||||
0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b,0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b,
|
||||
0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b,0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb,
|
||||
0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0,0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0,
|
||||
0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50,0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20,
|
||||
0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6,0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6,
|
||||
0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26,0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56,
|
||||
0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d,0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d,
|
||||
0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd,0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d)
|
||||
galI=gal14,gal11,gal13,gal9
|
||||
galNI=gal2,gal3,gal1,gal1
|
|
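Every gal table above is a straight lookup for multiplication by a fixed constant in GF(2^8) with the AES polynomial 0x11b. A standalone sanity-check sketch; xtime below is the usual doubling primitive, introduced here only for the check and not part of this commit:

# Verify the doubling table against on-the-fly GF(2^8) multiply-by-2.
import aes_tables  # assumes this module is importable

def xtime(x):
    x <<= 1
    return (x ^ 0x1b) & 0xff if x & 0x100 else x

assert all(aes_tables.gal2[x] == xtime(x) for x in range(256))
# 3*x in GF(2^8) is 2*x XOR x
assert all(aes_tables.gal3[x] == aes_tables.gal2[x] ^ x for x in range(256))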
@@ -0,0 +1,54 @@
#!/usr/bin/env python
"""
CBC Mode of operation

Running this file as __main__ will result in a self-test of the algorithm.

Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf

Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""

__author__ = "Adam Newman"

#Normally use relative import. In test mode use local import.
try:
    from .cipher_mode import CipherMode
    from .mode_test import GeneralTestEncryptionMode
except (ImportError, ValueError):
    from cipher_mode import CipherMode
    from mode_test import GeneralTestEncryptionMode

class CBCMode(CipherMode):
    """Perform CBC operation on a block and retain IV information for next operation"""

    name = "CBC"

    def __init__(self, block_cipher, block_size):
        CipherMode.__init__(self, block_cipher, block_size)

    def encrypt_block(self, plaintext):
        #XOR the plaintext into the IV; any IV tail beyond a short final block passes through
        xor = [i ^ j for i, j in zip(plaintext, self._iv)] + list(self._iv[len(plaintext)::])
        ciphertext = bytearray(self._block_cipher.cipher_block(xor))
        self._iv = ciphertext
        return ciphertext

    def decrypt_block(self, ciphertext):
        result_decipher = self._block_cipher.decipher_block(list(ciphertext))
        plaintext = bytearray(i ^ j for i, j in zip(self._iv, result_decipher))
        self._iv = ciphertext
        return plaintext

class TestEncryptionMode(GeneralTestEncryptionMode):
    def test_mode(self):
        """Test CBC Mode Encrypt/Decrypt"""
        try:
            from aespython.test_keys import TestKeys
        except (ImportError, ValueError):
            from test_keys import TestKeys

        test_data = TestKeys()

        test_mode = CBCMode(self.get_keyed_cipher(test_data.test_mode_key), 16)

        self.run_cipher(test_mode, test_data.test_mode_iv, test_data.test_cbc_ciphertext, test_data.test_mode_plaintext)

if __name__ == "__main__":
    import unittest
    unittest.main()
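Each encrypt_block call folds the previous ciphertext back in as the IV, so a multi-block message is just repeated calls with the chain reset before decryption. A minimal sketch; module names are flattened for readability here (in the repo these files live inside the util package), and the toy key and message are invented for illustration:

# CBC chaining over a three-block message (sketch, assumed flat imports).
import key_expander, aes_cipher, cbc_mode

key = list(range(32))                       # toy 256-bit key, illustration only
iv = [0] * 16
cipher = aes_cipher.AESCipher(key_expander.KeyExpander(256).expand(key))
mode = cbc_mode.CBCMode(cipher, 16)
mode.set_iv(iv)

message = list(range(48))                   # three 16-byte blocks
blocks = [message[i:i + 16] for i in range(0, len(message), 16)]
ciphertext = [mode.encrypt_block(b) for b in blocks]

mode.set_iv(iv)                             # reset the chain before decrypting
decrypted = [list(mode.decrypt_block(c)) for c in ciphertext]
assert sum(decrypted, []) == message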
@@ -0,0 +1,53 @@
#!/usr/bin/env python
"""
CFB Mode of operation

Running this file as __main__ will result in a self-test of the algorithm.

Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf

Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"

#Normally use relative import. In test mode use local import.
try:
    from .cipher_mode import CipherMode
    from .mode_test import GeneralTestEncryptionMode
except (ImportError, ValueError):
    from cipher_mode import CipherMode
    from mode_test import GeneralTestEncryptionMode

class CFBMode(CipherMode):
    """Perform CFB operation on a block and retain IV information for next operation"""

    name = "CFB"

    def __init__(self, block_cipher, block_size):
        CipherMode.__init__(self, block_cipher, block_size)

    def encrypt_block(self, plaintext):
        cipher_iv = self._block_cipher.cipher_block(self._iv)
        ciphertext = [i ^ j for i, j in zip(plaintext, cipher_iv)]
        self._iv = ciphertext
        return ciphertext

    def decrypt_block(self, ciphertext):
        cipher_iv = self._block_cipher.cipher_block(self._iv)
        plaintext = [i ^ j for i, j in zip(cipher_iv, ciphertext)]
        self._iv = ciphertext
        return plaintext

class TestEncryptionMode(GeneralTestEncryptionMode):
    def test_mode(self):
        """Test CFB Mode Encrypt/Decrypt"""
        try:
            from .test_keys import TestKeys
        except (ImportError, ValueError):
            from test_keys import TestKeys

        test_data = TestKeys()

        test_mode = CFBMode(self.get_keyed_cipher(test_data.test_mode_key), 16)

        self.run_cipher(test_mode, test_data.test_mode_iv, test_data.test_cfb_ciphertext, test_data.test_mode_plaintext)

if __name__ == "__main__":
    import unittest
    unittest.main()
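Note that both directions run the block cipher forward: CFB decryption XORs the ciphertext against cipher_block(iv) and never calls decipher_block. A tiny symmetry sketch under the same assumed flat module names as above:

# CFB inverts itself with the same keystream XOR; no block decipher needed.
import key_expander, aes_cipher, cfb_mode

cipher = aes_cipher.AESCipher(key_expander.KeyExpander(128).expand(list(range(16))))
enc = cfb_mode.CFBMode(cipher, 16)
dec = cfb_mode.CFBMode(cipher, 16)
enc.set_iv([0] * 16)
dec.set_iv([0] * 16)

block = list(range(16))
assert dec.decrypt_block(enc.encrypt_block(block)) == block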
@@ -0,0 +1,37 @@
#!/usr/bin/env python
"""
Cipher Mode of operation

Abstract base for the concrete modes; subclasses provide encrypt_block/decrypt_block.

Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf

Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
from ...env.logs import Log

__author__ = "Adam Newman"

class CipherMode:
    """Perform Cipher operation on a block and retain IV information for next operation"""

    name = "ABSTRACT"

    def __init__(self, block_cipher, block_size):
        self._block_cipher = block_cipher
        self._block_size = block_size
        self._iv = [0] * block_size

    def set_iv(self, iv):
        if len(iv) != self._block_size:
            Log.error("iv is wrong size")
        self._iv = iv

    def encrypt_block(self, plaintext):
        raise NotImplementedError("Abstract function")

    def decrypt_block(self, ciphertext):
        raise NotImplementedError("Abstract function")
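Concrete modes only need the two block hooks. As an illustration (hypothetical, not part of this commit), an ECB-style subclass that ignores the IV entirely would look like this:

# Hypothetical ECB mode, for illustration only. ECB is insecure in practice
# because identical plaintext blocks produce identical ciphertext blocks.
class ECBMode(CipherMode):
    name = "ECB"

    def encrypt_block(self, plaintext):
        return self._block_cipher.cipher_block(list(plaintext))

    def decrypt_block(self, ciphertext):
        return self._block_cipher.decipher_block(list(ciphertext))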
@@ -0,0 +1,144 @@
#!/usr/bin/env python

"""
AES Key Expansion.

Expands 128, 192, or 256 bit key for use with AES

Running this file as __main__ will result in a self-test of the algorithm.

Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf

Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"

#Normally use relative import. In test mode use local import.
try:
    from . import aes_tables
except (ImportError, ValueError):
    import aes_tables

class KeyExpander:
    """Perform AES Key Expansion"""

    _expanded_key_length = {128: 176, 192: 208, 256: 240}

    def __init__(self, key_length):
        self._key_length = key_length
        self._n = int(key_length / 8)

        if key_length in self._expanded_key_length:
            self._b = self._expanded_key_length[key_length]
        else:
            raise LookupError('Invalid Key Size')

    def _core(self, key_array, iteration):
        if len(key_array) != 4:
            raise RuntimeError('_core(): key segment size invalid')

        #Append the list of elements 1-3 and list comprised of element 0 (circular rotate left)
        #For each element of this new list, put the result of sbox into output array.
        #I was torn on readability vs pythonicity. This also may be faster.
        output = [aes_tables.sbox[i] for i in key_array[1:] + key_array[:1]]

        #First byte of output array is XORed with rcon(iteration)
        output[0] = output[0] ^ aes_tables.rcon[iteration]

        return output

    def _xor_list(self, list_1, list_2):
        return [i ^ j for i, j in zip(list_1, list_2)]

    def expand(self, key_array):
        """
        Expand the encryption key per AES key schedule specifications

        http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_description
        """

        if len(key_array) != self._n:
            raise RuntimeError('expand(): key size ' + str(len(key_array)) + ' is invalid')

        #First n bytes are copied from key. Copy prevents inplace modification of original key
        new_key = list(key_array)

        rcon_iteration = 1
        len_new_key = len(new_key)

        #There are several parts of the code below that could be done with tidy list comprehensions like
        #the one I put in _core, but I left this alone for readability.

        #Grow the key until it is the correct length
        while len_new_key < self._b:

            #Copy the last 4 bytes of the extended key, apply _core with the current
            #rcon_iteration (then increment it), and xor with the 4 bytes sitting
            #n bytes from the end of the extended key
            t = new_key[-4:]
            t = self._core(t, rcon_iteration)
            rcon_iteration += 1
            t = self._xor_list(t, new_key[-self._n: -self._n + 4])
            new_key.extend(t)
            len_new_key += 4

            #Run three passes of 4 byte expansion using copy of 4 byte tail of extended key
            #which is then xor'd with 4 bytes n bytes from end of extended key
            for j in range(3):
                t = new_key[-4:]
                t = self._xor_list(t, new_key[-self._n: -self._n + 4])
                new_key.extend(t)
                len_new_key += 4

            #If key length is 256 and key is not complete, add 4 bytes tail of extended key
            #run through sbox before xor with 4 bytes n bytes from end of extended key
            if self._key_length == 256 and len_new_key < self._b:
                t = new_key[-4:]
                t2 = []
                for x in t:
                    t2.append(aes_tables.sbox[x])
                t = self._xor_list(t2, new_key[-self._n: -self._n + 4])
                new_key.extend(t)
                len_new_key += 4

            #If key length is 192 or 256 and key is not complete, run 2 or 3 passes respectively
            #of 4 byte tail of extended key xor with 4 bytes n bytes from end of extended key
            if self._key_length != 128 and len_new_key < self._b:
                if self._key_length == 192:
                    r = range(2)
                else:
                    r = range(3)

                for j in r:
                    t = new_key[-4:]
                    t = self._xor_list(t, new_key[-self._n: -self._n + 4])
                    new_key.extend(t)
                    len_new_key += 4

        return new_key

import unittest
class TestKeyExpander(unittest.TestCase):

    def test_keys(self):
        """Test All Key Expansions"""
        try:
            from . import test_keys
        except (ImportError, ValueError):
            import test_keys

        test_data = test_keys.TestKeys()

        for key_size in [128, 192, 256]:
            test_expander = KeyExpander(key_size)
            test_expanded_key = test_expander.expand(test_data.test_key[key_size])
            self.assertEqual(len([i for i, j in zip(test_expanded_key, test_data.test_expanded_key_validated[key_size]) if i == j]),
                             len(test_data.test_expanded_key_validated[key_size]),
                             msg='Key expansion ' + str(key_size) + ' bit')

if __name__ == "__main__":
    unittest.main()
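The _expanded_key_length map is just round-key arithmetic: AES-128 runs 10 rounds and needs 11 round keys of 16 bytes each, hence 176 bytes; 192-bit keys give 12 rounds (208 bytes) and 256-bit keys 14 rounds (240 bytes). A one-liner check:

# (rounds + 1) round keys of 16 bytes each, per FIPS-197.
for key_bits, rounds in ((128, 10), (192, 12), (256, 14)):
    assert (rounds + 1) * 16 == {128: 176, 192: 208, 256: 240}[key_bits]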
@@ -0,0 +1,37 @@
"""
|
||||
Cipher Mode of operation
|
||||
|
||||
Abstract encryption mode test harness.
|
||||
"""
|
||||
from .key_expander import KeyExpander
|
||||
from .aes_cipher import AESCipher
|
||||
|
||||
import unittest
|
||||
class GeneralTestEncryptionMode(unittest.TestCase):
|
||||
def get_keyed_cipher(self, key):
|
||||
|
||||
|
||||
test_expander = KeyExpander(256)
|
||||
test_expanded_key = test_expander.expand(key)
|
||||
|
||||
return AESCipher(test_expanded_key)
|
||||
|
||||
def run_cipher(self, cipher_mode, iv, ciphertext_list, plaintext_list):
|
||||
"""Given an cipher mode, test key, and test iv, use known ciphertext, plaintext to test algorithm"""
|
||||
|
||||
cipher_mode.set_iv(iv)
|
||||
for k in range(len(ciphertext_list)):
|
||||
self.assertEquals(len([i for i, j in zip(ciphertext_list[k],cipher_mode.encrypt_block(plaintext_list[k])) if i == j]),
|
||||
16,
|
||||
msg=cipher_mode.name + ' encrypt test block' + str(k))
|
||||
|
||||
cipher_mode.set_iv(iv)
|
||||
for k in range(len(plaintext_list)):
|
||||
self.assertEquals(len([i for i, j in zip(plaintext_list[k],cipher_mode.decrypt_block(ciphertext_list[k])) if i == j]),
|
||||
16,
|
||||
msg=cipher_mode.name + ' decrypt test block' + str(k))
|
||||
|
||||
def test_mode(self):
|
||||
"""Abstract Test Harness for Encrypt/Decrypt"""
|
||||
pass
|
||||
|
|
@@ -0,0 +1,59 @@
#!/usr/bin/env python
"""
OFB Mode of operation

Running this file as __main__ will result in a self-test of the algorithm.

Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf

Copyright (c) 2010, Adam Newman http://www.caller9.com/
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
"""
__author__ = "Adam Newman"

#Normally use relative import. In test mode use local import.
try:
    from .cipher_mode import CipherMode
    from .mode_test import GeneralTestEncryptionMode
except (ImportError, ValueError):
    from cipher_mode import CipherMode
    from mode_test import GeneralTestEncryptionMode

class OFBMode(CipherMode):
    """Perform OFB operation on a block and retain IV information for next operation"""

    name = "OFB"

    def __init__(self, block_cipher, block_size):
        self._block_cipher = block_cipher
        self._block_size = block_size
        self._iv = [0] * block_size

    def set_iv(self, iv):
        #Silently keeps the old IV if the size is wrong (unlike the base class, which logs an error)
        if len(iv) == self._block_size:
            self._iv = iv

    def encrypt_block(self, plaintext):
        cipher_iv = self._block_cipher.cipher_block(self._iv)
        ciphertext = [i ^ j for i, j in zip(plaintext, cipher_iv)]
        self._iv = cipher_iv
        return ciphertext

    def decrypt_block(self, ciphertext):
        cipher_iv = self._block_cipher.cipher_block(self._iv)
        plaintext = [i ^ j for i, j in zip(cipher_iv, ciphertext)]
        self._iv = cipher_iv
        return plaintext

class TestEncryptionMode(GeneralTestEncryptionMode):
    def test_mode(self):
        """Test OFB Mode Encrypt/Decrypt"""
        try:
            from aespython.test_keys import TestKeys
        except (ImportError, ValueError):
            from test_keys import TestKeys

        test_data = TestKeys()

        test_mode = OFBMode(self.get_keyed_cipher(test_data.test_mode_key), 16)

        self.run_cipher(test_mode, test_data.test_mode_iv, test_data.test_ofb_ciphertext, test_data.test_mode_plaintext)

if __name__ == "__main__":
    import unittest
    unittest.main()
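OFB feeds the encrypted IV back in (not the ciphertext), so the keystream is independent of the data and encryption and decryption are the same operation. A small sketch under the same assumed flat module names:

# OFB applied twice with the same IV returns the plaintext.
import key_expander, aes_cipher, ofb_mode

cipher = aes_cipher.AESCipher(key_expander.KeyExpander(128).expand(list(range(16))))
a = ofb_mode.OFBMode(cipher, 16)
b = ofb_mode.OFBMode(cipher, 16)
a.set_iv(list(range(16)))
b.set_iv(list(range(16)))

block = [0x41] * 16
assert b.encrypt_block(a.encrypt_block(block)) == block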
@@ -0,0 +1,119 @@
"""
|
||||
Test keys and data for self-test operations.
|
||||
|
||||
Test data from:
|
||||
NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
|
||||
NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
|
||||
|
||||
Copyright (c) 2010, Adam Newman http://www.caller9.com/
|
||||
Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
|
||||
"""
|
||||
__author__ = "Adam Newman"
|
||||
|
||||
class TestKeys:
|
||||
"""Test data, keys, IVs, and output to use in self-tests"""
|
||||
test_key = {
|
||||
128 : [
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f]
|
||||
, 192 : [
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17]
|
||||
, 256 : [
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f]
|
||||
}
|
||||
|
||||
test_expanded_key_validated = {
|
||||
128 : [
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0xd6, 0xaa, 0x74, 0xfd, 0xd2, 0xaf, 0x72, 0xfa, 0xda, 0xa6, 0x78, 0xf1, 0xd6, 0xab, 0x76, 0xfe,
|
||||
0xb6, 0x92, 0xcf, 0x0b, 0x64, 0x3d, 0xbd, 0xf1, 0xbe, 0x9b, 0xc5, 0x00, 0x68, 0x30, 0xb3, 0xfe,
|
||||
0xb6, 0xff, 0x74, 0x4e, 0xd2, 0xc2, 0xc9, 0xbf, 0x6c, 0x59, 0x0c, 0xbf, 0x04, 0x69, 0xbf, 0x41,
|
||||
0x47, 0xf7, 0xf7, 0xbc, 0x95, 0x35, 0x3e, 0x03, 0xf9, 0x6c, 0x32, 0xbc, 0xfd, 0x05, 0x8d, 0xfd,
|
||||
0x3c, 0xaa, 0xa3, 0xe8, 0xa9, 0x9f, 0x9d, 0xeb, 0x50, 0xf3, 0xaf, 0x57, 0xad, 0xf6, 0x22, 0xaa,
|
||||
0x5e, 0x39, 0x0f, 0x7d, 0xf7, 0xa6, 0x92, 0x96, 0xa7, 0x55, 0x3d, 0xc1, 0x0a, 0xa3, 0x1f, 0x6b,
|
||||
0x14, 0xf9, 0x70, 0x1a, 0xe3, 0x5f, 0xe2, 0x8c, 0x44, 0x0a, 0xdf, 0x4d, 0x4e, 0xa9, 0xc0, 0x26,
|
||||
0x47, 0x43, 0x87, 0x35, 0xa4, 0x1c, 0x65, 0xb9, 0xe0, 0x16, 0xba, 0xf4, 0xae, 0xbf, 0x7a, 0xd2,
|
||||
0x54, 0x99, 0x32, 0xd1, 0xf0, 0x85, 0x57, 0x68, 0x10, 0x93, 0xed, 0x9c, 0xbe, 0x2c, 0x97, 0x4e,
|
||||
0x13, 0x11, 0x1d, 0x7f, 0xe3, 0x94, 0x4a, 0x17, 0xf3, 0x07, 0xa7, 0x8b, 0x4d, 0x2b, 0x30, 0xc5]
|
||||
, 192 : [
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x58, 0x46, 0xf2, 0xf9, 0x5c, 0x43, 0xf4, 0xfe,
|
||||
0x54, 0x4a, 0xfe, 0xf5, 0x58, 0x47, 0xf0, 0xfa, 0x48, 0x56, 0xe2, 0xe9, 0x5c, 0x43, 0xf4, 0xfe,
|
||||
0x40, 0xf9, 0x49, 0xb3, 0x1c, 0xba, 0xbd, 0x4d, 0x48, 0xf0, 0x43, 0xb8, 0x10, 0xb7, 0xb3, 0x42,
|
||||
0x58, 0xe1, 0x51, 0xab, 0x04, 0xa2, 0xa5, 0x55, 0x7e, 0xff, 0xb5, 0x41, 0x62, 0x45, 0x08, 0x0c,
|
||||
0x2a, 0xb5, 0x4b, 0xb4, 0x3a, 0x02, 0xf8, 0xf6, 0x62, 0xe3, 0xa9, 0x5d, 0x66, 0x41, 0x0c, 0x08,
|
||||
0xf5, 0x01, 0x85, 0x72, 0x97, 0x44, 0x8d, 0x7e, 0xbd, 0xf1, 0xc6, 0xca, 0x87, 0xf3, 0x3e, 0x3c,
|
||||
0xe5, 0x10, 0x97, 0x61, 0x83, 0x51, 0x9b, 0x69, 0x34, 0x15, 0x7c, 0x9e, 0xa3, 0x51, 0xf1, 0xe0,
|
||||
0x1e, 0xa0, 0x37, 0x2a, 0x99, 0x53, 0x09, 0x16, 0x7c, 0x43, 0x9e, 0x77, 0xff, 0x12, 0x05, 0x1e,
|
||||
0xdd, 0x7e, 0x0e, 0x88, 0x7e, 0x2f, 0xff, 0x68, 0x60, 0x8f, 0xc8, 0x42, 0xf9, 0xdc, 0xc1, 0x54,
|
||||
0x85, 0x9f, 0x5f, 0x23, 0x7a, 0x8d, 0x5a, 0x3d, 0xc0, 0xc0, 0x29, 0x52, 0xbe, 0xef, 0xd6, 0x3a,
|
||||
0xde, 0x60, 0x1e, 0x78, 0x27, 0xbc, 0xdf, 0x2c, 0xa2, 0x23, 0x80, 0x0f, 0xd8, 0xae, 0xda, 0x32,
|
||||
0xa4, 0x97, 0x0a, 0x33, 0x1a, 0x78, 0xdc, 0x09, 0xc4, 0x18, 0xc2, 0x71, 0xe3, 0xa4, 0x1d, 0x5d]
|
||||
, 256 : [
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0xa5, 0x73, 0xc2, 0x9f, 0xa1, 0x76, 0xc4, 0x98, 0xa9, 0x7f, 0xce, 0x93, 0xa5, 0x72, 0xc0, 0x9c,
|
||||
0x16, 0x51, 0xa8, 0xcd, 0x02, 0x44, 0xbe, 0xda, 0x1a, 0x5d, 0xa4, 0xc1, 0x06, 0x40, 0xba, 0xde,
|
||||
0xae, 0x87, 0xdf, 0xf0, 0x0f, 0xf1, 0x1b, 0x68, 0xa6, 0x8e, 0xd5, 0xfb, 0x03, 0xfc, 0x15, 0x67,
|
||||
0x6d, 0xe1, 0xf1, 0x48, 0x6f, 0xa5, 0x4f, 0x92, 0x75, 0xf8, 0xeb, 0x53, 0x73, 0xb8, 0x51, 0x8d,
|
||||
0xc6, 0x56, 0x82, 0x7f, 0xc9, 0xa7, 0x99, 0x17, 0x6f, 0x29, 0x4c, 0xec, 0x6c, 0xd5, 0x59, 0x8b,
|
||||
0x3d, 0xe2, 0x3a, 0x75, 0x52, 0x47, 0x75, 0xe7, 0x27, 0xbf, 0x9e, 0xb4, 0x54, 0x07, 0xcf, 0x39,
|
||||
0x0b, 0xdc, 0x90, 0x5f, 0xc2, 0x7b, 0x09, 0x48, 0xad, 0x52, 0x45, 0xa4, 0xc1, 0x87, 0x1c, 0x2f,
|
||||
0x45, 0xf5, 0xa6, 0x60, 0x17, 0xb2, 0xd3, 0x87, 0x30, 0x0d, 0x4d, 0x33, 0x64, 0x0a, 0x82, 0x0a,
|
||||
0x7c, 0xcf, 0xf7, 0x1c, 0xbe, 0xb4, 0xfe, 0x54, 0x13, 0xe6, 0xbb, 0xf0, 0xd2, 0x61, 0xa7, 0xdf,
|
||||
0xf0, 0x1a, 0xfa, 0xfe, 0xe7, 0xa8, 0x29, 0x79, 0xd7, 0xa5, 0x64, 0x4a, 0xb3, 0xaf, 0xe6, 0x40,
|
||||
0x25, 0x41, 0xfe, 0x71, 0x9b, 0xf5, 0x00, 0x25, 0x88, 0x13, 0xbb, 0xd5, 0x5a, 0x72, 0x1c, 0x0a,
|
||||
0x4e, 0x5a, 0x66, 0x99, 0xa9, 0xf2, 0x4f, 0xe0, 0x7e, 0x57, 0x2b, 0xaa, 0xcd, 0xf8, 0xcd, 0xea,
|
||||
0x24, 0xfc, 0x79, 0xcc, 0xbf, 0x09, 0x79, 0xe9, 0x37, 0x1a, 0xc2, 0x3c, 0x6d, 0x68, 0xde, 0x36]
|
||||
}
|
||||
|
||||
test_block_ciphertext_validated = {
|
||||
128 : [
|
||||
0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30, 0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a]
|
||||
, 192 : [
|
||||
0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0, 0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91]
|
||||
, 256 : [
|
||||
0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf, 0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89]
|
||||
}
|
||||
|
||||
test_block_plaintext = [
|
||||
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff]
|
||||
|
||||
#After initial validation, these deviated from test in SP 800-38A to use same key, iv, and plaintext on tests.
|
||||
#Still valid, just easier to test with.
|
||||
test_mode_key= [
|
||||
0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
|
||||
0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4]
|
||||
test_mode_iv = [
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f]
|
||||
test_mode_plaintext = [
|
||||
[0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a],
|
||||
[0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51],
|
||||
[0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef],
|
||||
[0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10]]
|
||||
test_cbc_ciphertext = [
|
||||
[0xf5, 0x8c, 0x4c, 0x04, 0xd6, 0xe5, 0xf1, 0xba, 0x77, 0x9e, 0xab, 0xfb, 0x5f, 0x7b, 0xfb, 0xd6],
|
||||
[0x9c, 0xfc, 0x4e, 0x96, 0x7e, 0xdb, 0x80, 0x8d, 0x67, 0x9f, 0x77, 0x7b, 0xc6, 0x70, 0x2c, 0x7d],
|
||||
[0x39, 0xf2, 0x33, 0x69, 0xa9, 0xd9, 0xba, 0xcf, 0xa5, 0x30, 0xe2, 0x63, 0x04, 0x23, 0x14, 0x61],
|
||||
[0xb2, 0xeb, 0x05, 0xe2, 0xc3, 0x9b, 0xe9, 0xfc, 0xda, 0x6c, 0x19, 0x07, 0x8c, 0x6a, 0x9d, 0x1b]]
|
||||
test_cfb_ciphertext = [
|
||||
[0xdc, 0x7e, 0x84, 0xbf, 0xda, 0x79, 0x16, 0x4b, 0x7e, 0xcd, 0x84, 0x86, 0x98, 0x5d, 0x38, 0x60],
|
||||
[0x39, 0xff, 0xed, 0x14, 0x3b, 0x28, 0xb1, 0xc8, 0x32, 0x11, 0x3c, 0x63, 0x31, 0xe5, 0x40, 0x7b],
|
||||
[0xdf, 0x10, 0x13, 0x24, 0x15, 0xe5, 0x4b, 0x92, 0xa1, 0x3e, 0xd0, 0xa8, 0x26, 0x7a, 0xe2, 0xf9],
|
||||
[0x75, 0xa3, 0x85, 0x74, 0x1a, 0xb9, 0xce, 0xf8, 0x20, 0x31, 0x62, 0x3d, 0x55, 0xb1, 0xe4, 0x71]]
|
||||
test_ofb_ciphertext = [
|
||||
[0xdc, 0x7e, 0x84, 0xbf, 0xda, 0x79, 0x16, 0x4b, 0x7e, 0xcd, 0x84, 0x86, 0x98, 0x5d, 0x38, 0x60],
|
||||
[0x4f, 0xeb, 0xdc, 0x67, 0x40, 0xd2, 0x0b, 0x3a, 0xc8, 0x8f, 0x6a, 0xd8, 0x2a, 0x4f, 0xb0, 0x8d],
|
||||
[0x71, 0xab, 0x47, 0xa0, 0x86, 0xe8, 0x6e, 0xed, 0xf3, 0x9d, 0x1c, 0x5b, 0xba, 0x97, 0xc4, 0x08],
|
||||
[0x01, 0x26, 0x14, 0x1d, 0x67, 0xf3, 0x7b, 0xe8, 0x53, 0x8f, 0x5a, 0x8b, 0xe7, 0x40, 0xe4, 0x84]]
|
||||
|
||||
def hex_output(self, list):
|
||||
#Debugging output helper
|
||||
result = '['
|
||||
for i in list[:-1]:
|
||||
result += hex(i) + ','
|
||||
return result + hex(list[-1]) + ']'
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1 @@
snagged from http://www.nmr.mgh.harvard.edu/Neural_Systems_Group/gary/python.html
@@ -0,0 +1,2 @@
from . import stats
@@ -0,0 +1,626 @@
##
|
||||
## glplot.py ... combines OpenGL and wxPython to produce quick-and-dirty, zoomable line-plots
|
||||
##
|
||||
## Copyright (c) Gary Strangman, All Rights Reserved
|
||||
## This software is provided AS-IS. Improvements are welcome. strang@nmr.mgh.harvard.edu
|
||||
##
|
||||
## NOTE: left button and drag creates a zoom box, right button returns to full-plot view
|
||||
##
|
||||
## Requires PyOpenGL, Numeric, and wxPython, and Python 2.2+
|
||||
## Tested on Linux and Windoze platforms. Does what I need it to do on both.
|
||||
##
|
||||
|
||||
try:
|
||||
import im # im module only required to save the generated bitmaps
|
||||
except:
|
||||
pass
|
||||
|
||||
import glob, os, sys, string
|
||||
import Numeric as N
|
||||
from wxPython.wx import *
|
||||
|
||||
|
||||
from OpenGL.GL import *
|
||||
from OpenGL.GLU import *
|
||||
from OpenGL import GLUT
|
||||
from wxPython.wx import *
|
||||
from wxPython.glcanvas import *
|
||||
from Numeric import *
|
||||
import math, os, sys
|
||||
|
||||
glplotcolors = [(0.,0.,1.), # blue
|
||||
(0.,1.,0.), # green
|
||||
(1.,0.,0.), # red
|
||||
(0.,1.,1.), # cyan
|
||||
(1.,0.,1.), # magenta
|
||||
(1.,1.,0.)] # yellow
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------------
|
||||
|
||||
class RawOpengl(wxGLCanvas):
|
||||
def __init__(self, parent,*args,**kw):
|
||||
apply(wxGLCanvas.__init__,(self,parent),kw)
|
||||
EVT_SIZE(self,self.wxSize)
|
||||
EVT_PAINT(self,self.wxPaint)
|
||||
EVT_ERASE_BACKGROUND(self, self.wxEraseBackground)
|
||||
|
||||
def wxSize(self, event):
|
||||
### Size callback
|
||||
size = self.GetClientSize()
|
||||
if self.GetContext():
|
||||
self.SetCurrent()
|
||||
glViewport(0, 0, size.width, size.height)
|
||||
|
||||
def wxEraseBackground(self, event):
|
||||
pass # Do nothing, to avoid flashing.
|
||||
|
||||
def wxPaint(self,*dummy):
|
||||
dc = wxPaintDC(self)
|
||||
self.wxRedraw(None)
|
||||
|
||||
def wxRedraw(self, *dummy):
|
||||
### Capture rendering context
|
||||
dc = wxClientDC(self)
|
||||
# dc = wxPaintDC(self)
|
||||
self.SetCurrent()
|
||||
|
||||
_mode = glGetDouble(GL_MATRIX_MODE)
|
||||
glMatrixMode(GL_PROJECTION)
|
||||
|
||||
glPushMatrix()
|
||||
self.redraw()
|
||||
glFlush()
|
||||
glPopMatrix()
|
||||
|
||||
### Swap buffers
|
||||
self.SwapBuffers()
|
||||
|
||||
glMatrixMode(_mode)
|
||||
|
||||
def wxExpose(self, *dummy):
|
||||
self.wxRedraw()
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------------
|
||||
class OpenglMultiLinePlot(RawOpengl):
|
||||
"""
|
||||
A class for drawing line plots on an openGL canvas.
|
||||
"""
|
||||
def __init__(self, parent=None, autospin_allowed=1, xs=None, ys=None, errors=None, **kw):
|
||||
apply(RawOpengl.__init__, (self, parent), kw)
|
||||
self.parent = parent
|
||||
if ys is None:
|
||||
self.ys = None
|
||||
self.xs = None
|
||||
else: # len(ys.shape) == 1:
|
||||
self.set_ys(ys)
|
||||
self.set_xs(xs)
|
||||
self.errors = errors
|
||||
self.arrow = 0
|
||||
self.font = GLUT.GLUT_BITMAP_HELVETICA_12
|
||||
# self.font = WGL.
|
||||
# self.font = GLTTwxFont.GLTTFont('arialbd',9)
|
||||
self.parent = parent
|
||||
self.drawcount = 0
|
||||
self.redraw = self.paintit
|
||||
self.xscale = 1
|
||||
self.yscale = 1
|
||||
self.lineweight = 1.0
|
||||
self.bkgdcolor = [0., 0., 0., 0.]
|
||||
self.settingbackground = 0
|
||||
self.xminusflag = 0
|
||||
self.yminusflag = 0
|
||||
self.box = None
|
||||
self.dataxmin = min(ravel(self.xs))
|
||||
self.dataymin = min(ravel(self.ys))
|
||||
self.dataxmax = max(ravel(self.xs))
|
||||
self.dataymax = max(ravel(self.ys))
|
||||
self.plotxmin = self.dataxmin
|
||||
self.plotymin = self.dataymin
|
||||
self.plotxmax = self.dataxmax
|
||||
self.plotymax = self.dataymax
|
||||
EVT_MOUSE_EVENTS(self, self.OnMouseEvent)
|
||||
EVT_CHAR(self,self.OnChar)
|
||||
|
||||
# def wxPaint(self,*dummy):
|
||||
# dc = wxPaintDC(self)
|
||||
# self.paintit()
|
||||
|
||||
def OnChar(self, event):
|
||||
# print event.KeyCode()
|
||||
if event.KeyCode() < 256:
|
||||
key = string.upper(chr(event.KeyCode()))
|
||||
if key == 'L':
|
||||
popup = wxFileDialog(NULL, "Choose LOG filename ...", "",
|
||||
"", "*", wxSAVE, wxPoint(100,100))
|
||||
popup.ShowModal()
|
||||
# @@@need to make "enter" default to Save, somehow
|
||||
a = glReadPixels(0,0,self.GetSize().x,self.GetSize().y,GL_RGB,GL_UNSIGNED_BYTE)
|
||||
size = self.GetClientSizeTuple()
|
||||
a = array(size,Int).tostring() + a
|
||||
f=open(popup.GetFilename(),'wb')
|
||||
f.write(a)
|
||||
f.close()
|
||||
|
||||
def OnMouseEvent(self,event):
|
||||
size = self.GetSize()
|
||||
# determine where (in proportions) on screen the click happened
|
||||
xr = float(event.GetX())/size.x # GetX=0 at left
|
||||
yr = float(event.GetY())/size.y # GetY=0 at top
|
||||
# scale this location to where WITHIN THE PLOT the click happened (in proportions)
|
||||
# ... with 0,0 at lower left of PLOT area
|
||||
xrs = (xr-(1-self.xscale)/2.)/float(self.xscale) # scale to the plot area
|
||||
yrs = 1-(yr-(1-self.yscale)/2.)/float(self.yscale) # invert Y and scale to plot area
|
||||
if event.LeftDown():
|
||||
self.xminusflag = 0 #was selection box dragged LEFT?
|
||||
self.yminusflag = 0 #was selection box dragged UP?
|
||||
self.box = [(xrs*(self.plotxmax-self.plotxmin)+self.plotxmin),
|
||||
(yrs*(self.plotymax-self.plotymin)+self.plotymin)]
|
||||
self.xstart = xr
|
||||
self.ystart = yr
|
||||
elif self.box and event.LeftIsDown() and not event.LeftDown():
|
||||
# compute position of other box-corner within plot
|
||||
nxrs = (xrs*(self.plotxmax-self.plotxmin)+self.plotxmin)
|
||||
nyrs = (yrs*(self.plotymax-self.plotymin)+self.plotymin)
|
||||
if nxrs < self.box[0]:
|
||||
self.xminusflag = 1
|
||||
else:
|
||||
self.xminusflag = 0
|
||||
if nyrs < self.box[1]:
|
||||
self.yminusflag = 1
|
||||
else:
|
||||
self.yminusflag = 0
|
||||
if self.box[0]<>nxrs or self.box[1]<>nyrs:
|
||||
self.box = [self.box[0], self.box[1], nxrs, nyrs]
|
||||
else: # may need to convert a 4-element box to a 2-element box
|
||||
self.box = [nxrs, nyrs]
|
||||
self.xend = xr
|
||||
self.yend = yr
|
||||
self.paintit()
|
||||
elif event.LeftUp():
|
||||
if len(self.box)>2:
|
||||
# if dragged up or left, exchange value-pairs
|
||||
if self.box[0] > self.box[2]:
|
||||
self.box[0],self.box[2] = self.box[2],self.box[0]
|
||||
if self.box[1] > self.box[3]:
|
||||
self.box[1],self.box[3] = self.box[3],self.box[1]
|
||||
self.plotxmin = self.box[0]
|
||||
self.plotymin = self.box[1]
|
||||
self.plotxmax = self.box[2]
|
||||
self.plotymax = self.box[3]
|
||||
self.xminusflag = 0
|
||||
self.yminusflag = 0
|
||||
self.box = None
|
||||
self.paintit() # can't use wxRedraw for some reason
|
||||
if event.RightUp():
|
||||
self.plotxmin = self.dataxmin
|
||||
self.plotymin = self.dataymin
|
||||
self.plotxmax = self.dataxmax
|
||||
self.plotymax = self.dataymax
|
||||
self.box = None
|
||||
self.paintit() # can't use wxRedraw for some reason
|
||||
|
||||
def OnSize(self, event):
|
||||
size = self.GetClientSize()
|
||||
if self.GetContext() != 'NULL':
|
||||
self.SetCurrent()
|
||||
glViewport(0, 0, size.width, size.height)
|
||||
|
||||
def changelineweight(self,step):
|
||||
self.lineweight += step
|
||||
if self.lineweight <= 0:
|
||||
self.lineweight = 0.1
|
||||
self.paintit()
|
||||
|
||||
def save_colorpixelmap(self):
|
||||
string = glReadPixels(0,0,self.GetSize().x,self.GetSize().y,GL_RGB,GL_UNSIGNED_BYTE)
|
||||
size = list(self.GetClientSizeTuple())
|
||||
a = fromstring(string,Int8) # convert pixels to array
|
||||
print a.shape, size
|
||||
size[0],size[1] = size[1],size[0] # swap x,y dimensions for proper unraveling
|
||||
r = a[0::3]+0
|
||||
g = a[1::3]+0
|
||||
b = a[2::3]+0
|
||||
r.shape = size
|
||||
g.shape = size
|
||||
b.shape = size
|
||||
carray = array([r[::-1,:],g[::-1,:],b[::-1,:]]) # up-down flip the image
|
||||
print carray.shape, type(carray), carray.typecode(), min(ravel(carray)), max(ravel(carray))
|
||||
im.ashow(carray)
|
||||
|
||||
def save_graypixelmap(self):
|
||||
string = glReadPixels(0,0,self.GetSize().x,self.GetSize().y,GL_LUMINANCE,GL_FLOAT)
|
||||
size = list(self.GetClientSizeTuple())
|
||||
a = fromstring(string,Float32) # convert pixels to array
|
||||
print a.shape, size
|
||||
size[0],size[1] = size[1],size[0] # swap x,y dimensions for proper unraveling
|
||||
carray = reshape(a,size)*255 # must be a luminance map
|
||||
print carray.shape, type(carray), carray.typecode(), min(ravel(carray)), max(ravel(carray))
|
||||
im.ashow(carray[::-1,:])
|
||||
|
||||
def setbackground(self,color):
|
||||
if self.settingbackground:
|
||||
return
|
||||
if len(color) == 3:
|
||||
color = list(color) + [0.]
|
||||
apply(glClearColor,color)
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
|
||||
self.bkgdcolor = color
|
||||
self.settingbackground = 1
|
||||
self.paintit()
|
||||
self.settingbackground = 0
|
||||
|
||||
def set_xs(self, xs=None):
|
||||
if self.ys is None:
|
||||
xs = None
|
||||
return
|
||||
elif xs is None:
|
||||
xs = arange(self.ys.shape[0])
|
||||
self.xs = xs
|
||||
self.x_offset = -xs[0]
|
||||
self.x_scale = 1.0/(max(xs)-min(xs))
|
||||
self.dataxmin = min(ravel(self.xs))
|
||||
self.dataxmax = max(ravel(self.xs))
|
||||
self.plotxmin = self.dataxmin
|
||||
self.plotxmax = self.dataxmax
|
||||
|
||||
def transform(self, ys):
|
||||
# should convert to a rank-2 array
|
||||
return add.reduce(ys)
|
||||
|
||||
def set_ys(self, ys):
|
||||
if ys is None:
|
||||
self.ys = None
|
||||
return
|
||||
while len(ys.shape) > 2:
|
||||
ys = self.transform(ys)
|
||||
self.ys = ys
|
||||
self.y_offset = -ys[0]
|
||||
try:
|
||||
self.y_scale = 1.0/(max(ys)-min(ys))
|
||||
except ZeroDivisionError:
|
||||
self.y_scale = 1.0
|
||||
self.dataymin = min(ravel(self.ys))
|
||||
self.dataymax = max(ravel(self.ys))
|
||||
self.plotymin = self.dataymin
|
||||
self.plotymax = self.dataymax
|
||||
|
||||
def set_errors(self, errors):
|
||||
if errors is None:
|
||||
self.errors = None
|
||||
return
|
||||
while len(errors.shape) > 2:
|
||||
errors = self.transform(errors)
|
||||
self.errors = errors
|
||||
self.dataymin = min(ravel(self.ys-abs(self.errors)))
|
||||
self.dataymax = max(ravel(self.ys+abs(self.errors)))
|
||||
self.plotymin = self.dataymin
|
||||
self.plotymax = self.dataymax
|
||||
|
||||
def paintit(self):#, event):
|
||||
### PREPARE FOR DRAWING AND CLEAR WINDOW
|
||||
self.setbackground(self.bkgdcolor)
|
||||
if self.ys is None:
|
||||
return
|
||||
|
||||
### SET UP FOR REDRAWING
|
||||
if not self.xs:
|
||||
self.set_xs()
|
||||
size = self.GetClientSize()
|
||||
w,h = size.x, size.y
|
||||
WZ = float(w) / len(self.xs)
|
||||
HZ = float(h) / len(self.ys)
|
||||
glLoadIdentity()
|
||||
glEnable(GL_LINE_SMOOTH)
|
||||
glEnable(GL_BLEND)
|
||||
glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
|
||||
glHint(GL_POINT_SMOOTH_HINT, GL_NICEST)
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
|
||||
|
||||
# IMPORTANT COORDINATE TRANSFORMATIONS
|
||||
self.xscale = 0.84
|
||||
self.yscale = 0.8
|
||||
glScale(self.xscale, self.yscale, 1.0) # scale everything hereafter in this matrix
|
||||
glOrtho(self.plotxmin, self.plotxmax,
|
||||
self.plotymin, self.plotymax,
|
||||
0, 1)
|
||||
|
||||
# Make sure both are 2D, so plot code can be general for multi and single lines
|
||||
if len(self.ys.shape) == 1:
|
||||
self.ys = self.ys[:,NewAxis]
|
||||
if self.errors:
|
||||
self.errors.shape = (len(self.errors),1)
|
||||
|
||||
### PLOT ERRORBARS (SAME COLOR AS ASSOCIATED TIMESERIES)
|
||||
if hasattr(self, 'errors') and self.errors:
|
||||
# loop through all timeseries'
|
||||
for i in range(self.errors.shape[1]):
|
||||
if self.errors.shape[1] > 1:
|
||||
colortrio = glplotcolors[i%len(glplotcolors)]
|
||||
apply(glColor3f,colortrio)
|
||||
else:
|
||||
glColor3f(1.,1.,0.)
|
||||
glLineWidth(1.0)
|
||||
lower = self.ys[:,i] - self.errors[:,i]
|
||||
upper = self.ys[:,i] + self.errors[:,i]
|
||||
glBegin(GL_LINES)
|
||||
for x,yl, yu in transpose(array([self.xs, lower, upper])):
|
||||
if x>=self.plotxmin and x<=self.plotxmax:
|
||||
glVertex2f(x,yl)
|
||||
glVertex2f(x,yu)
|
||||
glEnd()
|
||||
|
||||
|
||||
### PLOT TIMESERIES (after/ON-TOP-OF ERRORBARS)
|
||||
# loop through all timeseries'
|
||||
for i in range(self.ys.shape[1]):
|
||||
glLineWidth(self.lineweight)
|
||||
if self.ys.shape[1] > 1:
|
||||
colortrio = glplotcolors[i%len(glplotcolors)]
|
||||
apply(glColor3f,colortrio)
|
||||
else:
|
||||
glColor3f(1.,1.,1.)
|
||||
d = array((self.xs+0.0, self.ys[:,i]))
|
||||
t = transpose(d)
|
||||
glBegin(GL_LINE_STRIP)
|
||||
for vert in t:
|
||||
if vert[0]>=self.plotxmin and vert[0]<=self.plotxmax:
|
||||
glVertex(vert[0],vert[1])
|
||||
glEnd()
|
||||
|
||||
### PLOT X/Y-AXIS LINES (white)
|
||||
glColor3f(1.,1.,1.)
|
||||
glLineWidth(1.5)
|
||||
glBegin(GL_LINES)
|
||||
glVertex2i(self.plotxmin, 0)
|
||||
glVertex2i(self.plotxmax, 0)
|
||||
glVertex2i(0, self.plotymin)
|
||||
glVertex2i(0, self.plotymax)
|
||||
glEnd()
|
||||
|
||||
###
|
||||
### TEXT PLOTTING CODE ... USED TO USE PyGLTT; NOW USES GLUT (until GLTT/FTGL works again)
|
||||
###
|
||||
self.textcolor = (1,1,1)
|
||||
|
||||
# Pick round numbers to be displayed
|
||||
xrange_sigfig = log10(self.plotxmax-self.plotxmin)
|
||||
yrange_sigfig = log10(self.plotymax-self.plotymin)
|
||||
# print self.plotymax, self.plotymin, yrange_sigfig
|
||||
if xrange_sigfig<=1:
|
||||
xrounddigits = int(xrange_sigfig)+3
|
||||
else:
|
||||
xrounddigits = 0
|
||||
if yrange_sigfig<=1:
|
||||
yrounddigits = int(yrange_sigfig)+3
|
||||
else:
|
||||
yrounddigits = 0
|
||||
# print self.plotymax, self.plotymin, yrange_sigfig
|
||||
|
||||
# And properly format the numeric text strings to be dispalyed
|
||||
if xrounddigits:
|
||||
xminstr = str(round(self.plotxmin,xrounddigits))
|
||||
xmaxstr = str(round(self.plotxmax,xrounddigits))
|
||||
else:
|
||||
xminstr = str(int(round(self.plotxmin,xrounddigits)))
|
||||
xmaxstr = str(int(round(self.plotxmax,xrounddigits)))
|
||||
if yrounddigits:
|
||||
yminstr = str(round(self.plotymin,yrounddigits))
|
||||
ymaxstr = str(round(self.plotymax,yrounddigits))
|
||||
else:
|
||||
yminstr = str(int(round(self.plotymin,yrounddigits)))
|
||||
ymaxstr = str(int(round(self.plotymax,yrounddigits)))
|
||||
|
||||
# Figure out where to place the numerical labels
|
||||
# NOTE: Though we are using an Identity matrix, bitmap font locations apparently
|
||||
# want to be localized in pixel-coordinates (hence all the GetSize() calls)
|
||||
glPushMatrix()
|
||||
glLoadIdentity()
|
||||
xaxis_yoffset = -0.93*self.GetSize().y
|
||||
yaxis_xoffset = -0.94*self.GetSize().x
|
||||
xaxis_xmin = (-self.xscale-0.01)*self.GetSize().x
|
||||
xaxis_xmax = (self.xscale-0.01)*self.GetSize().x
|
||||
yaxis_ymin = -0.86*self.GetSize().y
|
||||
yaxis_ymax = 0.78*self.GetSize().y
|
||||
|
||||
# print
|
||||
# print self.GetSize(), self.GetClientSize()
|
||||
# print "X-axis min: ",xaxis_xmin, xaxis_yoffset, ' / ', xminstr
|
||||
# print "X-axis max: ",xaxis_xmax, xaxis_yoffset, ' / ', xmaxstr
|
||||
# print "Y-axis min: ",yaxis_xoffset, yaxis_ymin, ' / ', yminstr
|
||||
# print "Y-axis max: ",yaxis_xoffset, yaxis_ymax, ' / ', ymaxstr
|
||||
|
||||
### y-axis maximum
|
||||
self.draw_text(self,
|
||||
yaxis_xoffset, #self.GetSize().x*xoffset,
|
||||
yaxis_ymax, #self.GetSize().y*ymaxoffset,
|
||||
ymaxstr,None,None)
|
||||
### y-axis minimum
|
||||
self.draw_text(self,
|
||||
yaxis_xoffset, #self.GetSize().x*xoffset,
|
||||
yaxis_ymin, #self.GetSize().y*yminoffset,
|
||||
yminstr,None,None)
|
||||
# GLTTwxFont.ALIGN_RIGHT, GLTTwxFont.VALIGN_BOTTOM)
|
||||
### x-axis maximum
|
||||
self.draw_text(self,
|
||||
xaxis_xmax, #self.GetSize().x*xoffset,
|
||||
xaxis_yoffset, #self.GetSize().y*ymaxoffset,
|
||||
xmaxstr,None,None)
|
||||
### x-axis minimum
|
||||
self.draw_text(self,
|
||||
xaxis_xmin, #self.GetSize().x*xoffset,
|
||||
xaxis_yoffset, #self.GetSize().y*yminoffset,
|
||||
xminstr,None,None)
|
||||
# GLTTwxFont.ALIGN_RIGHT, GLTTwxFont.VALIGN_BOTTOM)
|
||||
### arrow value
|
||||
# self.draw_text(self,
|
||||
# xarrowoffset,
|
||||
# self.GetSize().y*ymaxoffset,
|
||||
# ' '+str(round(self.ys[self.arrow],1)), 0, 0) #,
|
||||
# GLTTwxFont.ALIGN_LEFT, GLTTwxFont.VALIGN_BOTTOM)
|
||||
### arrow timepoint
|
||||
# self.draw_text(self,
|
||||
# xarrowoffset,
|
||||
# self.GetSize().y*yminoffset,
|
||||
# ' '+str(self.arrow),None,None)
|
||||
# GLTTwxFont.ALIGN_LEFT, GLTTwxFont.VALIGN_BOTTOM)
|
||||
|
||||
# Finally, draw a bounding-box (bottom/top left/right)
|
||||
# NOTE: No need to use GetSize() here; we have an Identity matrix and are
|
||||
# drawing normal (non-bitmap-text) stuff
|
||||
BL = [-self.xscale,-self.yscale]
|
||||
TL = [-self.xscale, self.yscale]
|
||||
TR = [self.xscale, self.yscale]
|
||||
BR = [self.xscale, -self.yscale]
|
||||
|
||||
#print BL, TL, TR, BR
|
||||
glPointSize(1.0)
|
||||
glColor3f(0.3,0.3,0.3)
|
||||
glBegin(GL_LINE_STRIP)
|
||||
glVertex2f(BL[0],BL[1])
|
||||
glVertex2f(TL[0],TL[1])
|
||||
glVertex2f(TR[0],TR[1])
|
||||
glVertex2f(BR[0],BR[1])
|
||||
glVertex2f(BL[0],BL[1])
|
||||
glEnd()
|
||||
|
||||
glPopMatrix()
|
||||
|
||||
### LAST, BUT NOT LEAST, DRAW SELECTION-BOX ... (RED)
|
||||
if self.box and len(self.box)==4:
|
||||
glPointSize(2.0)
|
||||
glColor3f(1.,0.,0.)
|
||||
glBegin(GL_LINE_STRIP)
|
||||
glVertex2f(self.box[0], self.box[1])
|
||||
glVertex2f(self.box[2], self.box[1])
|
||||
glVertex2f(self.box[2], self.box[3])
|
||||
glVertex2f(self.box[0], self.box[3])
|
||||
glVertex2f(self.box[0], self.box[1])
|
||||
glEnd()
|
||||
|
||||
# FINALLY, CLIP VIEW TO SPECIFIED SUB-PORTION OF WINDOW
|
||||
# glEnable(GL_CLIP_PLANE1)
|
||||
# glEnable(GL_CLIP_PLANE2)
|
||||
# glEnable(GL_CLIP_PLANE3)
|
||||
# glEnable(GL_CLIP_PLANE4)
|
||||
# glClipPlane(GL_CLIP_PLANE1, [0., 1., 0., -self.plotymin]) # clips off the bottom
|
||||
# glClipPlane(GL_CLIP_PLANE2, [0., -1., 0., self.plotymax]) # clips off the top
|
||||
# glClipPlane(GL_CLIP_PLANE3, [1., 0., 0., -self.plotxmin]) # clips off the left
|
||||
# glClipPlane(GL_CLIP_PLANE4, [-1., 0., 0., self.plotxmax]) # clips off the right
|
||||
|
||||
self.SwapBuffers() # NECESSARY, or screen doesn't redraw
|
||||
|
||||
def draw_text(self, canvas, x,y,text,align,valign):
|
||||
apply(glColor3f, self.textcolor)
|
||||
size = self.GetClientSize()
|
||||
w,h = float(size.x), float(size.y)
|
||||
glRasterPos2f(x/w,y/h)
|
||||
for char in text:
|
||||
# print x,y,self.font,char
|
||||
GLUT.glutBitmapCharacter(self.font,ord(char)) #text[0]) #self.font,text)
|
||||
# self.font.write_string(canvas, x, y, text, align, valign)
|
||||
|
||||
    def getpropX(self, x):
        # map a pixel x-coordinate to its proportion of the plot area,
        # which occupies the middle 80% of the canvas (10% margin each side)
        w = self.GetClientSize().x
        p = (x - w*.1) / (w*.8)
        return p

    def TimeToQuit(self, event):
        ### REMAKE LINEPLOT WHEN SELF.BOX IS RE-CREATED
        self.Close(true)


def glplot(yvals=None, xvals=None, errors=None):
    """
    Create a plot using a wxGLCanvas.

    Usage: glplot(
        yvals=None,    y-axis data; pass yvals alone to plot against x = range(len(yvals))
        xvals=None,    x-axis data
        errors=None)   y-axis errorbar data
    """
    # use identity tests so array arguments are never truth-tested
    if xvals is None and yvals is None:
        return
    if xvals is None:
        xvals = N.arange(yvals.shape[0])

    class MyApp(wxApp):
        def OnInit(self):
            windowXpixels = 8   # 8 pixels of frame OUTSIDE the canvas
            windowYpixels = 27  # 27 pixels of frame plus title-bar OUTSIDE the canvas
            self.frame = wxFrame(NULL, -1, "wxPython Context",
                                 wxPoint(0, 0),
                                 wxSize(1200+windowXpixels, 400+windowYpixels))

            self.mainmenu = wxMenuBar()
            filemenu = wxMenu()
            cimgID = wxNewId()
            gimgID = wxNewId()
            exitID = wxNewId()
            filemenu.Append(cimgID, 'Save C&olor\tAlt-C', 'Save color pixelmap using IC.exe')
            filemenu.Append(gimgID, 'Save G&ray\tAlt-G', 'Save gray pixelmap using IC.exe')
            filemenu.Append(exitID, 'E&xit\tAlt-X', 'Quit')
            EVT_MENU(self, cimgID, self.OnCImgSave)
            EVT_MENU(self, gimgID, self.OnGImgSave)
            EVT_MENU(self, exitID, self.OnFileExit)
            self.mainmenu.Append(filemenu, '&File')

            propmenu = wxMenu()
            fontID = wxNewId()
            lineweightupID = wxNewId()
            lineweightdnID = wxNewId()
            bkgdID = wxNewId()
            propmenu.Append(fontID, 'F&onts\tAlt-F', 'Change font for all text items')
            propmenu.Append(lineweightupID, 'I&ncrease lineweight\tAlt-I', 'Increase plotting line weight')
            propmenu.Append(lineweightdnID, 'D&ecrease lineweight\tAlt-D', 'Decrease plotting line weight')
            propmenu.Append(bkgdID, 'B&ackground color\tAlt-B', 'Change plot background color')
            EVT_MENU(self, fontID, self.OnFont)
            EVT_MENU(self, lineweightupID, self.OnLineweightup)
            EVT_MENU(self, lineweightdnID, self.OnLineweightdn)
            EVT_MENU(self, bkgdID, self.OnBkgd)
            self.mainmenu.Append(propmenu, '&Edit')
            self.frame.SetMenuBar(self.mainmenu)

            # Now, create the line-plot part
            self.win = OpenglMultiLinePlot(self.frame, autospin_allowed=0)
            self.frame.Show(TRUE)
            self.SetTopWindow(self.frame)
            return TRUE

        def OnCImgSave(self, event):
            self.win.save_colorpixelmap()

        def OnGImgSave(self, event):
            self.win.save_graypixelmap()

        def OnFileExit(self, event):
            sys.exit()

        def OnFont(self, event):
            data = wxFontData()
            dlg = wxFontDialog(self.frame, data)
            if dlg.ShowModal() == wxID_OK:
                data = dlg.GetFontData()
                font = data.GetChosenFont()
                print 'You selected:', font.GetFaceName(), ',', str(font.GetPointSize()), ', color', data.GetColour().Get()
                self.win.fontname = font.GetFaceName()
                self.win.fontstype = font.GetStyle()
                self.win.fontsize = font.GetPointSize()
                self.win.fontcolor = data.GetColour().Get()
            dlg.Destroy()

        def OnLineweightup(self, event):
            self.win.changelineweight(+1)

        def OnLineweightdn(self, event):
            self.win.changelineweight(-1)

        def OnBkgd(self, event):
            data = wxColourData()
            dlg = wxColourDialog(self.frame, data)
            if dlg.ShowModal() == wxID_OK:
                # only touch the background when the user pressed OK;
                # otherwise wxcolor would be unbound
                data = dlg.GetColourData()
                wxcolor = data.GetColour()
                color = N.array([wxcolor.Red(), wxcolor.Green(), wxcolor.Blue()])
                newcolor = color / 255.  # scale 0..255 RGB into OpenGL's 0..1 range
                self.win.setbackground(newcolor)
            dlg.Destroy()

    app = MyApp(0)
    app.win.set_xs(xvals)
    app.win.set_ys(yvals)
    app.win.set_errors(errors)
    app.MainLoop()
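For reference, a minimal sketch of driving this entry point (assumptions: the module above is importable as glplot, and an array package is bound to N as in the file itself):

    import numpy as N
    from glplot import glplot  # module name assumed for illustration

    yvals = N.sin(N.arange(0, 10, .1))      # one trace of 100 samples
    errors = N.zeros(yvals.shape[0]) + .05  # constant errorbars
    glplot(yvals=yvals, errors=errors)      # xvals omitted -> x = range(len(yvals))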
Diff not shown for three files because of their large size.
@ -0,0 +1,199 @@
import stats, os, pstat
reload(stats)

try:
    import numpy as N
except ImportError:
    pass

l = range(1,21)
lf = range(1,21)
lf[2] = 3.0
a = N.array(l)
af = N.array(lf)
ll = [l]*5
aa = N.array(ll)

print '\nCENTRAL TENDENCY'
print 'geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af)
print 'harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af)
print 'mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af)
print 'median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af)
print 'medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af)
print 'mode:',stats.mode(l),stats.mode(a)

print '\nMOMENTS'
print 'moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af)
print 'variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af)
print 'skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af)
print 'kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af)
print 'tmean:',stats.tmean(a,(5,17)),stats.tmean(af,(5,17))
print 'tvar:',stats.tvar(a,(5,17)),stats.tvar(af,(5,17))
print 'tstdev:',stats.tstdev(a,(5,17)),stats.tstdev(af,(5,17))
print 'tsem:',stats.tsem(a,(5,17)),stats.tsem(af,(5,17))
print 'describe:'
print stats.describe(l)
print stats.describe(lf)
print stats.describe(a)
print stats.describe(af)

print '\nFREQUENCY'
print 'freqtable:'
print 'itemfreq:'
print stats.itemfreq(l)
print stats.itemfreq(a)
print 'scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40)
print 'percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12)
print 'histogram:',stats.histogram(l),stats.histogram(a)
print 'cumfreq:'
print stats.cumfreq(l)
print stats.cumfreq(lf)
print stats.cumfreq(a)
print stats.cumfreq(af)
print 'relfreq:'
print stats.relfreq(l)
print stats.relfreq(lf)
print stats.relfreq(a)
print stats.relfreq(af)

print '\nVARIATION'
print 'obrientransform:'

l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)

print stats.obrientransform(l,l,l,l,l)
print stats.obrientransform(a,a,a,a,a)

print 'samplevar:',stats.samplevar(l),stats.samplevar(a)
print 'samplestdev:',stats.samplestdev(l),stats.samplestdev(a)
print 'var:',stats.var(l),stats.var(a)
print 'stdev:',stats.stdev(l),stats.stdev(a)
print 'sterr:',stats.sterr(l),stats.sterr(a)
print 'sem:',stats.sem(l),stats.sem(a)
print 'z:',stats.z(l,4),stats.z(a,4)
print 'zs:'
print stats.zs(l)
print stats.zs(a)

print '\nTRIMMING'
print 'trimboth:'
print stats.trimboth(l,.2)
print stats.trimboth(lf,.2)
print stats.trimboth(a,.2)
print stats.trimboth(af,.2)
print 'trim1:'
print stats.trim1(l,.2)
print stats.trim1(lf,.2)
print stats.trim1(a,.2)
print stats.trim1(af,.2)

print '\nCORRELATION'
#execfile('testpairedstats.py')

l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)

m = range(4,24)
m[10] = 34
b = N.array(m)

pb = [0]*9 + [1]*11
apb = N.array(pb)

print 'paired:'
#stats.paired(l,m)
#stats.paired(a,b)

print
print
print 'pearsonr:'
print stats.pearsonr(l,m)
print stats.pearsonr(a,b)
print 'spearmanr:'
print stats.spearmanr(l,m)
print stats.spearmanr(a,b)
print 'pointbiserialr:'
print stats.pointbiserialr(pb,l)
print stats.pointbiserialr(apb,a)
print 'kendalltau:'
print stats.kendalltau(l,m)
print stats.kendalltau(a,b)
print 'linregress:'
print stats.linregress(l,m)
print stats.linregress(a,b)

print '\nINFERENTIAL'
print 'ttest_1samp:'
print stats.ttest_1samp(l,12)
print stats.ttest_1samp(a,12)
print 'ttest_ind:'
print stats.ttest_ind(l,m)
print stats.ttest_ind(a,b)
print 'ttest_rel:'
print stats.ttest_rel(l,m)
print stats.ttest_rel(a,b)
print 'chisquare:'
print stats.chisquare(l)
print stats.chisquare(a)
print 'ks_2samp:'
print stats.ks_2samp(l,m)
print stats.ks_2samp(a,b)

print 'mannwhitneyu:'
print stats.mannwhitneyu(l,m)
print stats.mannwhitneyu(a,b)
print 'ranksums:'
print stats.ranksums(l,m)
print stats.ranksums(a,b)
print 'wilcoxont:'
print stats.wilcoxont(l,m)
print stats.wilcoxont(a,b)
print 'kruskalwallish:'
print stats.kruskalwallish(l,m,l)
print len(l), len(m)
print stats.kruskalwallish(a,b,a)
print 'friedmanchisquare:'
print stats.friedmanchisquare(l,m,l)
print stats.friedmanchisquare(a,b,a)

l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)

m = range(4,24)
m[10] = 34
b = N.array(m)

print '\n\nF_oneway:'
print stats.F_oneway(l,m)
print stats.F_oneway(a,b)
#print 'F_value:',stats.F_value(l),stats.F_value(a)

print '\nSUPPORT'
print 'sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af)
print 'cumsum:'
print stats.cumsum(l)
print stats.cumsum(lf)
print stats.cumsum(a)
print stats.cumsum(af)
print 'ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af)
print 'summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b)
print 'sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af)
print 'sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b)
print 'shellsort:'
print stats.shellsort(m)
print stats.shellsort(b)
print 'rankdata:'
print stats.rankdata(m)
print stats.rankdata(b)

print '\nANOVAs'
execfile('testanova.py')
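A few of the printed values are easy to verify by hand, which makes this script usable as a rough smoke test. For l = range(1, 21), a sketch assuming stats.mean and stats.median use the ordinary definitions:

    # mean   = (1 + 2 + ... + 20) / 20.0 = 210 / 20.0 = 10.5
    # median = (10 + 11) / 2.0 = 10.5 (even-length list, average of middle pair)
    assert sum(range(1, 21)) / 20.0 == 10.5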