fixes to pass some of etl_one, PY3 upgrade

Kyle Lahnakoski 2018-03-29 15:34:22 -04:00
Parent f3dc4a73fc
Commit 32fd67988b
15 changed files with 46 additions and 43 deletions


@@ -118,9 +118,9 @@ class AliasAnalyzer(object):
self.bugs[d.bug_id] = agg
def analysis(self, last_run, please_stop):
DIFF = 7
minimum_diff = 7
if last_run:
DIFF = 4 #ONCE WE HAVE ALL THE DATA IN WE CAN BE LESS DISCRIMINATING
minimum_diff = 4 #ONCE WE HAVE ALL THE DATA IN WE CAN BE LESS DISCRIMINATING
try_again = True
while try_again and not please_stop:
@@ -135,7 +135,7 @@ class AliasAnalyzer(object):
problems = jx.sort([
{"email": e, "count": c}
for e, c in iteritems(problem_agg.dic)
if not self.not_aliases.get(e, None) and (c <= -(DIFF / 2) or last_run)
if not self.not_aliases.get(e, None) and (c <= -(minimum_diff / 2) or last_run)
], ["count", "email"])
try_again = False
@@ -153,7 +153,7 @@ class AliasAnalyzer(object):
if last_run and len(solutions) == 2 and solutions[0].count == -solutions[1].count:
#exact match
pass
elif len(solutions) <= 1 or (solutions[1].count + DIFF >= solutions[0].count):
elif len(solutions) <= 1 or (solutions[1].count + minimum_diff >= solutions[0].count):
#not distinctive enough
continue
@@ -168,7 +168,7 @@ class AliasAnalyzer(object):
try_again = True
self.add_alias(problem.email, best_solution.email)
self.saveAliases()
self.save_aliases()
def alias(self, email):
canonical = self.aliases.get(email, None)
@@ -231,7 +231,7 @@ class AliasAnalyzer(object):
for k, found in reassign:
self.aliases[k] = {"canonical":found, "dirty":True}
def saveAliases(self):
def save_aliases(self):
records = []
for k, v in self.aliases.items():
if v["dirty"]:


@@ -43,6 +43,7 @@ from __future__ import unicode_literals
import math
import re
import jx_elasticsearch
from mo_future import text_type
from bzETL.transform_bugzilla import normalize, NUMERIC_FIELDS, MULTI_FIELDS, DIFF_FIELDS
@@ -90,7 +91,7 @@ class BugHistoryParser(object):
self.alias_config=alias_config
self.aliases = Null
self.initializeAliases()
self.initialize_aliases()
def processRow(self, row_in):
@@ -1002,22 +1003,23 @@ class BugHistoryParser(object):
def alias(self, name):
if name == None:
return Null
return coalesce(self.aliases.get(name, Null).canonical, name)
return coalesce(self.aliases.get(name, Null), name)
def initializeAliases(self):
def initialize_aliases(self):
try:
try:
if self.alias_config.elasticsearch:
esq = jx_elasticsearch.new_instance(self.alias_config.elasticsearch)
result = esq.query({"select": ["alias", "canonical"], "where": {"missing": "ignore"}, "limit": 10000, "format":"list"})
self.aliases = {d.alias:d.canonical for d in result.data}
else:
alias_json = File(self.alias_config.file).read()
except Exception as e:
Log.warning("Could not load alias file", cause=e)
alias_json = "{}"
self.aliases = {k: wrap(v) for k, v in json2value(alias_json).items()}
Log.note("{{num}} aliases loaded", num=len(self.aliases.keys()))
self.aliases = {k: wrap(v) for k, v in json2value(alias_json).items()}
except Exception as e:
Log.error("Can not init aliases", e)
Log.warning("Could not load alias file", cause=e)
self.aliases = {}
Log.note("{{num}} aliases loaded", num=len(self.aliases.keys()))
def deformat(value):
if value == None:
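
The reworked initialize_aliases above pulls the alias map from Elasticsearch when alias_config.elasticsearch is set, falls back to reading a JSON file otherwise, and degrades to an empty map if either source fails. A standalone sketch of that shape with the jx_elasticsearch/mo_dots plumbing replaced by plain Python; the function and parameter names here are illustrative, not from the repo:

    import json

    def load_aliases(fetch_from_es=None, path="bugzilla_aliases.json"):
        # return {alias: canonical_email}; never raise
        try:
            if fetch_from_es is not None:
                # each record carries an "alias" and its "canonical" form,
                # like the esq.query() result above
                return {d["alias"]: d["canonical"] for d in fetch_from_es()}
            with open(path) as f:
                return dict(json.load(f))
        except Exception as cause:
            # a missing or unreadable source should not stop the ETL run
            print("Could not load alias file:", cause)
            return {}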


@@ -72,7 +72,7 @@ def normalize(bug, old_school=False):
if k.startswith("attachments") and (k.endswith("isobsolete") or k.endswith("ispatch") or k.endswith("isprivate")):
new_v=convert.value2int(v)
new_k=k[12:]
a[k.replace(".", "\.")]=new_v
a[k.replace(".", "\\.")]=new_v
if not old_school:
a[new_k]=new_v
a.flags = jx.sort(a.flags, ["modified_ts", "value"])
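
The "\." to "\\." substitutions here, and in the vendored jx_elasticsearch and mo_dots files below, are part of the PY3 upgrade: Python 3.6+ emits a DeprecationWarning for unrecognized backslash escapes in ordinary string literals, so the backslash is now spelled out. Both spellings denote the same two characters:

    escaped_dot = "\\."                  # a backslash followed by a dot
    print(len(escaped_dot))              # 2
    print(escaped_dot == chr(92) + ".")  # True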


@@ -15,8 +15,8 @@
// 6810,
// 9622,
// 10575,
11040
// 12911,
// 11040 //LOTS OF HISTORY
12911,
// 67742,
// 96421,
// 123203,
@@ -54,6 +54,10 @@
"file": {
"path": "resources/schema/bugzilla_aliases.json",
"$ref": "file://~/private.json#alias_file"
},
"elasticsearch": {
"host": "http://localhost",
"index": "bug_aliases"
}
},
"reference": {


@@ -616,12 +616,11 @@ def compare_both(candidate, reference, settings, some_bugs):
max_time = convert.milli2datetime(versions.last().modified_ts)
pre_ref_versions = get_all_bug_versions(reference, bug_id, max_time)
ref_versions = \
jx.sort(
#ADDED TO FIX OLD PRODUCTION BUG VERSIONS
[compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
"modified_ts"
)
ref_versions = jx.sort(
# ADDED TO FIX OLD PRODUCTION BUG VERSIONS
[compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
"modified_ts"
)
can = value2json(versions, pretty=True)
ref = value2json(ref_versions, pretty=True)


@@ -40,7 +40,6 @@ def get_all_bug_versions(es, bug_id, max_time=None):
raise Log.error("unknown container")
response = esq.query({
"from": es.settings.alias,
"where": {"and": [
{"eq": {"bug_id": bug_id}},
{"lte": {"modified_ts": convert.datetime2milli(max_time)}}

vendor/jx_elasticsearch/es14/aggs.py (vendored, 4 changed lines)

@@ -161,7 +161,7 @@ def es_aggsop(es, frum, query):
es_query.aggs[key].percentiles.field = es_cols[0].es_column
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\.0")
s.pull = jx_expression_to_function(key + ".values.50\\.0")
elif s.aggregate == "percentile":
if len(es_cols) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
@@ -253,7 +253,7 @@ def es_aggsop(es, frum, query):
es_query.aggs[key].percentiles.script = s.value.to_ruby(schema).script(schema)
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\.0")
s.pull = jx_expression_to_function(key + ".values.50\\.0")
elif s.aggregate == "percentile":
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
key = literal_field(canonical_name + " percentile")
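
The doubled backslash in key + ".values.50\\.0" keeps the same pull path as before: the Elasticsearch percentiles aggregation reports its result under a key literally named "50.0", so the dot inside that key must not be read as a path separator. A trimmed-down illustration of the lookup (not a real ES response):

    bucket = {"response_time percentile": {"values": {"50.0": 125.0}}}
    median = bucket["response_time percentile"]["values"]["50.0"]  # "50.0" is one key
    print(median)  # 125.0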

vendor/jx_elasticsearch/es14/setop.py (vendored, 2 changed lines)

@@ -374,6 +374,6 @@ def get_pull_stats(stats_name, median_name):
{"name": "sos", "value": stats_name + ".sum_of_squares"},
{"name": "std", "value": stats_name + ".std_deviation"},
{"name": "var", "value": stats_name + ".variance"},
{"name": "median", "value": median_name + ".values.50\.0"}
{"name": "median", "value": median_name + ".values.50\\.0"}
]})

vendor/jx_elasticsearch/es52/aggs.py (vendored, 4 changed lines)

@@ -211,7 +211,7 @@ def es_aggsop(es, frum, query):
es_query.aggs[key].percentiles.field = es_cols[0].es_column
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\.0")
s.pull = jx_expression_to_function(key + ".values.50\\.0")
elif s.aggregate == "percentile":
if len(es_cols) > 1:
Log.error("Do not know how to count columns with more than one type (script probably)")
@@ -325,7 +325,7 @@ def es_aggsop(es, frum, query):
es_query.aggs[key].percentiles.script = s.value.to_painless(schema).script(schema)
es_query.aggs[key].percentiles.percents += [50]
s.pull = jx_expression_to_function(key + ".values.50\.0")
s.pull = jx_expression_to_function(key + ".values.50\\.0")
elif s.aggregate == "percentile":
# ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
key = literal_field(canonical_name + " percentile")

vendor/jx_elasticsearch/es52/setop.py (vendored, 2 changed lines)

@@ -375,6 +375,6 @@ def get_pull_stats(stats_name, median_name):
{"name": "sos", "value": stats_name + ".sum_of_squares"},
{"name": "std", "value": stats_name + ".std_deviation"},
{"name": "var", "value": stats_name + ".variance"},
{"name": "median", "value": median_name + ".values.50\.0"}
{"name": "median", "value": median_name + ".values.50\\.0"}
]})

vendor/jx_elasticsearch/meta.py (vendored, 2 changed lines)

@@ -215,7 +215,7 @@ class FromESMetadata(Schema):
with self.meta.columns.locker:
columns = self.meta.columns.find(es_index_name, column_name)
if columns:
columns = jx.sort(columns, "names.\.")
columns = jx.sort(columns, "names.\\.")
# AT LEAST WAIT FOR THE COLUMNS TO UPDATE
while len(self.todo) and not all(columns.get("last_updated")):
if DEBUG:


@@ -137,9 +137,8 @@ class ListContainer(Container):
return self.where(where)
def where(self, where):
temp = None
if isinstance(where, Mapping):
exec ("def temp(row):\n return " + jx_expression(where).to_python())
temp = compile_expression(jx_expression(where).to_python())
elif isinstance(where, Expression):
temp = compile_expression(where.to_python())
else:
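
Swapping the exec statement for compile_expression is a Python 3 necessity as well as a cleanup: inside a function, exec() can no longer create the local variable temp, so the old code would leave temp unbound. The general idea, sketched with builtins only (the codebase's real compile_expression differs):

    def compile_predicate(python_source):
        # build a callable from a Python expression string, e.g. the output
        # of jx_expression(where).to_python()
        return eval(compile("lambda row: (" + python_source + ")", "<where>", "eval"))

    keep = compile_predicate("row['status'] == 'open'")
    print(keep({"status": "open"}))    # True
    print(keep({"status": "closed"}))  # False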

vendor/mo_dots/__init__.py (vendored, 6 changed lines)

@@ -68,7 +68,7 @@ def literal_field(field):
RETURN SAME WITH DOTS (`.`) ESCAPED
"""
try:
return field.replace(".", "\.")
return field.replace(".", "\\.")
except Exception as e:
get_logger().error("bad literal", e)
@@ -85,7 +85,7 @@ def unliteral_field(field):
"""
if len(split_field(field)) > 1:
get_logger().error("Bad call! Dude!")
return field.replace("\.", ".")
return field.replace("\\.", ".")
def split_field(field):
@@ -112,7 +112,7 @@ def join_field(field):
potent = [f for f in field if f != "."]
if not potent:
return "."
return ".".join([f.replace(".", "\.") for f in potent])
return ".".join([f.replace(".", "\\.") for f in potent])
def concat_field(prefix, suffix):
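
As the docstring above says, literal_field escapes dots so a key containing a literal "." can pass through dot-path machinery as a single segment, and unliteral_field reverses it. The round trip, inlined from the one-line replace() calls shown in the hunks:

    key = "values.50.0"
    escaped = key.replace(".", "\\.")      # values\.50\.0  (one path segment)
    restored = escaped.replace("\\.", ".")
    print(escaped, restored == key)        # values\.50\.0 True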

vendor/mo_dots/datas.py (vendored, 2 changed lines)

@@ -299,7 +299,7 @@ def _split_field(field):
"""
SIMPLE SPLIT, NO CHECKS
"""
return [k.replace("\a", ".") for k in field.replace("\.", "\a").split(".")]
return [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")]
class _DictUsingSelf(dict):
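
_split_field copes with escaped dots by parking them on the BEL control character ("\a"), splitting on the remaining real separators, then restoring the dots; field names are not expected to contain "\a" themselves. For example:

    field = "names.50\\.0"   # the key "names" followed by the literal key "50.0"
    parts = [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")]
    print(parts)             # ['names', '50.0']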

vendor/mo_dots/nones.py (vendored, 2 changed lines)

@@ -270,4 +270,4 @@ def _split_field(field):
if field == ".":
return []
else:
return [k.replace("\a", ".") for k in field.replace("\.", "\a").split(".")]
return [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")]