fixes to pass some of etl_one, PY3 upgrade
Parent: f3dc4a73fc
Commit: 32fd67988b
@@ -118,9 +118,9 @@ class AliasAnalyzer(object):
             self.bugs[d.bug_id] = agg
 
     def analysis(self, last_run, please_stop):
-        DIFF = 7
+        minimum_diff = 7
         if last_run:
-            DIFF = 4 #ONCE WE HAVE ALL THE DATA IN WE CAN BE LESS DISCRIMINATING
+            minimum_diff = 4 #ONCE WE HAVE ALL THE DATA IN WE CAN BE LESS DISCRIMINATING
         try_again = True
 
         while try_again and not please_stop:
@@ -135,7 +135,7 @@ class AliasAnalyzer(object):
             problems = jx.sort([
                 {"email": e, "count": c}
                 for e, c in iteritems(problem_agg.dic)
-                if not self.not_aliases.get(e, None) and (c <= -(DIFF / 2) or last_run)
+                if not self.not_aliases.get(e, None) and (c <= -(minimum_diff / 2) or last_run)
             ], ["count", "email"])
 
             try_again = False
@@ -153,7 +153,7 @@ class AliasAnalyzer(object):
                 if last_run and len(solutions) == 2 and solutions[0].count == -solutions[1].count:
                     #exact match
                     pass
-                elif len(solutions) <= 1 or (solutions[1].count + DIFF >= solutions[0].count):
+                elif len(solutions) <= 1 or (solutions[1].count + minimum_diff >= solutions[0].count):
                     #not distinctive enough
                     continue
 
@@ -168,7 +168,7 @@ class AliasAnalyzer(object):
                 try_again = True
                 self.add_alias(problem.email, best_solution.email)
 
-        self.saveAliases()
+        self.save_aliases()
 
     def alias(self, email):
         canonical = self.aliases.get(email, None)
@@ -231,7 +231,7 @@ class AliasAnalyzer(object):
         for k, found in reassign:
             self.aliases[k] = {"canonical":found, "dirty":True}
 
-    def saveAliases(self):
+    def save_aliases(self):
         records = []
         for k, v in self.aliases.items():
             if v["dirty"]:
@@ -43,6 +43,7 @@ from __future__ import unicode_literals
 import math
 import re
 
+import jx_elasticsearch
 from mo_future import text_type
 
 from bzETL.transform_bugzilla import normalize, NUMERIC_FIELDS, MULTI_FIELDS, DIFF_FIELDS
@@ -90,7 +91,7 @@ class BugHistoryParser(object):
 
         self.alias_config=alias_config
         self.aliases = Null
-        self.initializeAliases()
+        self.initialize_aliases()
 
 
     def processRow(self, row_in):
@@ -1002,22 +1003,23 @@ class BugHistoryParser(object):
     def alias(self, name):
         if name == None:
             return Null
-        return coalesce(self.aliases.get(name, Null).canonical, name)
+        return coalesce(self.aliases.get(name, Null), name)
 
 
-    def initializeAliases(self):
+    def initialize_aliases(self):
         try:
-            try:
+            if self.alias_config.elasticsearch:
+                esq = jx_elasticsearch.new_instance(self.alias_config.elasticsearch)
+                result = esq.query({"select": ["alias", "canonical"], "where": {"missing": "ignore"}, "limit": 10000, "format":"list"})
+                self.aliases = {d.alias:d.canonical for d in result.data}
+            else:
                 alias_json = File(self.alias_config.file).read()
-            except Exception as e:
-                Log.warning("Could not load alias file", cause=e)
-                alias_json = "{}"
-            self.aliases = {k: wrap(v) for k, v in json2value(alias_json).items()}
-
-            Log.note("{{num}} aliases loaded", num=len(self.aliases.keys()))
-
+                self.aliases = {k: wrap(v) for k, v in json2value(alias_json).items()}
         except Exception as e:
-            Log.error("Can not init aliases", e)
+            Log.warning("Could not load alias file", cause=e)
+            self.aliases = {}
+
+        Log.note("{{num}} aliases loaded", num=len(self.aliases.keys()))
 
 
 def deformat(value):
     if value == None:
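The alias map also changed shape in this hunk: it previously held wrapped records (hence the .canonical attribute access), while the new code stores the canonical string directly, whether it came from Elasticsearch or from the JSON file. A toy equivalent of the new lookup (the data here is made up for illustration):

    aliases = {"old@example.org": "canonical@example.org"}  # hypothetical content

    def alias(name):
        if name is None:
            return None
        return aliases.get(name) or name

    assert alias("old@example.org") == "canonical@example.org"
    assert alias("unknown@example.org") == "unknown@example.org"  # passes through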
@@ -72,7 +72,7 @@ def normalize(bug, old_school=False):
             if k.startswith("attachments") and (k.endswith("isobsolete") or k.endswith("ispatch") or k.endswith("isprivate")):
                 new_v=convert.value2int(v)
                 new_k=k[12:]
-                a[k.replace(".", "\.")]=new_v
+                a[k.replace(".", "\\.")]=new_v
                 if not old_school:
                     a[new_k]=new_v
         a.flags = jx.sort(a.flags, ["modified_ts", "value"])
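The \. -> \\. substitution above repeats throughout this commit. Both literals denote the same two-character string (backslash, then dot), because \. is not a recognized escape sequence; but Python 3.6+ flags such invalid escapes with a DeprecationWarning, so the doubled backslash is the Py3-clean spelling of the same value. A quick check (chr(92) is used here to avoid writing the warned form):

    assert "\\." == chr(92) + "."                              # two chars: backslash, dot
    assert "a.b".replace(".", "\\.") == "a" + chr(92) + ".b"   # escapes the dot: a\.b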
@@ -15,8 +15,8 @@
     // 6810,
     // 9622,
     // 10575,
-    11040
-    // 12911,
+    // 11040 //LOTS OF HISTORY
+    12911,
     // 67742,
     // 96421,
     // 123203,
@@ -54,6 +54,10 @@
         "file": {
             "path": "resources/schema/bugzilla_aliases.json",
             "$ref": "file://~/private.json#alias_file"
         },
+        "elasticsearch": {
+            "host": "http://localhost",
+            "index": "bug_aliases"
+        }
     },
     "reference": {
@@ -616,12 +616,11 @@ def compare_both(candidate, reference, settings, some_bugs):
             max_time = convert.milli2datetime(versions.last().modified_ts)
 
             pre_ref_versions = get_all_bug_versions(reference, bug_id, max_time)
-            ref_versions = \
-                jx.sort(
-                    #ADDED TO FIX OLD PRODUCTION BUG VERSIONS
-                    [compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
-                    "modified_ts"
-                )
+            ref_versions = jx.sort(
+                # ADDED TO FIX OLD PRODUCTION BUG VERSIONS
+                [compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
+                "modified_ts"
+            )
 
             can = value2json(versions, pretty=True)
             ref = value2json(ref_versions, pretty=True)
@@ -40,7 +40,6 @@ def get_all_bug_versions(es, bug_id, max_time=None):
         raise Log.error("unknown container")
 
     response = esq.query({
-        "from": es.settings.alias,
         "where": {"and": [
             {"eq": {"bug_id": bug_id}},
             {"lte": {"modified_ts": convert.datetime2milli(max_time)}}
@@ -161,7 +161,7 @@ def es_aggsop(es, frum, query):
 
             es_query.aggs[key].percentiles.field = es_cols[0].es_column
             es_query.aggs[key].percentiles.percents += [50]
-            s.pull = jx_expression_to_function(key + ".values.50\.0")
+            s.pull = jx_expression_to_function(key + ".values.50\\.0")
         elif s.aggregate == "percentile":
             if len(es_cols) > 1:
                 Log.error("Do not know how to count columns with more than one type (script probably)")
@@ -253,7 +253,7 @@ def es_aggsop(es, frum, query):
 
             es_query.aggs[key].percentiles.script = s.value.to_ruby(schema).script(schema)
             es_query.aggs[key].percentiles.percents += [50]
-            s.pull = jx_expression_to_function(key + ".values.50\.0")
+            s.pull = jx_expression_to_function(key + ".values.50\\.0")
         elif s.aggregate == "percentile":
             # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
             key = literal_field(canonical_name + " percentile")
@@ -374,6 +374,6 @@ def get_pull_stats(stats_name, median_name):
         {"name": "sos", "value": stats_name + ".sum_of_squares"},
         {"name": "std", "value": stats_name + ".std_deviation"},
         {"name": "var", "value": stats_name + ".variance"},
-        {"name": "median", "value": median_name + ".values.50\.0"}
+        {"name": "median", "value": median_name + ".values.50\\.0"}
     ]})
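Context for the ".values.50\\.0" pull paths: the Elasticsearch percentiles aggregation returns its result keyed by the percent as a string, e.g. {"values": {"50.0": 41.5}}, and jx path expressions treat an unescaped dot as a step separator. Escaping the dot makes "50.0" a single key rather than two steps. A toy check of the intent (not the jx implementation):

    es_percentiles_result = {"values": {"50.0": 41.5}}   # typical agg response shape
    # unescaped "values.50.0" would mean values -> "50" -> "0" (three steps);
    # the escaped path means values -> "50.0" (two steps)
    assert es_percentiles_result["values"]["50.0"] == 41.5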
@@ -211,7 +211,7 @@ def es_aggsop(es, frum, query):
 
             es_query.aggs[key].percentiles.field = es_cols[0].es_column
             es_query.aggs[key].percentiles.percents += [50]
-            s.pull = jx_expression_to_function(key + ".values.50\.0")
+            s.pull = jx_expression_to_function(key + ".values.50\\.0")
         elif s.aggregate == "percentile":
             if len(es_cols) > 1:
                 Log.error("Do not know how to count columns with more than one type (script probably)")
@@ -325,7 +325,7 @@ def es_aggsop(es, frum, query):
 
             es_query.aggs[key].percentiles.script = s.value.to_painless(schema).script(schema)
             es_query.aggs[key].percentiles.percents += [50]
-            s.pull = jx_expression_to_function(key + ".values.50\.0")
+            s.pull = jx_expression_to_function(key + ".values.50\\.0")
         elif s.aggregate == "percentile":
             # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
             key = literal_field(canonical_name + " percentile")
@@ -375,6 +375,6 @@ def get_pull_stats(stats_name, median_name):
         {"name": "sos", "value": stats_name + ".sum_of_squares"},
         {"name": "std", "value": stats_name + ".std_deviation"},
         {"name": "var", "value": stats_name + ".variance"},
-        {"name": "median", "value": median_name + ".values.50\.0"}
+        {"name": "median", "value": median_name + ".values.50\\.0"}
     ]})
@@ -215,7 +215,7 @@ class FromESMetadata(Schema):
         with self.meta.columns.locker:
             columns = self.meta.columns.find(es_index_name, column_name)
             if columns:
-                columns = jx.sort(columns, "names.\.")
+                columns = jx.sort(columns, "names.\\.")
                 # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                 while len(self.todo) and not all(columns.get("last_updated")):
                     if DEBUG:
@@ -137,9 +137,8 @@ class ListContainer(Container):
         return self.where(where)
 
     def where(self, where):
-        temp = None
         if isinstance(where, Mapping):
-            exec ("def temp(row):\n return " + jx_expression(where).to_python())
+            temp = compile_expression(jx_expression(where).to_python())
         elif isinstance(where, Expression):
             temp = compile_expression(where.to_python())
         else:
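This is one of the substantive Py2-to-Py3 fixes in the commit: the exec statement is gone in Python 3, and calling exec() as a function cannot bind a new name into the enclosing function's scope, so the old "exec a def, then use temp" trick would leave temp as None. Compiling the predicate to a callable that is returned works on both versions. A minimal sketch of a compile_expression-style helper (assumed behavior, not the actual mo-jx source):

    def compile_expression(source):
        # source is a Python expression over `row`, e.g. "row['x'] > 2"
        return eval(compile("lambda row: (" + source + ")", "<expr>", "eval"))

    pred = compile_expression("row['x'] > 2")
    assert pred({"x": 3}) and not pred({"x": 1})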
@@ -68,7 +68,7 @@ def literal_field(field):
     RETURN SAME WITH DOTS (`.`) ESCAPED
     """
     try:
-        return field.replace(".", "\.")
+        return field.replace(".", "\\.")
     except Exception as e:
         get_logger().error("bad literal", e)
 
@@ -85,7 +85,7 @@ def unliteral_field(field):
     """
     if len(split_field(field)) > 1:
         get_logger().error("Bad call! Dude!")
-    return field.replace("\.", ".")
+    return field.replace("\\.", ".")
 
 
 def split_field(field):
@@ -112,7 +112,7 @@ def join_field(field):
     potent = [f for f in field if f != "."]
     if not potent:
         return "."
-    return ".".join([f.replace(".", "\.") for f in potent])
+    return ".".join([f.replace(".", "\\.") for f in potent])
 
 
 def concat_field(prefix, suffix):
@@ -299,7 +299,7 @@ def _split_field(field):
     """
     SIMPLE SPLIT, NO CHECKS
     """
-    return [k.replace("\a", ".") for k in field.replace("\.", "\a").split(".")]
+    return [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")]
 
 
 class _DictUsingSelf(dict):
@@ -270,4 +270,4 @@ def _split_field(field):
     if field == ".":
         return []
     else:
-        return [k.replace("\a", ".") for k in field.replace("\.", "\a").split(".")]
+        return [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")]
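The \a (BEL) character in _split_field is a placeholder trick: escaped dots are first swapped to a character that should never appear in a field name, the string is split on the remaining dots, and the placeholder is turned back into a literal dot in each part. For example (illustrative only; chr(92) avoids writing the warned escape form):

    field = "values.50" + chr(92) + ".0"   # i.e. values.50\.0
    parts = [k.replace("\a", ".") for k in field.replace(chr(92) + ".", "\a").split(".")]
    assert parts == ["values", "50.0"]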