Fixes for alias analysis using ES over unverified SSL

Kyle Lahnakoski 2018-03-29 11:39:02 -04:00
Parent 59d5ddaf82
Commit 9c792433d6
36 changed files with 381 additions and 233 deletions

View file

@ -15,9 +15,10 @@ import jx_elasticsearch
from bzETL.extract_bugzilla import get_all_cc_changes
from jx_python import jx
from mo_collections.multiset import Multiset
from mo_dots import coalesce, set_default
from mo_dots import coalesce
from mo_future import iteritems
from mo_json import value2json
from mo_logs import Log, startup
from mo_logs import Log, startup, constants
from pyLibrary.env import elasticsearch
from pyLibrary.sql.mysql import MySQL
@ -44,11 +45,11 @@ def full_analysis(settings, bug_list=None, please_stop=None):
return
with MySQL(kwargs=settings.bugzilla, readonly=True) as db:
start = coalesce(settings.param.start, 0)
end = coalesce(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
start = coalesce(settings.alias.start, 0)
end = coalesce(settings.alias.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
#Perform analysis on blocks of bugs, in case we crash partway through
for s, e in jx.intervals(start, end, settings.param.increment):
for s, e in jx.reverse(jx.intervals(start, end, settings.alias.increment)):
Log.note(
"Load range {{start}}-{{end}}",
start=s,
@ -77,13 +78,14 @@ class AliasAnalyzer(object):
self.esq = jx_elasticsearch.new_instance(self.es.settings)
result = self.esq.query({
"from":"bug_aliases",
"select":["canonical", "alias"],
"from": "bug_aliases",
"select": ["canonical", "alias"],
"where": {"missing": "ignore"},
"format": "list"
"format": "list",
"limit": 50000
})
for r in result.data:
self.aliases[r.alias] = {"canonical":r["canonical"], "dirty":False}
self.aliases[r.alias] = {"canonical": r.canonical, "dirty": False}
Log.note("{{num}} aliases loaded", num=len(self.aliases.keys()))
@ -95,7 +97,7 @@ class AliasAnalyzer(object):
"format": "list"
})
for r in result.data:
self.not_aliases[r.alias] = r["canonical"]
self.not_aliases[r.alias] = r.canonical
except Exception as e:
Log.error("Can not init aliases", cause=e)
@ -124,15 +126,15 @@ class AliasAnalyzer(object):
while try_again and not please_stop:
#FIND EMAIL MOST NEEDING REPLACEMENT
problem_agg = Multiset(allow_negative=True)
for bug_id, agg in self.bugs.iteritems():
for bug_id, agg in iteritems(self.bugs):
#ONLY COUNT NEGATIVE EMAILS
for email, count in agg.dic.iteritems():
for email, count in iteritems(agg.dic):
if count < 0:
problem_agg.add(self.alias(email)["canonical"], amount=count)
problems = jx.sort([
{"email": e, "count": c}
for e, c in problem_agg.dic.iteritems()
for e, c in iteritems(problem_agg.dic)
if not self.not_aliases.get(e, None) and (c <= -(DIFF / 2) or last_run)
], ["count", "email"])
@ -143,10 +145,10 @@ class AliasAnalyzer(object):
#FIND MOST LIKELY MATCH
solution_agg = Multiset(allow_negative=True)
for bug_id, agg in self.bugs.iteritems():
for bug_id, agg in iteritems(self.bugs):
if agg.dic.get(problem.email, 0) < 0: #ONLY BUGS THAT ARE EXPERIENCING THIS problem
solution_agg += agg
solutions = jx.sort([{"email": e, "count": c} for e, c in solution_agg.dic.iteritems()], [{"field": "count", "sort": -1}, "email"])
solutions = jx.sort([{"email": e, "count": c} for e, c in iteritems(solution_agg.dic)], [{"field": "count", "sort": -1}, "email"])
if last_run and len(solutions) == 2 and solutions[0].count == -solutions[1].count:
#exact match
@ -188,13 +190,13 @@ class AliasAnalyzer(object):
def add_alias(self, lost, found):
new_canonical = self.alias(found)
old_canonical = self.alias(lost)
new_canonical = self.alias(found)
delete_list = []
#FOLD bugs ON lost=found
for bug_id, agg in self.bugs.iteritems():
for bug_id, agg in iteritems(self.bugs):
v = agg.dic.get(lost, 0)
if v != 0:
agg.add(lost, -v)
@ -205,7 +207,7 @@ class AliasAnalyzer(object):
#FOLD bugs ON old_canonical=new_canonical
if old_canonical["canonical"] != lost:
for bug_id, agg in self.bugs.iteritems():
for bug_id, agg in iteritems(self.bugs):
v = agg.dic.get(old_canonical["canonical"], 0)
if v != 0:
agg.add(old_canonical["canonical"], -v)
@ -221,12 +223,11 @@ class AliasAnalyzer(object):
reassign=[]
for k, v in self.aliases.items():
if v["canonical"] == old_canonical["canonical"]:
Log.note(
"ALIAS REMAPPED: {{alias}}->{{old}} to {{alias}}->{{new}}",
alias= k,
old= old_canonical["canonical"],
new= found
)
if k == v["canonical"]:
Log.note("ALIAS FOUND : {{alias}} -> {{new}}", alias=k, new=found)
else:
Log.note("ALIAS REMAPPED: {{alias}} -> {{old}} -> {{new}}", alias=k, old=v["canonical"], new=found)
reassign.append((k, found))
for k, found in reassign:
@ -264,6 +265,7 @@ def split_email(value):
def start():
try:
settings = startup.read_settings()
constants.set(settings.constants)
Log.start(settings.debug)
full_analysis(settings)
except Exception as e:
@ -282,10 +284,6 @@ ALIAS_SCHEMA = {
"enabled": False
},
"properties": {
"canonical":{
"type": "keyword",
"store": True
}
}
}
}
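The new constants.set(settings.constants) call in start() is what lets a config file flip module-level switches such as pyLibrary.env.http.DEFAULTS.verify. A minimal sketch of the idea, assuming only the standard library (set_constants is a hypothetical stand-in; the real implementation is mo_logs.constants.set):

import importlib

def set_constants(constants):
    # each key is a dotted path: a module prefix, then an attribute chain,
    # e.g. "pyLibrary.env.http.DEFAULTS.verify": false
    for path, value in constants.items():
        parts = path.split(".")
        target = None
        for i in range(len(parts) - 1, 0, -1):
            try:
                target = importlib.import_module(".".join(parts[:i]))
                break
            except ImportError:
                continue
        if target is None:
            raise ImportError("no importable module prefix in " + path)
        names = parts[i:]
        for name in names[:-1]:
            target = getattr(target, name)      # e.g. the DEFAULTS dict
        if isinstance(target, dict):
            target[names[-1]] = value           # DEFAULTS["verify"] = False
        else:
            setattr(target, names[-1], value)   # plain module global

With the alias config below, this runs before any request is made, so SSL verification is already off when the ES cluster is first contacted.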

View file

@ -30,7 +30,7 @@ from pyLibrary import convert
from pyLibrary.env.elasticsearch import Cluster
from pyLibrary.sql.mysql import MySQL
NUM_CONNECTIONS = 10
NUM_CONNECTIONS = 4
db_cache_lock = Lock()
db_cache = []

View file

@ -21,6 +21,8 @@ from pyLibrary.queries.jx_usingMySQL import esfilter2sqlwhere
from pyLibrary.sql import SQL, SQL_ONE, SQL_NEG_ONE, sql_list, sql_iso
# USING THE TEXT DATETIME OF EPOCH THROWS A WARNING! USE ONE SECOND PAST EPOCH AS MINIMUM TIME.
from pyLibrary.sql.mysql import int_list_packer
MIN_TIMESTAMP = 1000 # MILLISECONDS SINCE EPOCH
#ALL BUGS IN PRIVATE ETL HAVE SCREENED FIELDS
@ -444,7 +446,7 @@ def get_all_cc_changes(db, bug_list):
""", {
"max_time": MAX_TIME,
"cc_field_id": CC_FIELD_ID,
"bug_filter": esfilter2sqlwhere(db, {"terms": {"bug_id": bug_list}})
"bug_filter": esfilter2sqlwhere(db, int_list_packer("bug_id", bug_list))
})
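The switch from a plain terms filter to int_list_packer matters once bug_list holds many ids: runs of consecutive bug_ids collapse into range predicates instead of one enormous IN clause, and esfilter2sqlwhere then renders the result as SQL. A hedged sketch of the packing idea (the real int_list_packer in pyLibrary.sql.mysql applies more heuristics; pack_int_list here assumes a non-empty list):

def pack_int_list(field, values):
    # runs of 3+ consecutive ids become ranges; stragglers stay as terms
    values = sorted(set(values))
    filters, singles = [], []
    start = prev = values[0]
    for v in values[1:] + [None]:
        if v is not None and v == prev + 1:
            prev = v
            continue
        if prev - start >= 2:
            filters.append({"range": {field: {"gte": start, "lte": prev}}})
        else:
            singles.extend(range(start, prev + 1))
        if v is not None:
            start = prev = v
    if singles:
        filters.append({"terms": {field: singles}})
    return {"or": filters} if len(filters) > 1 else filters[0]

# pack_int_list("bug_id", [1, 2, 3, 4, 9]) ==
#   {"or": [{"range": {"bug_id": {"gte": 1, "lte": 4}}},
#           {"terms": {"bug_id": [9]}}]}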

View file

@ -777,7 +777,7 @@ class BugHistoryParser(object):
self.currActivity.changes.append({
"field_name": field_name,
"new_value": Null,
"old_value": ", ".join(map(unicode, jx.sort(diff))),
"old_value": ", ".join(map(text_type, jx.sort(diff))),
"attach_id": target.attach_id
})
@ -794,7 +794,7 @@ class BugHistoryParser(object):
if valueType == "added" and remove:
self.currActivity.changes.append({
"field_name": field_name,
"new_value": u", ".join(map(unicode, jx.sort(remove))),
"new_value": u", ".join(map(text_type, jx.sort(remove))),
"old_value": Null,
"attach_id": target.attach_id
})
@ -897,7 +897,7 @@ class BugHistoryParser(object):
if final_removed:
self.currActivity.changes.append({
"field_name": field_name,
"new_value": u", ".join(map(unicode, jx.sort(final_removed))),
"new_value": u", ".join(map(text_type, jx.sort(final_removed))),
"old_value": Null,
"attach_id": target.attach_id
})
@ -913,7 +913,7 @@ class BugHistoryParser(object):
if valueType == "added" and removed:
self.currActivity.changes.append({
"field_name": field_name,
"new_value": u", ".join(map(unicode, jx.sort(removed))),
"new_value": u", ".join(map(text_type, jx.sort(removed))),
"old_value": Null,
"attach_id": target.attach_id
})
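All four unicode-to-text_type swaps above follow the same mo_future compatibility pattern: text_type is unicode on Python 2 and str on Python 3, so the joined change-history values render identically on both. For example (using plain sorted in place of jx.sort):

from mo_future import text_type

old_values = [17, 5, 23]
line = u", ".join(map(text_type, sorted(old_values)))
assert line == u"5, 17, 23"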

View file

@ -1,37 +1,42 @@
{
"alias":{
"start":0,
"increment":5000000,
"elasticsearch":{
"host": "http://localhost",
"index": "bug_aliases"
},
"file":{
"path":"../schema/bugzilla_aliases.json",
"$ref":"//~/private.json#alias_file"
}
},
"bugzilla":{
"preamble":"from https://github.com/klahnakoski/Bugzilla-ETL",
"host":"localhost",
"port":3307,
"$ref":"//~/private.json#bugzilla-dev",
"schema":"test_bugzilla",
"debug":false
"alias": {
"start": 0,
"increment": 10000,
"elasticsearch": {
"host": "http://localhost",
"index": "bug_aliases"
},
"file": {
"path": "../schema/bugzilla_aliases.json",
"$ref": "//~/private.json#alias_file"
}
},
"debug":{
"log":[{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./results/logs/alias_analysis.log",
"maxBytes": 10000000,
"backupCount": 200,
"encoding": "utf8"
},{
"log_type": "stream",
"stream":"sys.stdout"
}]
"bugzilla": {
"preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
"host": "localhost",
"port": 3307,
"$ref": "//~/private.json#bugzilla-dev",
"schema": "bugs",
"debug": false
},
"constants":{
"pyLibrary.sql.mysql.EXECUTE_TIMEOUT": 0,
"pyLibrary.env.http.default_headers": {"Referer": "https://github.com/mozilla/Bugzilla-ETL"},
"pyLibrary.env.http.DEFAULTS.verify": false
},
"debug": {
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "results/logs/alias_analysis.log",
"maxBytes": 10000000,
"backupCount": 200,
"encoding": "utf8"
},
{
"log_type": "stream",
"stream": "sys.stdout"
}
]
}
}

View file

@ -7,8 +7,8 @@
"comment": "key is only meant to keep the aliases out of clear text. Aliases are public as per https://www.mozilla.org/en-US/privacy/policies/websites/",
"key": ""
},
"first_run_time": "./results/data/first_run_time.txt",
"last_run_time": "./results/data/last_run_time.txt",
"first_run_time": "results/data/first_run_time.txt",
"last_run_time": "results/data/last_run_time.txt",
"look_back": 3600000,
//hour
//1hour
@ -27,9 +27,9 @@
}
},
"bugzilla": {
"$ref": "//~/private.json#bugzilla-dev",
"host": "localhost",
"$ref": "file://~/private.json#bugzilla-dev",
"preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
"host": "localhost",
"port": 3307,
"schema": "bugs",
"debug": true
@ -63,9 +63,9 @@
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./results/logs/bz_etl.log",
"filename": "results/logs/bz_etl.log",
"maxBytes": 10000000,
"backupCount": 200,
"backupCount": 10,
"encoding": "utf8"
},
{

View file

@ -1,5 +1,5 @@
{
"errors": "./results/data",
"errors": "results/data",
"source": {
"filename": "C:/Users/klahnakoski/git/ES Extract/data/bug_versions.txt",
"schema_filename": "../schema/bug_version.json"
@ -15,7 +15,7 @@
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./tests/results/logs/fileload.log",
"filename": "tests/results/logs/fileload.log",
"maxBytes": 10000000,
"backupCount": 200,
"encoding": "utf8"

View file

@ -48,7 +48,7 @@
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./results/logs/leak_check.log",
"filename": "results/logs/leak_check.log",
"maxBytes": 10000000,
"backupCount": 200,
"encoding": "utf8"

View file

@ -21,7 +21,7 @@
"debug":{
"log":[{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./results/logs/replication.log",
"filename": "results/logs/replication.log",
"maxBytes": 10000000,
"backupCount": 200,
"encoding": "utf8"

View file

@ -19,7 +19,7 @@
"debug":{
"log":[{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./results/logs/replication.log",
"filename": "results/logs/replication.log",
"maxBytes": 10000000,
"backupCount": 200,
"encoding": "utf8"

View file

@ -1,36 +1,36 @@
{
"param": {
"increment": 10000,
"bugs": [
// 384,
// 1108,
// 1045,
// 1046,
// 1157,
// 1877,
// 1865,
// 1869,
// 2586,
// 3140,
// 6810,
// 9622,
// 10575,
"param": {
"increment": 10000,
"bugs": [
// 384,
// 1108,
// 1045,
// 1046,
// 1157,
// 1877,
// 1865,
// 1869,
// 2586,
// 3140,
// 6810,
// 9622,
// 10575,
11040
// 12911,
// 67742,
// 96421,
// 123203,
// 178960,
// 367518,
// 457765,
// 458397,
// 471427,
// 544327,
// 547727,
// 643420,
// 692436,
// 726635,
// 813650
// 12911,
// 67742,
// 96421,
// 123203,
// 178960,
// 367518,
// 457765,
// 458397,
// 471427,
// 544327,
// 547727,
// 643420,
// 692436,
// 726635,
// 813650
// 1165765 VERY LONG short_desc
// 1007019 does not have bug_status, or component, or product
// 372836 (REVIEW FLAGS TEST)
@ -41,63 +41,66 @@
// 248970 another cutoff review request
// 248971 another cutoff review request
],
"temp_dir": "./tests/resources",
"errors": "./tests/results/errors",
"allow_private_bugs": false,
"last_run_time": "./tests/results/last_run_time.txt",
"first_run_time": "./tests/results/first_run_time.txt",
"look_back": 3600000 //1hour
},
"alias":{
"temp_dir": "tests/resources",
"errors": "tests/results/errors",
"allow_private_bugs": false,
"last_run_time": "tests/results/last_run_time.txt",
"first_run_time": "tests/results/first_run_time.txt",
"look_back": 3600000
//1hour
},
"alias": {
"increment": 1000000,
"file": {
"path": "../schema/bugzilla_aliases.json",
"$ref": "file://~/private.json#alias_file"
}
},
"elasticsearch": {
"description": "pointer to es with test results",
"filename": "./tests/results/test_results.json",
"schema": {"$ref":"resources/schema/bug_version.json"},
"host": "http://localhost",
"port": 9200,
"index": "test_bugs",
"type": "bug_version"
},
"reference":{
"filename": "./tests/resources/reference/public_bugs.json"
"elasticsearch": {
"description": "pointer to es with test results",
"filename": "tests/results/test_results.json",
"schema": {
"$ref": "../../../resources/schema/bug_version.json"
},
"host": "http://localhost",
"port": 9200,
"index": "test_bugs",
"type": "bug_version"
},
"bugzilla": {
"filename": "./tests/resources/sql/small_bugzilla.sql",
"preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
"host": "localhost",
"port": 3307,
"$ref":"file://~/private.json#bugzilla-dev",
"schema": "test_bugzilla",
"expires_on": 1372867005000,
"debug": true,
"reference": {
"filename": "tests/resources/reference/public_bugs.json"
},
"bugzilla": {
"filename": "tests/resources/sql/small_bugzilla.sql",
"preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
"host": "localhost",
"port": 3307,
"$ref": "file://~/private.json#bugzilla-dev",
"schema": "test_bugzilla",
"expires_on": 1372867005000,
"debug": true,
"readonly": true
},
},
"constants": {
"pyLibrary.env.http.default_headers": {
"Referer": "https://wiki.mozilla.org/BMO/ElasticSearch"
}
},
"debug": {
},
"debug": {
"trace": true,
"profile":false,
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./tests/results/logs/test_etl.log",
"maxBytes": 10000000,
"backupCount": 10,
"encoding": "utf8"
},
{
"log_type": "stream",
"stream": "sys.stdout"
}
]
}
"profile": false,
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "tests/results/logs/test_etl.log",
"maxBytes": 10000000,
"backupCount": 10,
"encoding": "utf8"
},
{
"log_type": "stream",
"stream": "sys.stdout"
}
]
}
}

View file

@ -0,0 +1,5 @@
REM -N DO NOT START SHELL
REM -v VERBOSE
REM -L <local_port>:<distant_host>:<distant_port> <putty config>
plink -v -N -L 9200:vpc-vpc-etl-es-public-devsvcdev-debriixex5z7p3orlxtsbkvhoi.us-west-2.es.amazonaws.com:443 CloudOps-ETL
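This new tunnel is the source of the commit title: Elasticsearch is reached through localhost, but the AWS endpoint's certificate names *.us-west-2.es.amazonaws.com, so hostname verification fails and must be disabled. A hedged illustration with plain requests (scheme and port assumed to match the tunnel above; the ETL itself routes this through pyLibrary.env.http and the DEFAULTS.verify constant):

# assuming the plink tunnel above is up: local port 9200 forwards to the
# AWS ES endpoint's TLS port 443, so the certificate cannot match "localhost"
import requests

resp = requests.get("https://localhost:9200", verify=False)  # urllib3 will warn
print(resp.json())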

View file

@ -1,5 +1,5 @@
REM -N DO NOT START SHELL
REM -v VERBOSE
REM -L <local_port>:<distant_host>:<distant_port> <putty config for remote>
REM -L <local_port>:<distant_host>:<distant_port> <putty config>
plink -v -N -L 3307:bugzilla-masterdb-devsvcdev-2017100901.czvlmp16hwe4.us-west-2.rds.amazonaws.com:3306 CloudOps-ETL

View file

@ -9,15 +9,15 @@
},
"public_bugs_reference": {
"description": "pointer to es with known good *public* results",
"filename": "./tests/resources/config/public_bugs_reference_es.json"
"filename": "tests/resources/config/public_bugs_reference_es.json"
},
"public_comments_reference": {
"description": "pointer to es with known good public comments",
"filename": "./tests/resources/config/public_comments_reference_es.json"
"filename": "tests/resources/config/public_comments_reference_es.json"
},
"private_bugs_reference": {
"description": "pointer to es with known good results",
"filename": "./tests/resources/config/private_bugs_reference_es.json"
"filename": "tests/resources/config/private_bugs_reference_es.json"
},
"private_comments_reference": {
"description": "pointer to es with known good private comments",
@ -25,7 +25,7 @@
},
"candidate": {
"description": "pointer to es with test results",
"filename": "./tests/results/test_results.json",
"filename": "tests/results/test_results.json",
"host": "http://localhost",
"port": 9200,
"index": "test_bugs",
@ -34,10 +34,10 @@
"fake": {
//FOR TESTING JSON CREATION, NO NEED FOR REAL ES
"bugs": {
"filename": "./tests/results/test_bugs.json"
"filename": "tests/results/test_bugs.json"
},
"comments": {
"filename": "./tests/results/test_comments.json"
"filename": "tests/results/test_comments.json"
}
},
"real": {
@ -100,11 +100,11 @@
// 937428 whitespace after comma in user story, complex diff
// 248970 another cutoff review request
],
"temp_dir": "./tests/resources",
"errors": "./tests/results/errors",
"temp_dir": "tests/resources",
"errors": "tests/results/errors",
"allow_private_bugs": true,
"last_run_time": "./tests/results/last_run_time.txt",
"first_run_time": "./tests/results/first_run_time.txt",
"last_run_time": "tests/results/last_run_time.txt",
"first_run_time": "tests/results/first_run_time.txt",
"look_back": 3600000 //1hour
},
"alias": {
@ -123,7 +123,7 @@
}
},
"bugzilla": {
"filename": "./tests/resources/sql/small_bugzilla.sql",
"filename": "tests/resources/sql/small_bugzilla.sql",
"preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
"host": "localhost",
"port": 3307,
@ -151,7 +151,7 @@
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./tests/results/logs/test_etl.log",
"filename": "tests/results/logs/test_etl.log",
"maxBytes": 10000000,
"backupCount": 200,
"encoding": "utf8"

View file

@ -0,0 +1,104 @@
{
"param": {
"increment": 10000,
"bugs": [
// 384,
// 1108,
// 1045,
// 1046,
// 1157,
// 1877,
// 1865,
// 1869,
// 2586,
// 3140,
// 6810,
// 9622,
// 10575,
11040
// 12911,
// 67742,
// 96421,
// 123203,
// 178960,
// 367518,
// 457765,
// 458397,
// 471427,
// 544327,
// 547727,
// 643420,
// 692436,
// 726635,
// 813650
// 1165765 VERY LONG short_desc
// 1007019 does not have bug_status, or component, or product
// 372836 (REVIEW FLAGS TEST)
// 13534 (REVIEW MOVES TO OTHER PERSON)
// 393845 added blocking1.9+ twice
// 671185 *many* review requests
// 937428 whitespace after comma in user story, complex diff
// 248970 another cutoff review request
// 248971 another cutoff review request
],
"temp_dir": "tests/resources",
"errors": "tests/results/errors",
"allow_private_bugs": false,
"last_run_time": "tests/results/last_run_time.txt",
"first_run_time": "tests/results/first_run_time.txt",
"look_back": 3600000
//1hour
},
"alias": {
"increment": 1000000,
"file": {
"path": "resources/schema/bugzilla_aliases.json",
"$ref": "file://~/private.json#alias_file"
}
},
"reference": {
"filename": "tests/resources/reference/public_bugs.json"
},
"bugzilla": {
"$ref": "file://~/private.json#bugzilla-dev",
"preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
"host": "localhost",
"port": 3307,
"schema": "bugs",
"expires_on": 1372867005000,
"debug": true,
"readonly": true
},
"elasticsearch": {
"host": "http://localhost",
"port": 9200,
"schema": {
"$ref": "../../../resources/schema/bug_version.json"
},
"index": "test_bugs",
"type": "bug_version",
"filename": "tests/results/test_results.json",
},
"constants": {
"pyLibrary.env.http.default_headers": {
"Referer": "https://wiki.mozilla.org/BMO/ElasticSearch"
}
},
"debug": {
"trace": true,
"profile": false,
"log": [
{
"class": "logging.handlers.RotatingFileHandler",
"filename": "tests/results/logs/test_etl.log",
"maxBytes": 10000000,
"backupCount": 10,
"encoding": "utf8"
},
{
"log_type": "stream",
"stream": "sys.stdout"
}
]
}
}

View file

@ -1,6 +1,6 @@
Mozilla's Bugzilla instance stores time in local DST-adjusted Pacific Time.
The ETL script converts all times to GMT for ease of comparison. If you plan to
run tests or generate your own ES instance from a MySQL database
you will need to install the timezone database to perform this conversion.
@ -28,4 +28,4 @@ Linux without zoneinfo database
* ```mysql_tzinfo_to_sql <directory> | mysql -u root mysql```
([more Linux instructions](http://dev.mysql.com/doc/refman/4.1/en/mysql-tzinfo-to-sql.html))

View file

@ -17,7 +17,7 @@ from datetime import datetime
from mo_future import text_type
from bzETL import extract_bugzilla, bz_etl
from bzETL.bz_etl import etl, esfilter2sqlwhere, MIN_TIMESTAMP
from bzETL.bz_etl import etl, MIN_TIMESTAMP
from bzETL.extract_bugzilla import get_current_time, SCREENED_WHITEBOARD_BUG_GROUPS
from jx_python import jx
from mo_dots import Data, Null, wrap
@ -29,6 +29,7 @@ from mo_math.randoms import Random
from mo_threads import ThreadedQueue, Till
from mo_times import Timer
from pyLibrary import convert
from pyLibrary.queries.jx_usingMySQL import esfilter2sqlwhere
from pyLibrary.sql.mysql import all_db, MySQL
from pyLibrary.testing import elasticsearch
from util import database, compare_es
@ -618,7 +619,7 @@ def compare_both(candidate, reference, settings, some_bugs):
ref_versions = \
jx.sort(
#ADDED TO FIX OLD PRODUCTION BUG VERSIONS
[compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
"modified_ts"
)

View file

@ -32,7 +32,7 @@ class TestOneETL(unittest.TestCase):
I USE THIS TO IDENTIFY CANDIDATES TO ADD TO THE TEST SUITE
"""
def setUp(self):
self.settings = startup.read_settings(filename="test_one_settings.json")
self.settings = startup.read_settings(filename="tests/resources/config/test_one.json")
constants.set(self.settings.constants)
Log.start(self.settings.debug)
@ -81,7 +81,7 @@ class TestOneETL(unittest.TestCase):
# "port": 9200,
# "index": ElasticSearch.proto_name("test_public_bugs"),
# "type": "bug_version",
# "schema_file": "./resources/json/bug_version.json"
# "schema_file": "resources/json/bug_version.json"
# })
# es = ElasticSearch.create_index(es_settings, File(es_settings.schema_file).read())
# es.delete_all_but("test_public_bugs", es_settings.index)

2
vendor/jx_elasticsearch/es52/expressions.py vendored
View file

@ -56,7 +56,7 @@ return output.toString()
class Painless(Expression):
__slots__ = ("miss", "type", "expr", "many")
__slots__ = ("miss", "data_type", "expr", "many")
def __init__(self, type, expr, frum, miss=None, many=False):
self.miss = coalesce(miss, FALSE) # Expression that will return true/false to indicate missing result

10
vendor/jx_python/jx.py vendored
View file

@ -591,11 +591,15 @@ def value_compare(left, right, ordering=1):
ltype = type(left)
rtype = type(right)
type_diff = TYPE_ORDER.get(ltype, 10) - TYPE_ORDER.get(rtype, 10)
ltype_num = TYPE_ORDER.get(ltype, 10)
rtype_num = TYPE_ORDER.get(rtype, 10)
type_diff = ltype_num - rtype_num
if type_diff != 0:
return ordering if type_diff > 0 else -ordering
if ltype is builtin_tuple:
if ltype_num == 9:
return 0
elif ltype is builtin_tuple:
for a, b in zip(left, right):
c = value_compare(a, b)
if c != 0:
@ -1084,6 +1088,8 @@ def accumulate(vals):
def reverse(vals):
# TODO: Test how to do this fastest
if not hasattr(vals, "len"):
vals = list(vals)
l = len(vals)
output = [None] * l
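The change to reverse() materializes inputs that have no length, so the generator produced by jx.intervals (which jx.reverse now receives in alias_analysis.py above) works. The pattern, sketched with the conventional __len__ check (note the vendored code tests the attribute name "len"):

def reverse(vals):
    # generators have no length; materialize before index-based reversal
    if not hasattr(vals, "__len__"):
        vals = list(vals)
    n = len(vals)
    output = [None] * n
    for i, v in enumerate(vals):
        output[n - 1 - i] = v
    return output

assert reverse(x for x in (1, 2, 3)) == [3, 2, 1]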

4
vendor/mo_collections/multiset.py vendored
View file

@ -51,7 +51,7 @@ class Multiset(object):
class _Multiset(Multiset):
def __new__(cls, *args):
return object.__new__(cls, *args)
return object.__new__(cls)
def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
@ -150,7 +150,7 @@ class _Multiset(Multiset):
class _NegMultiset(Multiset):
def __new__(cls, *args, **kwargs):
return object.__new__(cls, *args, **kwargs)
return object.__new__(cls)
def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
if not key_field and not count_field:
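Both __new__ fixes address the same Python 3 change: object.__new__ raises TypeError when extra arguments are forwarded to it from an overridden __new__. A minimal demonstration of the corrected shape (Bag is a hypothetical class for illustration):

class Bag(object):
    def __new__(cls, *args, **kwargs):
        # forwarding *args to object.__new__ raises TypeError on Python 3
        return object.__new__(cls)

    def __init__(self, items=None):
        self.items = list(items or [])

assert Bag([1, 2]).items == [1, 2]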

4
vendor/mo_collections/unique_index.py vendored
View file

@ -16,7 +16,7 @@ from collections import Mapping, Iterable, Set
from mo_dots import unwrap, tuplewrap, wrap
from mo_dots.objects import datawrap
from mo_future import PY2
from mo_future import PY2, iteritems
from mo_logs import Log
from mo_logs.exceptions import suppress_exception
@ -73,7 +73,7 @@ class UniqueIndex(Set, Mapping):
return self._data.keys()
def pop(self):
output = self._data.iteritems().next()[1]
output = iteritems(self._data).next()[1]
self.remove(output)
return wrap(output)

16
vendor/mo_dots/datas.py vendored
View file

@ -15,7 +15,7 @@ from collections import MutableMapping, Mapping
from copy import deepcopy
from mo_dots import _getdefault, hash_value, literal_field, coalesce, listwrap, get_logger
from mo_future import text_type
from mo_future import text_type, PY2
_get = object.__getattribute__
_set = object.__setattr__
@ -210,7 +210,7 @@ class Data(MutableMapping):
def iteritems(self):
# LOW LEVEL ITERATION, NO WRAPPING
d = _get(self, "_dict")
return ((k, wrap(v)) for k, v in d.iteritems())
return ((k, wrap(v)) for k, v in iteritems(d))
def keys(self):
d = _get(self, "_dict")
@ -429,9 +429,15 @@ class _DictUsingSelf(dict):
output.append((prefix + literal_field(k), v))
return output
def iteritems(self):
for k, v in dict.iteritems(self):
yield k, wrap(v)
if PY2:
def iteritems(self):
for k, v in dict.iteritems(self):
yield k, wrap(v)
else:
def iteritems(self):
for k, v in dict.items(self):
yield k, wrap(v)
def keys(self):
return set(dict.keys(self))

6
vendor/mo_future/__init__.py vendored
View file

@ -48,6 +48,9 @@ if PY3:
from io import BytesIO
from _thread import allocate_lock, get_ident, start_new_thread, interrupt_main
def iteritems(d):
return d.items()
def transpose(*args):
return list(zip(*args))
@ -107,6 +110,9 @@ else:
from io import BytesIO
from thread import allocate_lock, get_ident, start_new_thread, interrupt_main
def iteritems(d):
return d.iteritems()
def get_function_name(func):
return func.func_name
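This shim is what the many iteritems(...) call sites added in this commit resolve to: d.items() on Python 3 (where dict.iteritems is gone) and the lazy d.iteritems() on Python 2, so call sites stay identical on both. Typical use, mirroring the alias-analysis loops above:

from mo_future import iteritems

counts = {"a": 1, "b": -2}
negatives = [k for k, v in iteritems(counts) if v < 0]
assert negatives == ["b"]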

4
vendor/mo_logs/__init__.py vendored
View file

@ -19,7 +19,7 @@ from datetime import datetime
import sys
from mo_dots import coalesce, listwrap, wrap, unwrap, unwraplist, set_default, FlatList
from mo_future import text_type, PY3
from mo_future import text_type, PY3, iteritems
from mo_logs import constants
from mo_logs.exceptions import Except, suppress_exception
from mo_logs.strings import indent
@ -469,7 +469,7 @@ def write_profile(profile_settings, stats):
"line": f[1],
"method": f[2].lstrip("<").rstrip(">")
}
for f, d, in acc.stats.iteritems()
for f, d, in iteritems(acc.stats)
]
stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
stats_file.write(convert.list2tab(stats))

1
vendor/mo_logs/exceptions.py vendored
View file

@ -102,7 +102,6 @@ class Except(Exception):
with suppress_exception:
cause_strings.append(text_type(c))
output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)
return output

2
vendor/mo_logs/log_usingLogger.py vendored
View file

@ -84,7 +84,7 @@ def make_log_from_settings(settings):
path = ".".join(path[:-1])
constructor = None
try:
temp = __import__(path, globals(), locals(), [class_name], -1)
temp = __import__(path, globals(), locals(), [class_name], 0)
constructor = object.__getattribute__(temp, class_name)
except Exception as e:
if settings.stream and not constructor:

4
vendor/mo_logs/log_usingThreadedStream.py vendored
View file

@ -16,7 +16,7 @@ from __future__ import unicode_literals
import sys
from time import time
from mo_future import text_type
from mo_future import text_type, PY2
from mo_logs import Log
from mo_logs.log_usingNothing import StructuredLogger
from mo_logs.strings import expand_template
@ -45,7 +45,7 @@ class StructuredLogger_usingThreadedStream(StructuredLogger):
# WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
from mo_threads import Queue
if use_UTF8:
if use_UTF8 and PY2:
def utf8_appender(value):
if isinstance(value, text_type):
value = value.encode('utf8')

2
vendor/mo_logs/startup.py vendored
View file

@ -83,7 +83,7 @@ def read_settings(filename=None, defs=None, env_filename=None):
"help": "path to JSON file with settings",
"type": str,
"dest": "filename",
"default": "./config.json",
"default": "config.json",
"required": False
})
args = argparse(defs)

19
vendor/mo_logs/strings.py vendored
View file

@ -22,6 +22,8 @@ from datetime import datetime as builtin_datetime
from datetime import timedelta, date
from json.encoder import encode_basestring
import sys
from mo_dots import coalesce, wrap, get_module
from mo_future import text_type, xrange, binary_type, round as _round, PY3, get_function_name
from mo_logs.convert import datetime2unix, datetime2string, value2json, milli2datetime, unix2datetime
@ -512,17 +514,26 @@ THE REST OF THIS FILE IS TEMPLATE EXPANSION CODE USED BY mo-logs
"""
recursive_counter = 0
def expand_template(template, value):
"""
:param template: A UNICODE STRING WITH VARIABLE NAMES IN MOUSTACHES `{{}}`
:param value: Data HOLDING THE PARAMTER VALUES
:return: UNICODE STRING WITH VARIABLES EXPANDED
"""
value = wrap(value)
if isinstance(template, text_type):
return _simple_expand(template, (value,))
global recursive_counter
if recursive_counter>10:
sys.stderr.write("problem with template expansion")
return
recursive_counter +=1
try:
value = wrap(value)
if isinstance(template, text_type):
return _simple_expand(template, (value,))
return _expand(template, (value,))
finally:
recursive_counter -=1
def common_prefix(*args):
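The counter added above only guards against runaway recursive expansion; normal use is unchanged. The Log calls throughout this commit go through exactly this function, for example:

from mo_logs.strings import expand_template

line = expand_template("Load range {{start}}-{{end}}", {"start": 0, "end": 10000})
assert line == u"Load range 0-10000"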

2
vendor/pyLibrary/env/README.md vendored
View file

@ -36,7 +36,7 @@ selecting only the properties it requires.
"type" : "test_result",
"debug" : true,
"limit_replicas" : true,
"schema_file" : "./resources/schema/test_schema.json"
"schema_file" : "resources/schema/test_schema.json"
},

6
vendor/pyLibrary/env/elasticsearch.py vendored
View file

@ -93,11 +93,9 @@ class Index(Features):
Log.error("not allowed")
if type == None:
# NO type PROVIDED, MAYBE THERE IS A SUITABLE DEFAULT?
with self.cluster.metadata_locker:
index_ = self.cluster._metadata.indices[self.settings.index]
index_ = self.cluster.get_metadata().indices[self.settings.index]
if not index_:
indices = self.cluster.get_metadata().indices
index_ = indices[self.settings.index]
Log.error("can not find index {{index}}", index=self.settings.index)
candidate_types = list(index_.mappings.keys())
if len(candidate_types) != 1:

44
vendor/pyLibrary/env/http.py vendored
View file

@ -45,11 +45,19 @@ DEBUG = False
FILE_SIZE_LIMIT = 100 * 1024 * 1024
MIN_READ_SIZE = 8 * 1024
ZIP_REQUEST = False
default_headers = Data() # TODO: MAKE THIS VARIABLE A SPECIAL TYPE OF EXPECTED MODULE PARAMETER SO IT COMPLAINS IF NOT SET
default_timeout = 600
DEFAULTS = {
"allow_redirects": True,
"stream": True,
"verify": True,
"timeout": 600,
"zip": False,
"retry": {"times": 1, "sleep": 0}
}
_warning_sent = False
request_count = 0
@ -87,7 +95,7 @@ def request(method, url, zip=None, retry=None, **kwargs):
failures = []
for remaining, u in jx.countdown(url):
try:
response = request(method, u, zip=zip, retry=retry, **kwargs)
response = request(method, u, retry=retry, **kwargs)
if Math.round(response.status_code, decimal=-2) not in [400, 500]:
return response
if not remaining:
@ -106,16 +114,10 @@ def request(method, url, zip=None, retry=None, **kwargs):
session.headers.update(default_headers)
with closing(sess):
if zip is None:
zip = ZIP_REQUEST
if isinstance(url, text_type):
if PY2 and isinstance(url, text_type):
# httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
url = url.encode('ascii')
_to_ascii_dict(kwargs)
timeout = kwargs['timeout'] = coalesce(kwargs.get('timeout'), default_timeout)
if retry == None:
retry = Data(times=1, sleep=0)
elif isinstance(retry, Number):
@ -126,6 +128,9 @@ def request(method, url, zip=None, retry=None, **kwargs):
retry.sleep = retry.sleep.seconds
set_default(retry, {"times": 1, "sleep": 0})
_to_ascii_dict(kwargs)
set_default(kwargs, DEFAULTS)
if 'json' in kwargs:
kwargs['data'] = value2json(kwargs['json']).encode('utf8')
del kwargs['json']
@ -134,7 +139,7 @@ def request(method, url, zip=None, retry=None, **kwargs):
headers = kwargs['headers'] = unwrap(coalesce(kwargs.get('headers'), {}))
set_default(headers, {'Accept-Encoding': 'compress, gzip'})
if zip and len(coalesce(kwargs.get('data'))) > 1000:
if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
compressed = convert.bytes2zip(kwargs['data'])
headers['content-encoding'] = 'gzip'
kwargs['data'] = compressed
@ -154,12 +159,15 @@ def request(method, url, zip=None, retry=None, **kwargs):
if DEBUG:
Log.note(u"http {{method}} to {{url}}", method=method, url=url)
request_count += 1
del kwargs['retry']
del kwargs['zip']
return session.request(method=method, url=url, **kwargs)
except Exception as e:
errors.append(Except.wrap(e))
if " Read timed out." in errors[0]:
Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=timeout, times=retry.times, cause=errors[0])
Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
else:
Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
@ -183,8 +191,6 @@ else:
def get(url, **kwargs):
kwargs.setdefault('allow_redirects', True)
kwargs.setdefault('stream', True)
return HttpResponse(request('get', url, **kwargs))
@ -204,23 +210,19 @@ def get_json(url, **kwargs):
def options(url, **kwargs):
kwargs.setdefault('allow_redirects', True)
kwargs.setdefault('stream', True)
return HttpResponse(request('options', url, **kwargs))
def head(url, **kwargs):
kwargs.setdefault('allow_redirects', False)
kwargs.setdefault('stream', True)
return HttpResponse(request('head', url, **kwargs))
def post(url, **kwargs):
kwargs.setdefault('stream', True)
return HttpResponse(request('post', url, **kwargs))
def delete(url, **kwargs):
kwargs.setdefault('stream', False)
return HttpResponse(request('delete', url, **kwargs))
@ -253,15 +255,9 @@ def put(url, **kwargs):
def patch(url, **kwargs):
kwargs.setdefault('stream', True)
return HttpResponse(request('patch', url, **kwargs))
def delete(url, **kwargs):
kwargs.setdefault('stream', False)
return HttpResponse(request('delete', url, **kwargs))
class HttpResponse(Response):
def __new__(cls, resp):
resp.__class__ = HttpResponse
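The net effect of the http.py changes: the scattered per-function setdefault calls for allow_redirects/stream/timeout are gone, and a single DEFAULTS dict, merged into kwargs with set_default, governs every request. That is what makes the verify flag controllable from config via constants. A sketch of the fallback pattern, assuming plain dicts (the real set_default is mo_dots'):

DEFAULTS = {
    "allow_redirects": True,
    "stream": True,
    "verify": True,    # constants.set(...) flips this process-wide for the tunnel
    "timeout": 600,
}

def set_default(kwargs, defaults):
    # per-call kwargs win; module DEFAULTS fill in whatever is missing
    for k, v in defaults.items():
        kwargs.setdefault(k, v)
    return kwargs

def request(method, url, **kwargs):
    set_default(kwargs, DEFAULTS)
    # ... the real code hands kwargs to requests.Session().request(...)
    return kwargs

assert request("get", "https://localhost:9200")["verify"] is True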

14
vendor/pyLibrary/sql/mysql.py vendored
View file

@ -22,7 +22,7 @@ import mo_json
from jx_python import jx
from mo_dots import coalesce, wrap, listwrap, unwrap
from mo_files import File
from mo_future import text_type, utf8_json_encoder
from mo_future import text_type, utf8_json_encoder, binary_type
from mo_kwargs import override
from mo_logs import Log
from mo_logs.exceptions import Except, suppress_exception
@ -159,7 +159,15 @@ class MySQL(object):
self.transaction_level += 1
self.execute("SET TIME_ZONE='+00:00'")
if EXECUTE_TIMEOUT:
self.execute("SET MAX_EXECUTION_TIME=" + text_type(EXECUTE_TIMEOUT))
try:
self.execute("SET MAX_EXECUTION_TIME=" + text_type(EXECUTE_TIMEOUT))
self._execute_backlog()
except Exception as e:
e = Except.wrap(e)
if "Unknown system variable 'MAX_EXECUTION_TIME'" in e:
globals()['EXECUTE_TIMEOUT'] = 0 # THIS VERSION OF MYSQL DOES NOT HAVE SESSION LEVEL VARIABLE
else:
raise e
def close(self):
if self.transaction_level > 0:
@ -623,7 +631,7 @@ class MySQL(object):
def utf8_to_unicode(v):
try:
if isinstance(v, str):
if isinstance(v, binary_type):
return v.decode("utf8")
else:
return v
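The MAX_EXECUTION_TIME guard above is a feature-detection pattern: attempt to set the session variable once, and if this MySQL server does not know it, zero the module-level constant so it is never attempted again. Distilled (execute_sql is a hypothetical stand-in for MySQL.execute):

EXECUTE_TIMEOUT = 600000  # milliseconds; 0 means "feature unavailable"

def set_session_timeout(execute_sql):
    global EXECUTE_TIMEOUT
    if not EXECUTE_TIMEOUT:
        return
    try:
        execute_sql("SET MAX_EXECUTION_TIME=" + str(EXECUTE_TIMEOUT))
    except Exception as e:
        if "Unknown system variable 'MAX_EXECUTION_TIME'" in str(e):
            EXECUTE_TIMEOUT = 0   # older MySQL: disable and never retry
        else:
            raise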

2
vendor/pyLibrary/sql/util.py vendored
View file

@ -13,7 +13,7 @@ from __future__ import division
from __future__ import unicode_literals
from mo_dots import wrap
from jx_python.jx_usingMySQL import esfilter2sqlwhere
from pyLibrary.queries.jx_usingMySQL import esfilter2sqlwhere
def find_holes(db, table_name, column_name, _range, filter=None):

4
vendor/pyLibrary/testing/elasticsearch.py vendored
View file

@ -60,10 +60,10 @@ class FakeES():
@override
def __init__(self, filename, host="fake", index="fake", kwargs=None):
self.settings = kwargs
self.filename = kwargs.filename
self.filename = filename
try:
self.data = mo_json.json2value(File(self.filename).read())
except Exception:
except Exception as e:
self.data = Data()
def search(self, query):