Fixes for alias analysis using ES over unverified SSL
Parent: 59d5ddaf82
Commit: 9c792433d6
@@ -15,9 +15,10 @@ import jx_elasticsearch
 from bzETL.extract_bugzilla import get_all_cc_changes
 from jx_python import jx
 from mo_collections.multiset import Multiset
-from mo_dots import coalesce, set_default
+from mo_dots import coalesce
+from mo_future import iteritems
 from mo_json import value2json
-from mo_logs import Log, startup
+from mo_logs import Log, startup, constants
 from pyLibrary.env import elasticsearch
 from pyLibrary.sql.mysql import MySQL
@@ -44,11 +45,11 @@ def full_analysis(settings, bug_list=None, please_stop=None):
         return

     with MySQL(kwargs=settings.bugzilla, readonly=True) as db:
-        start = coalesce(settings.param.start, 0)
-        end = coalesce(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
+        start = coalesce(settings.alias.start, 0)
+        end = coalesce(settings.alias.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)

         #Perform analysis on blocks of bugs, in case we crash partway through
-        for s, e in jx.intervals(start, end, settings.param.increment):
+        for s, e in jx.reverse(jx.intervals(start, end, settings.alias.increment)):
             Log.note(
                 "Load range {{start}}-{{end}}",
                 start=s,
@@ -77,13 +78,14 @@ class AliasAnalyzer(object):

             self.esq = jx_elasticsearch.new_instance(self.es.settings)
             result = self.esq.query({
-                "from":"bug_aliases",
-                "select":["canonical", "alias"],
-                "format": "list"
+                "from": "bug_aliases",
+                "select": ["canonical", "alias"],
+                "where": {"missing": "ignore"},
+                "format": "list",
+                "limit": 50000
             })
             for r in result.data:
-                self.aliases[r.alias] = {"canonical":r["canonical"], "dirty":False}
+                self.aliases[r.alias] = {"canonical": r.canonical, "dirty": False}

             Log.note("{{num}} aliases loaded", num=len(self.aliases.keys()))
@@ -95,7 +97,7 @@ class AliasAnalyzer(object):
                 "format": "list"
             })
             for r in result.data:
-                self.not_aliases[r.alias] = r["canonical"]
+                self.not_aliases[r.alias] = r.canonical

         except Exception as e:
             Log.error("Can not init aliases", cause=e)
@@ -124,15 +126,15 @@ class AliasAnalyzer(object):
         while try_again and not please_stop:
             #FIND EMAIL MOST NEEDING REPLACEMENT
             problem_agg = Multiset(allow_negative=True)
-            for bug_id, agg in self.bugs.iteritems():
+            for bug_id, agg in iteritems(self.bugs):
                 #ONLY COUNT NEGATIVE EMAILS
-                for email, count in agg.dic.iteritems():
+                for email, count in iteritems(agg.dic):
                     if count < 0:
                         problem_agg.add(self.alias(email)["canonical"], amount=count)

             problems = jx.sort([
                 {"email": e, "count": c}
-                for e, c in problem_agg.dic.iteritems()
+                for e, c in iteritems(problem_agg.dic)
                 if not self.not_aliases.get(e, None) and (c <= -(DIFF / 2) or last_run)
             ], ["count", "email"])
@@ -143,10 +145,10 @@ class AliasAnalyzer(object):

             #FIND MOST LIKELY MATCH
             solution_agg = Multiset(allow_negative=True)
-            for bug_id, agg in self.bugs.iteritems():
+            for bug_id, agg in iteritems(self.bugs):
                 if agg.dic.get(problem.email, 0) < 0:  #ONLY BUGS THAT ARE EXPERIENCING THIS problem
                     solution_agg += agg
-            solutions = jx.sort([{"email": e, "count": c} for e, c in solution_agg.dic.iteritems()], [{"field": "count", "sort": -1}, "email"])
+            solutions = jx.sort([{"email": e, "count": c} for e, c in iteritems(solution_agg.dic)], [{"field": "count", "sort": -1}, "email"])

             if last_run and len(solutions) == 2 and solutions[0].count == -solutions[1].count:
                 #exact match
@@ -188,13 +190,13 @@ class AliasAnalyzer(object):


     def add_alias(self, lost, found):
-        new_canonical = self.alias(found)
         old_canonical = self.alias(lost)
+        new_canonical = self.alias(found)

         delete_list = []

         #FOLD bugs ON lost=found
-        for bug_id, agg in self.bugs.iteritems():
+        for bug_id, agg in iteritems(self.bugs):
             v = agg.dic.get(lost, 0)
             if v != 0:
                 agg.add(lost, -v)
@@ -205,7 +207,7 @@ class AliasAnalyzer(object):

         #FOLD bugs ON old_canonical=new_canonical
         if old_canonical["canonical"] != lost:
-            for bug_id, agg in self.bugs.iteritems():
+            for bug_id, agg in iteritems(self.bugs):
                 v = agg.dic.get(old_canonical["canonical"], 0)
                 if v != 0:
                     agg.add(old_canonical["canonical"], -v)
@@ -221,12 +223,11 @@ class AliasAnalyzer(object):
         reassign=[]
         for k, v in self.aliases.items():
             if v["canonical"] == old_canonical["canonical"]:
-                Log.note(
-                    "ALIAS REMAPPED: {{alias}}->{{old}} to {{alias}}->{{new}}",
-                    alias= k,
-                    old= old_canonical["canonical"],
-                    new= found
-                )
+                if k == v["canonical"]:
+                    Log.note("ALIAS FOUND : {{alias}} -> {{new}}", alias=k, new=found)
+                else:
+                    Log.note("ALIAS REMAPPED: {{alias}} -> {{old}} -> {{new}}", alias=k, old=v["canonical"], new=found)

                 reassign.append((k, found))

         for k, found in reassign:
@@ -264,6 +265,7 @@ def split_email(value):
 def start():
     try:
         settings = startup.read_settings()
+        constants.set(settings.constants)
         Log.start(settings.debug)
         full_analysis(settings)
     except Exception as e:
@@ -282,10 +284,6 @@ ALIAS_SCHEMA = {
             "enabled": False
         },
         "properties": {
-            "canonical":{
-                "type": "keyword",
-                "store": True
-            }
         }
     }
 }
|
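A quick illustration of the loop rewrite above: `jx.intervals(start, end, size)` yields contiguous `(s, e)` bug-id blocks, and the new `jx.reverse(...)` wrapper (added to `jx_python` later in this same commit) makes the analyzer walk the newest blocks first. A minimal sketch, with invented bounds:

```python
# Minimal sketch using only the two jx helpers that appear in the diff above;
# the bounds 0/25/10 are invented for illustration.
from jx_python import jx

for s, e in jx.reverse(jx.intervals(0, 25, 10)):
    print(s, e)  # (20, 25), then (10, 20), then (0, 10) -- newest block first
```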
@@ -30,7 +30,7 @@ from pyLibrary import convert
 from pyLibrary.env.elasticsearch import Cluster
 from pyLibrary.sql.mysql import MySQL

-NUM_CONNECTIONS = 10
+NUM_CONNECTIONS = 4

 db_cache_lock = Lock()
 db_cache = []
@@ -21,6 +21,8 @@ from pyLibrary.queries.jx_usingMySQL import esfilter2sqlwhere
 from pyLibrary.sql import SQL, SQL_ONE, SQL_NEG_ONE, sql_list, sql_iso

+# USING THE TEXT DATETIME OF EPOCH THROWS A WARNING! USE ONE SECOND PAST EPOCH AS MINIMUM TIME.
+from pyLibrary.sql.mysql import int_list_packer

 MIN_TIMESTAMP = 1000  # MILLISECONDS SINCE EPOCH

 #ALL BUGS IN PRIVATE ETL HAVE SCREENED FIELDS
@@ -444,7 +446,7 @@ def get_all_cc_changes(db, bug_list):
     """, {
         "max_time": MAX_TIME,
         "cc_field_id": CC_FIELD_ID,
-        "bug_filter": esfilter2sqlwhere(db, {"terms": {"bug_id": bug_list}})
+        "bug_filter": esfilter2sqlwhere(db, int_list_packer("bug_id", bug_list))
     })

@@ -777,7 +777,7 @@ class BugHistoryParser(object):
             self.currActivity.changes.append({
                 "field_name": field_name,
                 "new_value": Null,
-                "old_value": ", ".join(map(unicode, jx.sort(diff))),
+                "old_value": ", ".join(map(text_type, jx.sort(diff))),
                 "attach_id": target.attach_id
             })

@@ -794,7 +794,7 @@ class BugHistoryParser(object):
         if valueType == "added" and remove:
             self.currActivity.changes.append({
                 "field_name": field_name,
-                "new_value": u", ".join(map(unicode, jx.sort(remove))),
+                "new_value": u", ".join(map(text_type, jx.sort(remove))),
                 "old_value": Null,
                 "attach_id": target.attach_id
             })
@@ -897,7 +897,7 @@ class BugHistoryParser(object):
         if final_removed:
             self.currActivity.changes.append({
                 "field_name": field_name,
-                "new_value": u", ".join(map(unicode, jx.sort(final_removed))),
+                "new_value": u", ".join(map(text_type, jx.sort(final_removed))),
                 "old_value": Null,
                 "attach_id": target.attach_id
             })
@@ -913,7 +913,7 @@ class BugHistoryParser(object):
         if valueType == "added" and removed:
             self.currActivity.changes.append({
                 "field_name": field_name,
-                "new_value": u", ".join(map(unicode, jx.sort(removed))),
+                "new_value": u", ".join(map(text_type, jx.sort(removed))),
                 "old_value": Null,
                 "attach_id": target.attach_id
             })
@@ -1,37 +1,42 @@
 {
-	"alias":{
-		"start":0,
-		"increment":5000000,
-		"elasticsearch":{
-			"host": "http://localhost",
-			"index": "bug_aliases"
-		},
-		"file":{
-			"path":"../schema/bugzilla_aliases.json",
-			"$ref":"//~/private.json#alias_file"
-		}
-	},
-	"bugzilla":{
-		"preamble":"from https://github.com/klahnakoski/Bugzilla-ETL",
-		"host":"localhost",
-		"port":3307,
-		"$ref":"//~/private.json#bugzilla-dev",
-		"schema":"test_bugzilla",
-		"debug":false
-	},
-	"debug":{
-		"log":[{
-			"class": "logging.handlers.RotatingFileHandler",
-			"filename": "./results/logs/alias_analysis.log",
-			"maxBytes": 10000000,
-			"backupCount": 200,
-			"encoding": "utf8"
-		},{
-			"log_type": "stream",
-			"stream":"sys.stdout"
-		}]
-	}
+    "alias": {
+        "start": 0,
+        "increment": 10000,
+        "elasticsearch": {
+            "host": "http://localhost",
+            "index": "bug_aliases"
+        },
+        "file": {
+            "path": "../schema/bugzilla_aliases.json",
+            "$ref": "//~/private.json#alias_file"
+        }
+    },
+    "bugzilla": {
+        "preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
+        "host": "localhost",
+        "port": 3307,
+        "$ref": "//~/private.json#bugzilla-dev",
+        "schema": "bugs",
+        "debug": false
+    },
+    "constants":{
+        "pyLibrary.sql.mysql.EXECUTE_TIMEOUT": 0,
+        "pyLibrary.env.http.default_headers": {"Referer": "https://github.com/mozilla/Bugzilla-ETL"},
+        "pyLibrary.env.http.DEFAULTS.verify": false
+    },
+    "debug": {
+        "log": [
+            {
+                "class": "logging.handlers.RotatingFileHandler",
+                "filename": "results/logs/alias_analysis.log",
+                "maxBytes": 10000000,
+                "backupCount": 200,
+                "encoding": "utf8"
+            },
+            {
+                "log_type": "stream",
+                "stream": "sys.stdout"
+            }
+        ]
+    }
 }
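The new `constants` block is what the commit title refers to: together with the `constants.set(settings.constants)` call added to `start()` earlier, each dotted key overrides a module-level value at startup, so `pyLibrary.env.http.DEFAULTS.verify: false` makes the ES client skip TLS certificate verification. A hedged sketch of the startup sequence, with semantics assumed from the calls that appear in this diff:

```python
# Sketch of the startup path this config feeds; every call here appears in the diff.
from mo_logs import Log, constants, startup

settings = startup.read_settings()   # loads the JSON above
constants.set(settings.constants)    # e.g. pyLibrary.env.http.DEFAULTS.verify -> False
Log.start(settings.debug)
```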
@@ -7,8 +7,8 @@
         "comment": "key is only meant to keep the aliases out of clear text. Aliases are public as per https://www.mozilla.org/en-US/privacy/policies/websites/",
         "key": ""
     },
-    "first_run_time": "./results/data/first_run_time.txt",
-    "last_run_time": "./results/data/last_run_time.txt",
+    "first_run_time": "results/data/first_run_time.txt",
+    "last_run_time": "results/data/last_run_time.txt",
     "look_back": 3600000,
-    //hour
+    //1hour
@@ -27,9 +27,9 @@
         }
     },
     "bugzilla": {
-        "$ref": "//~/private.json#bugzilla-dev",
-        "host": "localhost",
+        "$ref": "file://~/private.json#bugzilla-dev",
+        "preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
+        "host": "localhost",
         "port": 3307,
         "schema": "bugs",
         "debug": true
@@ -63,9 +63,9 @@
         "log": [
             {
                 "class": "logging.handlers.RotatingFileHandler",
-                "filename": "./results/logs/bz_etl.log",
+                "filename": "results/logs/bz_etl.log",
                 "maxBytes": 10000000,
-                "backupCount": 200,
+                "backupCount": 10,
                 "encoding": "utf8"
             },
             {
@@ -1,5 +1,5 @@
 {
-    "errors": "./results/data",
+    "errors": "results/data",
     "source": {
         "filename": "C:/Users/klahnakoski/git/ES Extract/data/bug_versions.txt",
         "schema_filename": "../schema/bug_version.json"
@@ -15,7 +15,7 @@
         "log": [
             {
                 "class": "logging.handlers.RotatingFileHandler",
-                "filename": "./tests/results/logs/fileload.log",
+                "filename": "tests/results/logs/fileload.log",
                 "maxBytes": 10000000,
                 "backupCount": 200,
                 "encoding": "utf8"
@@ -48,7 +48,7 @@
         "log": [
             {
                 "class": "logging.handlers.RotatingFileHandler",
-                "filename": "./results/logs/leak_check.log",
+                "filename": "results/logs/leak_check.log",
                 "maxBytes": 10000000,
                 "backupCount": 200,
                 "encoding": "utf8"
@@ -21,7 +21,7 @@
     "debug":{
         "log":[{
             "class": "logging.handlers.RotatingFileHandler",
-            "filename": "./results/logs/replication.log",
+            "filename": "results/logs/replication.log",
             "maxBytes": 10000000,
             "backupCount": 200,
             "encoding": "utf8"
@@ -19,7 +19,7 @@
     "debug":{
         "log":[{
             "class": "logging.handlers.RotatingFileHandler",
-            "filename": "./results/logs/replication.log",
+            "filename": "results/logs/replication.log",
             "maxBytes": 10000000,
             "backupCount": 200,
             "encoding": "utf8"
@@ -1,36 +1,36 @@
 {
-	"param": {
-		"increment": 10000,
-		"bugs": [
-			// 384,
-			// 1108,
-			// 1045,
-			// 1046,
-			// 1157,
-			// 1877,
-			// 1865,
-			// 1869,
-			// 2586,
-			// 3140,
-			// 6810,
-			// 9622,
-			// 10575,
+    "param": {
+        "increment": 10000,
+        "bugs": [
+            // 384,
+            // 1108,
+            // 1045,
+            // 1046,
+            // 1157,
+            // 1877,
+            // 1865,
+            // 1869,
+            // 2586,
+            // 3140,
+            // 6810,
+            // 9622,
+            // 10575,
             11040
-			// 12911,
-			// 67742,
-			// 96421,
-			// 123203,
-			// 178960,
-			// 367518,
-			// 457765,
-			// 458397,
-			// 471427,
-			// 544327,
-			// 547727,
-			// 643420,
-			// 692436,
-			// 726635,
-			// 813650
+            // 12911,
+            // 67742,
+            // 96421,
+            // 123203,
+            // 178960,
+            // 367518,
+            // 457765,
+            // 458397,
+            // 471427,
+            // 544327,
+            // 547727,
+            // 643420,
+            // 692436,
+            // 726635,
+            // 813650
             // 1165765 VERY LONG short_desc
             // 1007019 does not have bug_status, or component, or product
             // 372836 (REVIEW FLAGS TEST)
@@ -41,63 +41,66 @@
             // 248970 another cutoff review request
             // 248971 another cutoff review request
         ],
-        "temp_dir": "./tests/resources",
-        "errors": "./tests/results/errors",
-        "allow_private_bugs": false,
-        "last_run_time": "./tests/results/last_run_time.txt",
-        "first_run_time": "./tests/results/first_run_time.txt",
-        "look_back": 3600000 //1hour
-    },
-    "alias":{
+        "temp_dir": "tests/resources",
+        "errors": "tests/results/errors",
+        "allow_private_bugs": false,
+        "last_run_time": "tests/results/last_run_time.txt",
+        "first_run_time": "tests/results/first_run_time.txt",
+        "look_back": 3600000
+        //1hour
+    },
+    "alias": {
         "increment": 1000000,
         "file": {
             "path": "../schema/bugzilla_aliases.json",
             "$ref": "file://~/private.json#alias_file"
         }
     },
-    "elasticsearch": {
-        "description": "pointer to es with test results",
-        "filename": "./tests/results/test_results.json",
-        "schema": {"$ref":"resources/schema/bug_version.json"},
-        "host": "http://localhost",
-        "port": 9200,
-        "index": "test_bugs",
-        "type": "bug_version"
-    },
-    "reference":{
-        "filename": "./tests/resources/reference/public_bugs.json"
-    },
-    "bugzilla": {
-        "filename": "./tests/resources/sql/small_bugzilla.sql",
-        "preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
-        "host": "localhost",
-        "port": 3307,
-        "$ref":"file://~/private.json#bugzilla-dev",
-        "schema": "test_bugzilla",
-        "expires_on": 1372867005000,
-        "debug": true,
-    },
+    "elasticsearch": {
+        "description": "pointer to es with test results",
+        "filename": "tests/results/test_results.json",
+        "schema": {
+            "$ref": "../../../resources/schema/bug_version.json"
+        },
+        "host": "http://localhost",
+        "port": 9200,
+        "index": "test_bugs",
+        "type": "bug_version"
+    },
+    "reference": {
+        "filename": "tests/resources/reference/public_bugs.json"
+    },
+    "bugzilla": {
+        "filename": "tests/resources/sql/small_bugzilla.sql",
+        "preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
+        "host": "localhost",
+        "port": 3307,
+        "$ref": "file://~/private.json#bugzilla-dev",
+        "schema": "test_bugzilla",
+        "expires_on": 1372867005000,
+        "debug": true,
+        "readonly": true
+    },
     "constants": {
         "pyLibrary.env.http.default_headers": {
             "Referer": "https://wiki.mozilla.org/BMO/ElasticSearch"
         }
     },
     "debug": {
         "trace": true,
-        "profile":false,
+        "profile": false,
         "log": [
             {
                 "class": "logging.handlers.RotatingFileHandler",
-                "filename": "./tests/results/logs/test_etl.log",
+                "filename": "tests/results/logs/test_etl.log",
                 "maxBytes": 10000000,
                 "backupCount": 10,
                 "encoding": "utf8"
             },
             {
                 "log_type": "stream",
                 "stream": "sys.stdout"
             }
         ]
     }
 }
@@ -0,0 +1,5 @@
+REM -N DO NOT START SHELL
+REM -v VERBOSE
+REM -L <local_port>:<distant_host>:<distant_port> <putty config>
+
+plink -v -N -L 9200:vpc-vpc-etl-es-public-devsvcdev-debriixex5z7p3orlxtsbkvhoi.us-west-2.es.amazonaws.com:443 CloudOps-ETL
@@ -1,5 +1,5 @@
 REM -N DO NOT START SHELL
 REM -v VERBOSE
-REM -L <local_port>:<distant_host>:<distant_port> <putty config for remote>
+REM -L <local_port>:<distant_host>:<distant_port> <putty config>

 plink -v -N -L 3307:bugzilla-masterdb-devsvcdev-2017100901.czvlmp16hwe4.us-west-2.rds.amazonaws.com:3306 CloudOps-ETL
@@ -9,15 +9,15 @@
     },
     "public_bugs_reference": {
         "description": "pointer to es with known good *public* results",
-        "filename": "./tests/resources/config/public_bugs_reference_es.json"
+        "filename": "tests/resources/config/public_bugs_reference_es.json"
     },
     "public_comments_reference": {
         "description": "pointer to es with known good public comments",
-        "filename": "./tests/resources/config/public_comments_reference_es.json"
+        "filename": "tests/resources/config/public_comments_reference_es.json"
     },
     "private_bugs_reference": {
         "description": "pointer to es with known good results",
-        "filename": "./tests/resources/config/private_bugs_reference_es.json"
+        "filename": "tests/resources/config/private_bugs_reference_es.json"
     },
     "private_comments_reference": {
         "description": "pointer to es with known good private comments",
@@ -25,7 +25,7 @@
     },
     "candidate": {
         "description": "pointer to es with test results",
-        "filename": "./tests/results/test_results.json",
+        "filename": "tests/results/test_results.json",
         "host": "http://localhost",
         "port": 9200,
         "index": "test_bugs",
@@ -34,10 +34,10 @@
     "fake": {
         //FOR TESTING JSON CREATION, NO NEED FOR REAL ES
         "bugs": {
-            "filename": "./tests/results/test_bugs.json"
+            "filename": "tests/results/test_bugs.json"
         },
         "comments": {
-            "filename": "./tests/results/test_comments.json"
+            "filename": "tests/results/test_comments.json"
         }
     },
     "real": {
@@ -100,11 +100,11 @@
             // 937428 whitespace after comma in user story, complex diff
             // 248970 another cutoff review request
         ],
-        "temp_dir": "./tests/resources",
-        "errors": "./tests/results/errors",
+        "temp_dir": "tests/resources",
+        "errors": "tests/results/errors",
         "allow_private_bugs": true,
-        "last_run_time": "./tests/results/last_run_time.txt",
-        "first_run_time": "./tests/results/first_run_time.txt",
+        "last_run_time": "tests/results/last_run_time.txt",
+        "first_run_time": "tests/results/first_run_time.txt",
         "look_back": 3600000 //1hour
     },
     "alias": {
@@ -123,7 +123,7 @@
         }
     },
     "bugzilla": {
-        "filename": "./tests/resources/sql/small_bugzilla.sql",
+        "filename": "tests/resources/sql/small_bugzilla.sql",
         "preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
         "host": "localhost",
         "port": 3307,
@@ -151,7 +151,7 @@
         "log": [
             {
                 "class": "logging.handlers.RotatingFileHandler",
-                "filename": "./tests/results/logs/test_etl.log",
+                "filename": "tests/results/logs/test_etl.log",
                 "maxBytes": 10000000,
                 "backupCount": 200,
                 "encoding": "utf8"
@@ -0,0 +1,104 @@
+{
+    "param": {
+        "increment": 10000,
+        "bugs": [
+            // 384,
+            // 1108,
+            // 1045,
+            // 1046,
+            // 1157,
+            // 1877,
+            // 1865,
+            // 1869,
+            // 2586,
+            // 3140,
+            // 6810,
+            // 9622,
+            // 10575,
+            11040
+            // 12911,
+            // 67742,
+            // 96421,
+            // 123203,
+            // 178960,
+            // 367518,
+            // 457765,
+            // 458397,
+            // 471427,
+            // 544327,
+            // 547727,
+            // 643420,
+            // 692436,
+            // 726635,
+            // 813650
+            // 1165765 VERY LONG short_desc
+            // 1007019 does not have bug_status, or component, or product
+            // 372836 (REVIEW FLAGS TEST)
+            // 13534 (REVIEW MOVES TO OTHER PERSON)
+            // 393845 added blocking1.9+ twice
+            // 671185 *many* review requests
+            // 937428 whitespace after comma in user story, complex diff
+            // 248970 another cutoff review request
+            // 248971 another cutoff review request
+        ],
+        "temp_dir": "tests/resources",
+        "errors": "tests/results/errors",
+        "allow_private_bugs": false,
+        "last_run_time": "tests/results/last_run_time.txt",
+        "first_run_time": "tests/results/first_run_time.txt",
+        "look_back": 3600000
+        //1hour
+    },
+    "alias": {
+        "increment": 1000000,
+        "file": {
+            "path": "resources/schema/bugzilla_aliases.json",
+            "$ref": "file://~/private.json#alias_file"
+        }
+    },
+    "reference": {
+        "filename": "tests/resources/reference/public_bugs.json"
+    },
+    "bugzilla": {
+        "$ref": "file://~/private.json#bugzilla-dev",
+        "preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
+        "host": "localhost",
+        "port": 3307,
+        "schema": "bugs",
+        "expires_on": 1372867005000,
+        "debug": true,
+        "readonly": true
+    },
+    "elasticsearch": {
+        "host": "http://localhost",
+        "port": 9200,
+        "schema": {
+            "$ref": "../../../resources/schema/bug_version.json"
+        },
+        "index": "test_bugs",
+        "type": "bug_version",
+        "filename": "tests/results/test_results.json",
+    },
+    "constants": {
+        "pyLibrary.env.http.default_headers": {
+            "Referer": "https://wiki.mozilla.org/BMO/ElasticSearch"
+        }
+    },
+    "debug": {
+        "trace": true,
+        "profile": false,
+        "log": [
+            {
+                "class": "logging.handlers.RotatingFileHandler",
+                "filename": "tests/results/logs/test_etl.log",
+                "maxBytes": 10000000,
+                "backupCount": 10,
+                "encoding": "utf8"
+            },
+            {
+                "log_type": "stream",
+                "stream": "sys.stdout"
+            }
+        ]
+    }
+}
@@ -1,6 +1,6 @@

 Mozilla's Bugzilla instance stores time in local DST-adjusted Pacific Time.
-The ETL script converts all times to GMT for ease of comparison. If you plan to
+The ETL script converts all times to GMT for ease of comparison. If you plan to
 run tests or generate your own ES instance from a MySQL database
 you will need to install the timezone database to perform this conversion.

@@ -28,4 +28,4 @@ Linux without zoneinfo database
 * ```mysql_tzinfo_to_sql <directory> | mysql -u root mysql```


-([more Linux instructions](http://dev.mysql.com/doc/refman/4.1/en/mysql-tzinfo-to-sql.html))
+([more Linux instructions](http://dev.mysql.com/doc/refman/4.1/en/mysql-tzinfo-to-sql.html))
@@ -17,7 +17,7 @@ from datetime import datetime
 from mo_future import text_type

 from bzETL import extract_bugzilla, bz_etl
-from bzETL.bz_etl import etl, esfilter2sqlwhere, MIN_TIMESTAMP
+from bzETL.bz_etl import etl, MIN_TIMESTAMP
 from bzETL.extract_bugzilla import get_current_time, SCREENED_WHITEBOARD_BUG_GROUPS
 from jx_python import jx
 from mo_dots import Data, Null, wrap
@@ -29,6 +29,7 @@ from mo_math.randoms import Random
 from mo_threads import ThreadedQueue, Till
 from mo_times import Timer
 from pyLibrary import convert
+from pyLibrary.queries.jx_usingMySQL import esfilter2sqlwhere
 from pyLibrary.sql.mysql import all_db, MySQL
 from pyLibrary.testing import elasticsearch
 from util import database, compare_es
@@ -618,7 +619,7 @@ def compare_both(candidate, reference, settings, some_bugs):
     ref_versions = \
         jx.sort(
             #ADDED TO FIX OLD PRODUCTION BUG VERSIONS
-            [compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
+            [compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
             "modified_ts"
         )

@@ -32,7 +32,7 @@ class TestOneETL(unittest.TestCase):
     I USE THIS TO IDENTIFY CANDIDATES TO ADD TO THE TEST SUITE
     """
     def setUp(self):
-        self.settings = startup.read_settings(filename="test_one_settings.json")
+        self.settings = startup.read_settings(filename="tests/resources/config/test_one.json")
         constants.set(self.settings.constants)
         Log.start(self.settings.debug)

@@ -81,7 +81,7 @@ class TestOneETL(unittest.TestCase):
         #     "port": 9200,
         #     "index": ElasticSearch.proto_name("test_public_bugs"),
         #     "type": "bug_version",
-        #     "schema_file": "./resources/json/bug_version.json"
+        #     "schema_file": "resources/json/bug_version.json"
         # })
         # es = ElasticSearch.create_index(es_settings, File(es_settings.schema_file).read())
         # es.delete_all_but("test_public_bugs", es_settings.index)
@@ -56,7 +56,7 @@ return output.toString()


 class Painless(Expression):
-    __slots__ = ("miss", "type", "expr", "many")
+    __slots__ = ("miss", "data_type", "expr", "many")

     def __init__(self, type, expr, frum, miss=None, many=False):
         self.miss = coalesce(miss, FALSE)  # Expression that will return true/false to indicate missing result
@@ -591,11 +591,15 @@ def value_compare(left, right, ordering=1):

     ltype = type(left)
     rtype = type(right)
-    type_diff = TYPE_ORDER.get(ltype, 10) - TYPE_ORDER.get(rtype, 10)
+    ltype_num = TYPE_ORDER.get(ltype, 10)
+    rtype_num = TYPE_ORDER.get(rtype, 10)
+    type_diff = ltype_num - rtype_num
     if type_diff != 0:
         return ordering if type_diff > 0 else -ordering

-    if ltype is builtin_tuple:
+    if ltype_num == 9:
+        return 0
+    elif ltype is builtin_tuple:
         for a, b in zip(left, right):
             c = value_compare(a, b)
             if c != 0:
@@ -1084,6 +1088,8 @@ def accumulate(vals):

 def reverse(vals):
+    # TODO: Test how to do this fastest
+    if not hasattr(vals, "len"):
+        vals = list(vals)
     l = len(vals)
     output = [None] * l

@@ -51,7 +51,7 @@ class Multiset(object):
 class _Multiset(Multiset):

     def __new__(cls, *args):
-        return object.__new__(cls, *args)
+        return object.__new__(cls)


     def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
@@ -150,7 +150,7 @@ class _Multiset(Multiset):

 class _NegMultiset(Multiset):
     def __new__(cls, *args, **kwargs):
-        return object.__new__(cls, *args, **kwargs)
+        return object.__new__(cls)

     def __init__(self, list=None, key_field=None, count_field=None, **kwargs):
         if not key_field and not count_field:
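The two `__new__` fixes above matter because `object.__new__` stopped accepting extra arguments (a `TypeError` on Python 3, a deprecation on late Python 2) whenever `__init__` is overridden; the arguments still reach `__init__` unchanged. A standalone demonstration:

```python
# Standalone demonstration of the object.__new__ rule behind the fix above.
class Box(object):
    def __new__(cls, *args, **kwargs):
        return object.__new__(cls)  # passing *args here raises TypeError on Python 3

    def __init__(self, value):
        self.value = value

print(Box(42).value)  # 42
```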
@@ -16,7 +16,7 @@ from collections import Mapping, Iterable, Set

 from mo_dots import unwrap, tuplewrap, wrap
 from mo_dots.objects import datawrap
-from mo_future import PY2
+from mo_future import PY2, iteritems
 from mo_logs import Log
 from mo_logs.exceptions import suppress_exception

@@ -73,7 +73,7 @@ class UniqueIndex(Set, Mapping):
         return self._data.keys()

     def pop(self):
-        output = self._data.iteritems().next()[1]
+        output = iteritems(self._data).next()[1]
         self.remove(output)
         return wrap(output)

@@ -15,7 +15,7 @@ from collections import MutableMapping, Mapping
 from copy import deepcopy

 from mo_dots import _getdefault, hash_value, literal_field, coalesce, listwrap, get_logger
-from mo_future import text_type
+from mo_future import text_type, PY2

 _get = object.__getattribute__
 _set = object.__setattr__
@@ -210,7 +210,7 @@ class Data(MutableMapping):
     def iteritems(self):
         # LOW LEVEL ITERATION, NO WRAPPING
         d = _get(self, "_dict")
-        return ((k, wrap(v)) for k, v in d.iteritems())
+        return ((k, wrap(v)) for k, v in iteritems(d))

     def keys(self):
         d = _get(self, "_dict")
@@ -429,9 +429,15 @@ class _DictUsingSelf(dict):
             output.append((prefix + literal_field(k), v))
         return output

-    def iteritems(self):
-        for k, v in dict.iteritems(self):
-            yield k, wrap(v)
+    if PY2:
+        def iteritems(self):
+            for k, v in dict.iteritems(self):
+                yield k, wrap(v)
+    else:
+        def iteritems(self):
+            for k, v in dict.items(self):
+                yield k, wrap(v)


     def keys(self):
         return set(dict.keys(self))
@@ -48,6 +48,9 @@ if PY3:
     from io import BytesIO
     from _thread import allocate_lock, get_ident, start_new_thread, interrupt_main

+    def iteritems(d):
+        return d.items()
+
     def transpose(*args):
         return list(zip(*args))

@@ -107,6 +110,9 @@ else:
     from io import BytesIO
     from thread import allocate_lock, get_ident, start_new_thread, interrupt_main

+    def iteritems(d):
+        return d.iteritems()
+
     def get_function_name(func):
         return func.func_name

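These two shims give callers a single import for dict iteration, which is what the many `d.iteritems()` → `iteritems(d)` rewrites elsewhere in this commit rely on. Usage, for illustration:

```python
# Illustration only: iteritems(d) maps to d.iteritems() on PY2 and d.items() on PY3.
from mo_future import iteritems

counts = {"a": 1, "b": 2}
for key, value in iteritems(counts):
    print(key, value)
```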
@@ -19,7 +19,7 @@ from datetime import datetime
 import sys

 from mo_dots import coalesce, listwrap, wrap, unwrap, unwraplist, set_default, FlatList
-from mo_future import text_type, PY3
+from mo_future import text_type, PY3, iteritems
 from mo_logs import constants
 from mo_logs.exceptions import Except, suppress_exception
 from mo_logs.strings import indent
@@ -469,7 +469,7 @@ def write_profile(profile_settings, stats):
             "line": f[1],
             "method": f[2].lstrip("<").rstrip(">")
         }
-        for f, d, in acc.stats.iteritems()
+        for f, d, in iteritems(acc.stats)
     ]
     stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
     stats_file.write(convert.list2tab(stats))
@@ -102,7 +102,6 @@ class Except(Exception):
         with suppress_exception:
             cause_strings.append(text_type(c))

-
         output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)

     return output
@@ -84,7 +84,7 @@ def make_log_from_settings(settings):
     path = ".".join(path[:-1])
     constructor = None
     try:
-        temp = __import__(path, globals(), locals(), [class_name], -1)
+        temp = __import__(path, globals(), locals(), [class_name], 0)
         constructor = object.__getattribute__(temp, class_name)
     except Exception as e:
         if settings.stream and not constructor:
@@ -16,7 +16,7 @@ from __future__ import unicode_literals
 import sys
 from time import time

-from mo_future import text_type
+from mo_future import text_type, PY2
 from mo_logs import Log
 from mo_logs.log_usingNothing import StructuredLogger
 from mo_logs.strings import expand_template
@@ -45,7 +45,7 @@ class StructuredLogger_usingThreadedStream(StructuredLogger):
         # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
         from mo_threads import Queue

-        if use_UTF8:
+        if use_UTF8 and PY2:
             def utf8_appender(value):
                 if isinstance(value, text_type):
                     value = value.encode('utf8')
@@ -83,7 +83,7 @@ def read_settings(filename=None, defs=None, env_filename=None):
         "help": "path to JSON file with settings",
         "type": str,
         "dest": "filename",
-        "default": "./config.json",
+        "default": "config.json",
         "required": False
     })
     args = argparse(defs)
@@ -22,6 +22,8 @@ from datetime import datetime as builtin_datetime
 from datetime import timedelta, date
 from json.encoder import encode_basestring

+import sys
+
 from mo_dots import coalesce, wrap, get_module
 from mo_future import text_type, xrange, binary_type, round as _round, PY3, get_function_name
 from mo_logs.convert import datetime2unix, datetime2string, value2json, milli2datetime, unix2datetime
@@ -512,17 +514,26 @@ THE REST OF THIS FILE IS TEMPLATE EXPANSION CODE USED BY mo-logs
 """


+recursive_counter = 0
 def expand_template(template, value):
     """
     :param template: A UNICODE STRING WITH VARIABLE NAMES IN MOUSTACHES `{{}}`
     :param value: Data HOLDING THE PARAMTER VALUES
     :return: UNICODE STRING WITH VARIABLES EXPANDED
     """
-    value = wrap(value)
-    if isinstance(template, text_type):
-        return _simple_expand(template, (value,))
+    global recursive_counter
+    if recursive_counter>10:
+        sys.stderr.write("problem with template expansion")
+        return
+    recursive_counter +=1
+    try:
+        value = wrap(value)
+        if isinstance(template, text_type):
+            return _simple_expand(template, (value,))

-    return _expand(template, (value,))
+        return _expand(template, (value,))
+    finally:
+        recursive_counter -=1


 def common_prefix(*args):
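`expand_template` fills `{{name}}` moustaches from a value, and the counter added above stops infinite recursion when expansion itself fails and the failure handler tries to expand another template. Typical use, mirroring the `Log.note` templates seen throughout this commit:

```python
# Typical call; the template and parameters below are taken from this diff.
from mo_logs.strings import expand_template

print(expand_template("Load range {{start}}-{{end}}", {"start": 0, "end": 10000}))
# -> Load range 0-10000
```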
@@ -36,7 +36,7 @@ selecting only the properties it requires.
         "type" : "test_result",
         "debug" : true,
         "limit_replicas" : true,
-        "schema_file" : "./resources/schema/test_schema.json"
+        "schema_file" : "resources/schema/test_schema.json"
     },

@@ -93,11 +93,9 @@ class Index(Features):
             Log.error("not allowed")
         if type == None:
             # NO type PROVIDED, MAYBE THERE IS A SUITABLE DEFAULT?
-            with self.cluster.metadata_locker:
-                index_ = self.cluster._metadata.indices[self.settings.index]
+            index_ = self.cluster.get_metadata().indices[self.settings.index]
             if not index_:
-                indices = self.cluster.get_metadata().indices
-                index_ = indices[self.settings.index]
                 Log.error("can not find index {{index}}", index=self.settings.index)

             candidate_types = list(index_.mappings.keys())
             if len(candidate_types) != 1:
@@ -45,11 +45,19 @@ DEBUG = False
 FILE_SIZE_LIMIT = 100 * 1024 * 1024
 MIN_READ_SIZE = 8 * 1024
-ZIP_REQUEST = False

 default_headers = Data()  # TODO: MAKE THIS VARIABLE A SPECIAL TYPE OF EXPECTED MODULE PARAMETER SO IT COMPLAINS IF NOT SET
-default_timeout = 600
+DEFAULTS = {
+    "allow_redirects": True,
+    "stream": True,
+    "verify": True,
+    "timeout": 600,
+    "zip": False,
+    "retry": {"times": 1, "sleep": 0}
+}

 _warning_sent = False

+request_count = 0

@@ -87,7 +95,7 @@ def request(method, url, zip=None, retry=None, **kwargs):
         failures = []
         for remaining, u in jx.countdown(url):
             try:
-                response = request(method, u, zip=zip, retry=retry, **kwargs)
+                response = request(method, u, retry=retry, **kwargs)
                 if Math.round(response.status_code, decimal=-2) not in [400, 500]:
                     return response
                 if not remaining:
@@ -106,16 +114,10 @@ def request(method, url, zip=None, retry=None, **kwargs):
         session.headers.update(default_headers)

     with closing(sess):
-        if zip is None:
-            zip = ZIP_REQUEST
-
-        if isinstance(url, text_type):
+        if PY2 and isinstance(url, text_type):
             # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
             url = url.encode('ascii')

-        _to_ascii_dict(kwargs)
-        timeout = kwargs['timeout'] = coalesce(kwargs.get('timeout'), default_timeout)
-
         if retry == None:
             retry = Data(times=1, sleep=0)
         elif isinstance(retry, Number):
@@ -126,6 +128,9 @@ def request(method, url, zip=None, retry=None, **kwargs):
             retry.sleep = retry.sleep.seconds
         set_default(retry, {"times": 1, "sleep": 0})

+        _to_ascii_dict(kwargs)
+        set_default(kwargs, DEFAULTS)
+
         if 'json' in kwargs:
             kwargs['data'] = value2json(kwargs['json']).encode('utf8')
             del kwargs['json']
@@ -134,7 +139,7 @@ def request(method, url, zip=None, retry=None, **kwargs):
         headers = kwargs['headers'] = unwrap(coalesce(kwargs.get('headers'), {}))
         set_default(headers, {'Accept-Encoding': 'compress, gzip'})

-        if zip and len(coalesce(kwargs.get('data'))) > 1000:
+        if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
             compressed = convert.bytes2zip(kwargs['data'])
             headers['content-encoding'] = 'gzip'
             kwargs['data'] = compressed
@@ -154,12 +159,15 @@ def request(method, url, zip=None, retry=None, **kwargs):
                 if DEBUG:
                     Log.note(u"http {{method}} to {{url}}", method=method, url=url)
+                request_count += 1

+                del kwargs['retry']
+                del kwargs['zip']
                 return session.request(method=method, url=url, **kwargs)
             except Exception as e:
                 errors.append(Except.wrap(e))

         if " Read timed out." in errors[0]:
-            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=timeout, times=retry.times, cause=errors[0])
+            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
         else:
             Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
@@ -183,8 +191,6 @@ else:


 def get(url, **kwargs):
-    kwargs.setdefault('allow_redirects', True)
-    kwargs.setdefault('stream', True)
     return HttpResponse(request('get', url, **kwargs))

@@ -204,23 +210,19 @@ def get_json(url, **kwargs):


 def options(url, **kwargs):
-    kwargs.setdefault('allow_redirects', True)
-    kwargs.setdefault('stream', True)
     return HttpResponse(request('options', url, **kwargs))


 def head(url, **kwargs):
     kwargs.setdefault('allow_redirects', False)
-    kwargs.setdefault('stream', True)
     return HttpResponse(request('head', url, **kwargs))


 def post(url, **kwargs):
-    kwargs.setdefault('stream', True)
     return HttpResponse(request('post', url, **kwargs))


 def delete(url, **kwargs):
     kwargs.setdefault('stream', False)
     return HttpResponse(request('delete', url, **kwargs))

@@ -253,15 +255,9 @@ def put(url, **kwargs):


 def patch(url, **kwargs):
-    kwargs.setdefault('stream', True)
     return HttpResponse(request('patch', url, **kwargs))


-def delete(url, **kwargs):
-    kwargs.setdefault('stream', False)
-    return HttpResponse(request('delete', url, **kwargs))
-
-
 class HttpResponse(Response):
     def __new__(cls, resp):
         resp.__class__ = HttpResponse
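The per-verb `setdefault` calls removed above are replaced by a single `set_default(kwargs, DEFAULTS)` merge inside `request()`: caller-supplied values win and `DEFAULTS` only fills the gaps, which is also the hook the alias config uses to force `verify: false`. A small sketch of the merge behavior, assumed from how `mo_dots.set_default` is used here:

```python
# Sketch of the merge: keys already present are kept, missing ones come from DEFAULTS.
from mo_dots import set_default

DEFAULTS = {"allow_redirects": True, "stream": True, "verify": True, "timeout": 600}
kwargs = set_default({"timeout": 30}, DEFAULTS)
print(kwargs["timeout"], kwargs["verify"])  # 30 True -- caller's timeout kept, verify filled in
```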
@@ -22,7 +22,7 @@ import mo_json
 from jx_python import jx
 from mo_dots import coalesce, wrap, listwrap, unwrap
 from mo_files import File
-from mo_future import text_type, utf8_json_encoder
+from mo_future import text_type, utf8_json_encoder, binary_type
 from mo_kwargs import override
 from mo_logs import Log
 from mo_logs.exceptions import Except, suppress_exception
@@ -159,7 +159,15 @@ class MySQL(object):
         self.transaction_level += 1
         self.execute("SET TIME_ZONE='+00:00'")
         if EXECUTE_TIMEOUT:
-            self.execute("SET MAX_EXECUTION_TIME=" + text_type(EXECUTE_TIMEOUT))
+            try:
+                self.execute("SET MAX_EXECUTION_TIME=" + text_type(EXECUTE_TIMEOUT))
+                self._execute_backlog()
+            except Exception as e:
+                e = Except.wrap(e)
+                if "Unknown system variable 'MAX_EXECUTION_TIME'" in e:
+                    globals()['EXECUTE_TIMEOUT'] = 0  # THIS VERSION OF MYSQL DOES NOT HAVE SESSION LEVEL VARIABLE
+                else:
+                    raise e

     def close(self):
         if self.transaction_level > 0:
@@ -623,7 +631,7 @@ class MySQL(object):

 def utf8_to_unicode(v):
     try:
-        if isinstance(v, str):
+        if isinstance(v, binary_type):
             return v.decode("utf8")
         else:
             return v
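The try/except added above probes the session variable once and then disables the feature for the life of the process when the server predates `MAX_EXECUTION_TIME`. The same fallback pattern in isolation, with invented names (`set_timeout`, `execute_sql` are not from the source):

```python
# The fallback pattern in isolation; set_timeout/execute_sql are invented names.
EXECUTE_TIMEOUT = 600000  # milliseconds

def set_timeout(execute_sql):
    global EXECUTE_TIMEOUT
    if not EXECUTE_TIMEOUT:
        return  # already known to be unsupported
    try:
        execute_sql("SET MAX_EXECUTION_TIME=" + str(EXECUTE_TIMEOUT))
    except Exception as e:
        if "Unknown system variable 'MAX_EXECUTION_TIME'" in str(e):
            EXECUTE_TIMEOUT = 0  # this server has no such session variable
        else:
            raise
```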
@@ -13,7 +13,7 @@ from __future__ import division
 from __future__ import unicode_literals

 from mo_dots import wrap
-from jx_python.jx_usingMySQL import esfilter2sqlwhere
+from pyLibrary.queries.jx_usingMySQL import esfilter2sqlwhere


 def find_holes(db, table_name, column_name, _range, filter=None):
@@ -60,10 +60,10 @@ class FakeES():
     @override
     def __init__(self, filename, host="fake", index="fake", kwargs=None):
         self.settings = kwargs
-        self.filename = kwargs.filename
+        self.filename = filename
         try:
             self.data = mo_json.json2value(File(self.filename).read())
-        except Exception:
+        except Exception as e:
             self.data = Data()

     def search(self, query):