This commit is contained in:
Kyle Lahnakoski 2018-08-17 10:57:21 -04:00
Родитель 404b3177f8
Коммит bacb028321
6 изменённых файлов: 23 добавлений и 24 удалений

Просмотреть файл

@ -182,6 +182,9 @@ def incremental_etl(param, db, esq, esq_comments, bug_output_queue, comment_outp
# REMOVE PRIVATE BUGS # REMOVE PRIVATE BUGS
private_bugs = get_private_bugs_for_delete(db, param) private_bugs = get_private_bugs_for_delete(db, param)
alias_analyzer = AliasAnalyzer(kwargs.alias)
Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", private_bugs=sorted(private_bugs)) Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", private_bugs=sorted(private_bugs))
for g, delete_bugs in jx.groupby(private_bugs, size=1000): for g, delete_bugs in jx.groupby(private_bugs, size=1000):
still_existing = get_bug_ids(esq, {"terms": {"bug_id": delete_bugs}}) still_existing = get_bug_ids(esq, {"terms": {"bug_id": delete_bugs}})
@ -216,8 +219,7 @@ def incremental_etl(param, db, esq, esq_comments, bug_output_queue, comment_outp
refresh_param.start_time_str = extract_bugzilla.milli2string(db, MIN_TIMESTAMP) refresh_param.start_time_str = extract_bugzilla.milli2string(db, MIN_TIMESTAMP)
try: try:
analyzer = AliasAnalyzer(kwargs.alias) etl(db, bug_output_queue, refresh_param.copy(), alias_analyzer, please_stop=None)
etl(db, bug_output_queue, refresh_param.copy(), analyzer, please_stop=None)
etl_comments(db, esq_comments.es, refresh_param.copy(), please_stop=None) etl_comments(db, esq_comments.es, refresh_param.copy(), please_stop=None)
except Exception as e: except Exception as e:
Log.error( Log.error(
@ -277,13 +279,14 @@ def incremental_etl(param, db, esq, esq_comments, bug_output_queue, comment_outp
bug_output_queue=bug_output_queue, bug_output_queue=bug_output_queue,
comment_output_queue=comment_output_queue, comment_output_queue=comment_output_queue,
param=param.copy(), param=param.copy(),
alias_analyzer=AliasAnalyzer(kwargs.alias) alias_analyzer=alias_analyzer
) )
@override @override
def full_etl(resume_from_last_run, param, db, esq, esq_comments, bug_output_queue, comment_output_queue, kwargs): def full_etl(resume_from_last_run, param, db, esq, esq_comments, bug_output_queue, comment_output_queue, kwargs):
end = coalesce(param.end, db.query("SELECT max(bug_id) bug_id FROM bugs")[0].bug_id) end = coalesce(param.end, db.query("SELECT max(bug_id) bug_id FROM bugs")[0].bug_id)
start = coalesce(param.start, 0) start = coalesce(param.start, 0)
alias_analyzer = AliasAnalyzer(kwargs=kwargs.alias)
if resume_from_last_run: if resume_from_last_run:
# FIND THE LAST GOOD BUG NUMBER PROCESSED (WE GO BACKWARDS, SO LOOK FOR MINIMUM BUG, AND ROUND UP) # FIND THE LAST GOOD BUG NUMBER PROCESSED (WE GO BACKWARDS, SO LOOK FOR MINIMUM BUG, AND ROUND UP)
end = coalesce(param.end, Math.ceiling(get_min_bug_id(esq), param.increment), end) end = coalesce(param.end, Math.ceiling(get_min_bug_id(esq), param.increment), end)
@ -341,7 +344,7 @@ def full_etl(resume_from_last_run, param, db, esq, esq_comments, bug_output_queu
bug_output_queue, bug_output_queue,
comment_output_queue, comment_output_queue,
param.copy(), param.copy(),
alias_analyzer=AliasAnalyzer(kwargs=kwargs.alias) alias_analyzer=alias_analyzer
) )
except Exception as e: except Exception as e:

Просмотреть файл

@ -51,7 +51,7 @@ from mo_dots import inverse, coalesce, wrap, unwrap, literal_field, listwrap
from mo_dots.datas import Data from mo_dots.datas import Data
from mo_dots.lists import FlatList from mo_dots.lists import FlatList
from mo_dots.nones import Null from mo_dots.nones import Null
from mo_future import text_type, long from mo_future import text_type, long, PYPY
from mo_json import value2json from mo_json import value2json
from mo_logs import Log, strings, Except from mo_logs import Log, strings, Except
from mo_logs.strings import apply_diff from mo_logs.strings import apply_diff
@ -125,7 +125,7 @@ class BugHistoryParser(object):
if row_in.bug_id == STOP_BUG: if row_in.bug_id == STOP_BUG:
return return
self.startNewBug(row_in) self.startNewBug(row_in)
if DEBUG_MEMORY: if DEBUG_MEMORY and not PYPY:
import objgraph import objgraph
result = objgraph.growth() result = objgraph.growth()

Просмотреть файл

@ -58,6 +58,7 @@
}, },
"mappings": { "mappings": {
"bug_version": { "bug_version": {
"date_detection": false,
"properties": { "properties": {
"cf_user_story": { "cf_user_story": {
"type": "text", "type": "text",

8
vendor/mo_future/__init__.py поставляемый
Просмотреть файл

@ -17,6 +17,14 @@ import sys
PY3 = sys.version_info[0] == 3 PY3 = sys.version_info[0] == 3
PY2 = sys.version_info[0] == 2 PY2 = sys.version_info[0] == 2
# Detect whether we are running on PyPy by probing for the `__pypy__` module.
# Only ImportError is caught: a missing module means "not PyPy", while any
# other failure should surface instead of being silently treated as CPython.
try:
    import __pypy__ as _
except ImportError:
    PYPY = False
else:
    PYPY = True
none_type = type(None) none_type = type(None)
boolean_type = type(True) boolean_type = type(True)

21
vendor/mo_json/encoder.py поставляемый
Просмотреть файл

@ -13,7 +13,6 @@ from __future__ import unicode_literals
import json import json
import math import math
import sys
import time import time
from collections import Mapping from collections import Mapping
from datetime import datetime, date, timedelta from datetime import datetime, date, timedelta
@ -22,7 +21,7 @@ from json.encoder import encode_basestring
from math import floor from math import floor
from mo_dots import Data, FlatList, NullType, Null from mo_dots import Data, FlatList, NullType, Null
from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange, PYPY
from mo_json import ESCAPE_DCT, scrub, float2json from mo_json import ESCAPE_DCT, scrub, float2json
from mo_logs import Except from mo_logs import Except
from mo_logs.strings import utf82unicode, quote from mo_logs.strings import utf82unicode, quote
@ -43,8 +42,6 @@ _ = Except
# 2) WHEN USING PYPY, WE USE CLEAR-AND-SIMPLE PROGRAMMING SO THE OPTIMIZER CAN DO # 2) WHEN USING PYPY, WE USE CLEAR-AND-SIMPLE PROGRAMMING SO THE OPTIMIZER CAN DO
# ITS JOB. ALONG WITH THE UnicodeBuilder WE GET NEAR C SPEEDS # ITS JOB. ALONG WITH THE UnicodeBuilder WE GET NEAR C SPEEDS
use_pypy = False
COMMA = u"," COMMA = u","
QUOTE = u'"' QUOTE = u'"'
COLON = u":" COLON = u":"
@ -54,20 +51,10 @@ COMMA_QUOTE = COMMA + QUOTE
PRETTY_COMMA = u", " PRETTY_COMMA = u", "
PRETTY_COLON = u": " PRETTY_COLON = u": "
try: if PYPY:
# UnicodeBuilder IS ABOUT 2x FASTER THAN list() # UnicodeBuilder IS ABOUT 2x FASTER THAN list()
from __pypy__.builders import UnicodeBuilder from __pypy__.builders import UnicodeBuilder
else:
use_pypy = True
except Exception as e:
if use_pypy:
sys.stdout.write(
b"*********************************************************\n"
b"** The PyLibrary JSON serializer for PyPy is in use!\n"
b"** Currently running CPython: This will run sloooow!\n"
b"*********************************************************\n"
)
class UnicodeBuilder(list): class UnicodeBuilder(list):
def __init__(self, length=None): def __init__(self, length=None):
list.__init__(self) list.__init__(self)
@ -509,7 +496,7 @@ def unicode_key(key):
# OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON? # OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON?
# http://liangnuren.wordpress.com/2012/08/13/python-json-performance/ # http://liangnuren.wordpress.com/2012/08/13/python-json-performance/
# http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html # http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html
if use_pypy: if PYPY:
json_encoder = pypy_json_encode json_encoder = pypy_json_encode
else: else:
# from ujson import dumps as ujson_dumps # from ujson import dumps as ujson_dumps