diff --git a/bugzilla_etl/bz_etl.py b/bugzilla_etl/bz_etl.py index 0b7b7bb..cf483ef 100644 --- a/bugzilla_etl/bz_etl.py +++ b/bugzilla_etl/bz_etl.py @@ -182,6 +182,9 @@ def incremental_etl(param, db, esq, esq_comments, bug_output_queue, comment_outp # REMOVE PRIVATE BUGS private_bugs = get_private_bugs_for_delete(db, param) + + alias_analyzer = AliasAnalyzer(kwargs.alias) + Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", private_bugs=sorted(private_bugs)) for g, delete_bugs in jx.groupby(private_bugs, size=1000): still_existing = get_bug_ids(esq, {"terms": {"bug_id": delete_bugs}}) @@ -216,8 +219,7 @@ def incremental_etl(param, db, esq, esq_comments, bug_output_queue, comment_outp refresh_param.start_time_str = extract_bugzilla.milli2string(db, MIN_TIMESTAMP) try: - analyzer = AliasAnalyzer(kwargs.alias) - etl(db, bug_output_queue, refresh_param.copy(), analyzer, please_stop=None) + etl(db, bug_output_queue, refresh_param.copy(), alias_analyzer, please_stop=None) etl_comments(db, esq_comments.es, refresh_param.copy(), please_stop=None) except Exception as e: Log.error( @@ -277,13 +279,14 @@ def incremental_etl(param, db, esq, esq_comments, bug_output_queue, comment_outp bug_output_queue=bug_output_queue, comment_output_queue=comment_output_queue, param=param.copy(), - alias_analyzer=AliasAnalyzer(kwargs.alias) + alias_analyzer=alias_analyzer ) @override def full_etl(resume_from_last_run, param, db, esq, esq_comments, bug_output_queue, comment_output_queue, kwargs): end = coalesce(param.end, db.query("SELECT max(bug_id) bug_id FROM bugs")[0].bug_id) start = coalesce(param.start, 0) + alias_analyzer = AliasAnalyzer(kwargs=kwargs.alias) if resume_from_last_run: # FIND THE LAST GOOD BUG NUMBER PROCESSED (WE GO BACKWARDS, SO LOOK FOR MINIMUM BUG, AND ROUND UP) end = coalesce(param.end, Math.ceiling(get_min_bug_id(esq), param.increment), end) @@ -341,7 +344,7 @@ def full_etl(resume_from_last_run, param, db, esq, esq_comments, bug_output_queu bug_output_queue, comment_output_queue, param.copy(), - alias_analyzer=AliasAnalyzer(kwargs=kwargs.alias) + alias_analyzer=alias_analyzer ) except Exception as e: diff --git a/bugzilla_etl/parse_bug_history.py b/bugzilla_etl/parse_bug_history.py index 3a14773..0fd4555 100644 --- a/bugzilla_etl/parse_bug_history.py +++ b/bugzilla_etl/parse_bug_history.py @@ -51,7 +51,7 @@ from mo_dots import inverse, coalesce, wrap, unwrap, literal_field, listwrap from mo_dots.datas import Data from mo_dots.lists import FlatList from mo_dots.nones import Null -from mo_future import text_type, long +from mo_future import text_type, long, PYPY from mo_json import value2json from mo_logs import Log, strings, Except from mo_logs.strings import apply_diff @@ -125,7 +125,7 @@ class BugHistoryParser(object): if row_in.bug_id == STOP_BUG: return self.startNewBug(row_in) - if DEBUG_MEMORY: + if DEBUG_MEMORY and not PYPY: import objgraph result = objgraph.growth() diff --git a/requirements.txt b/requirements.txt index 2118588..8f81537 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ pymysql -requests \ No newline at end of file +requests diff --git a/resources/schema/bug_version.json b/resources/schema/bug_version.json index 4a221b5..84288b9 100644 --- a/resources/schema/bug_version.json +++ b/resources/schema/bug_version.json @@ -58,6 +58,7 @@ }, "mappings": { "bug_version": { + "date_detection": false, "properties": { "cf_user_story": { "type": "text", diff --git a/vendor/mo_future/__init__.py b/vendor/mo_future/__init__.py index b494ae3..579b86a 100644 --- a/vendor/mo_future/__init__.py +++ b/vendor/mo_future/__init__.py @@ -17,6 +17,14 @@ import sys PY3 = sys.version_info[0] == 3 PY2 = sys.version_info[0] == 2 +PYPY = False +try: + import __pypy__ as _ + PYPY=True +except Exception: + PYPY=False + + none_type = type(None) boolean_type = type(True) diff --git a/vendor/mo_json/encoder.py b/vendor/mo_json/encoder.py index 198911e..364f102 100644 --- a/vendor/mo_json/encoder.py +++ b/vendor/mo_json/encoder.py @@ -13,7 +13,6 @@ from __future__ import unicode_literals import json import math -import sys import time from collections import Mapping from datetime import datetime, date, timedelta @@ -22,7 +21,7 @@ from json.encoder import encode_basestring from math import floor from mo_dots import Data, FlatList, NullType, Null -from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange +from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange, PYPY from mo_json import ESCAPE_DCT, scrub, float2json from mo_logs import Except from mo_logs.strings import utf82unicode, quote @@ -43,8 +42,6 @@ _ = Except # 2) WHEN USING PYPY, WE USE CLEAR-AND-SIMPLE PROGRAMMING SO THE OPTIMIZER CAN DO # ITS JOB. ALONG WITH THE UnicodeBuilder WE GET NEAR C SPEEDS -use_pypy = False - COMMA = u"," QUOTE = u'"' COLON = u":" @@ -54,20 +51,10 @@ COMMA_QUOTE = COMMA + QUOTE PRETTY_COMMA = u", " PRETTY_COLON = u": " -try: +if PYPY: # UnicodeBuilder IS ABOUT 2x FASTER THAN list() from __pypy__.builders import UnicodeBuilder - - use_pypy = True -except Exception as e: - if use_pypy: - sys.stdout.write( - b"*********************************************************\n" - b"** The PyLibrary JSON serializer for PyPy is in use!\n" - b"** Currently running CPython: This will run sloooow!\n" - b"*********************************************************\n" - ) - +else: class UnicodeBuilder(list): def __init__(self, length=None): list.__init__(self) @@ -509,7 +496,7 @@ def unicode_key(key): # OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON? # http://liangnuren.wordpress.com/2012/08/13/python-json-performance/ # http://morepypy.blogspot.ca/2011/10/speeding-up-json-encoding-in-pypy.html -if use_pypy: +if PYPY: json_encoder = pypy_json_encode else: # from ujson import dumps as ujson_dumps