# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
#

# Workflow:
# 1. Create the current state object
#
# 2. For each row containing latest state data (fields from the bugs table record, fields from other tables (e.g. attachments, dependencies)):
#    Update the current state object with the latest field values
#
# 3. Walk backward through activity records from bugs_activity (and other activity-type tables). For each set of activities:
#    Create a new bug version object with the metadata about this activity
#    * Set id based on modification time
#    * Set valid_from field as modification time
#    * Set valid_to field as the modification time of the later version - 1 second
#    Add modification data (who, when, what)
#    For single-value fields (e.g. assigned_to, status):
#       Update the original state object by replacing the field value with the contents of the activity's "removed" column
#    For multi-value fields (e.g. blocks, CC, attachments):
#       If a deletion, update the original state object by adding the value from the "removed" column to the field values array.
#       If an addition, find and remove the added item from the original state object
#
# When finished with all activities, the current state object should reflect the original state of the bug when created.
# Now, build the full state of each intermediate version of the bug.
#
# For each bug version object that was created above:
#    Merge the current state object into this version object
#    Update fields according to the modification data
#
# When doing an incremental update (i.e. with start_time specified), look at any bug that has been modified since the
# cutoff time, and build all versions. Only index versions after start_time in ElasticSearch.
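
# A rough sketch of the loop described above (illustrative names only, not the
# actual API of this module):
#
#     state = fetch_current_state(bug_id)                # steps 1 and 2
#     versions = []
#     for activity in activities_newest_first(bug_id):   # step 3: walk backward
#         versions.append(new_version_from(activity))
#         state = undo(state, activity)                  # put back the "removed" values
#     for version in reversed(versions):                 # replay forward in time
#         state = apply_changes(state, version.changes)
#         emit(state)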

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import math
import re

from bugzilla_etl.alias_analysis import AliasAnalyzer
from bugzilla_etl.extract_bugzilla import MAX_TIMESTAMP
from bugzilla_etl.transform_bugzilla import normalize, NUMERIC_FIELDS, MULTI_FIELDS, DIFF_FIELDS, NULL_VALUES, TIME_FIELDS, LONG_FIELDS
from jx_base import meta_columns
from jx_elasticsearch.meta import python_type_to_es_type
from jx_python import jx
from mo_dots import inverse, coalesce, wrap, unwrap, literal_field, listwrap
from mo_dots.datas import Data
from mo_dots.lists import FlatList
from mo_dots.nones import Null
from mo_future import text_type, long, PYPY, PY2
from mo_json import value2json, python_type_to_json_type, STRING
from mo_logs import Log, strings, Except
from mo_logs.strings import apply_diff
from mo_math import MIN, is_integer
from mo_times import Date
from pyLibrary import convert
from pyLibrary.convert import value2number

# Used to split a flag into (type, status [, requestee])
# Example: "review?(mreid@mozilla.com)" -> (review, ?, mreid@mozilla.com)
# Example: "review-" -> (review, -)
FLAG_PATTERN = re.compile("^(.*)([?+-])(\\([^)]*\\))?$")

DEBUG_CHANGES = True  # SHOW ACTIVITY RECORDS BEING PROCESSED
DEBUG_STATUS = False  # SHOW CURRENT STATE OF PROCESSING
DEBUG_CC_CHANGES = False  # SHOW MISMATCHED CC CHANGES
DEBUG_FLAG_MATCHES = False
DEBUG_MISSING_ATTACHMENTS = False
DEBUG_MEMORY = False
DEBUG_DIFF = False
USE_PREVIOUS_VALUE_OBJECTS = False

# Fields that could have been truncated per bug 55161
TRUNC_FIELDS = ["cc", "blocked", "dependson", "keywords"]
KNOWN_MISSING_KEYWORDS = {
    "dogfood", "beta1", "nsbeta1", "nsbeta2", "nsbeta3", "patch", "mozilla1.0", "correctness",
    "mozilla0.9", "mozilla0.9.9+", "nscatfood", "mozilla0.9.3", "fcc508", "nsbeta1+", "mostfreq"
}
KNOWN_INCONSISTENT_FIELDS = {
    "cf_last_resolved",  # CHANGES IN DATABASE TIMEZONE
    "cf_crash_signature"
}
FIELDS_CHANGED = wrap({
    # SOME FIELD VALUES ARE CHANGED WITHOUT HISTORY BEING CHANGED TOO https://bugzilla.mozilla.org/show_bug.cgi?id=997228
    # MAP FROM PROPERTY NAME TO (MAP FROM OLD VALUE TO LIST OF OBSERVED NEW VALUES)
    "cf_blocking_b2g": {"1.5": ["2.0"]}
})
EMAIL_FIELDS = {'cc', 'assigned_to', 'modified_by', 'created_by', 'qa_contact', 'bug_mentor'}

STOP_BUG = 999999999  # AN UNFORTUNATE SIDE EFFECT OF DATAFLOW PROGRAMMING (http://en.wikipedia.org/wiki/Dataflow_programming)


class BugHistoryParser(object):
    def __init__(self, settings, alias_analyzer, output_queue):
        self.startNewBug(wrap({"bug_id": 0, "modified_ts": 0, "_merge_order": 1}))
        self.prevActivityID = Null
        self.prev_row = Null
        self.settings = settings
        self.output = output_queue
        self.alias_analyzer = alias_analyzer

        if not isinstance(alias_analyzer, AliasAnalyzer):
            Log.error("expecting an AliasAnalyzer")

    def processRow(self, row_in):
        if not row_in:
            return
        try:
            self.currBugID = row_in.bug_id
            if self.settings.debug:
                Log.note("process row: {{row}}", row=row_in)

            # If we have switched to a new bug
            if self.prevBugID < self.currBugID:
                if self.prevBugID > 0:
                    # Start replaying versions in ascending order to build full data on each version
                    if DEBUG_STATUS:
                        Log.note("[Bug {{bug_id}}]: Emitting intermediate versions", bug_id=self.prevBugID)
                    self.populateIntermediateVersionObjects()
                if row_in.bug_id == STOP_BUG:
                    return
                self.startNewBug(row_in)
                if DEBUG_MEMORY and not PYPY:
                    import objgraph

                    result = objgraph.growth()
                    if result:
                        width = max(len(name) for name, _, _ in result)
                        Log.note("objgraph.growth:\n{{data}}", data="\n".join('%-*s%9d %+9d' % (width, name, count, delta) for name, count, delta in result))

            # Bugzilla bug workaround - some values were truncated, introducing uncertainty / errors:
            # https://bugzilla.mozilla.org/show_bug.cgi?id=55161
            if row_in.field_name in TRUNC_FIELDS:
                added = convert.value2string(row_in.new_value)
                removed = convert.value2string(row_in.old_value)
                uncertain = False

                if added in ["? ?", "?"]:  # Unknown value extracted from a possibly truncated field
                    uncertain = True
                    Log.note("[Bug {{bug_id}}]: PROBLEM Encountered uncertain added value. Skipping.", bug_id=self.currBugID)
                    row_in.new_value = Null
                elif added != None and added.startswith("? "):  # Possibly truncated value extracted from a possibly truncated field
                    uncertain = True
                    row_in.new_value = added[2:]

                if removed in ["? ?", "?"]:  # Unknown value extracted from a possibly truncated field
                    uncertain = True
                    Log.note("[Bug {{bug_id}}]: PROBLEM Encountered uncertain removed value. Skipping.", bug_id=self.currBugID)
                    row_in.old_value = Null
                elif removed != None and removed.startswith("? "):  # Possibly truncated value extracted from a possibly truncated field
                    uncertain = True
                    row_in.old_value = removed[2:]

                if uncertain and self.currBugState.uncertain == None:
                    # Process the "uncertain" flag as an activity
                    # WE ARE GOING BACKWARDS IN TIME, SO MARKUP PAST
                    Log.note("[Bug {{bug_id}}]: PROBLEM Setting this bug to be uncertain.", bug_id=self.currBugID)
                    self.processBugsActivitiesTableItem(wrap({
                        "modified_ts": row_in.modified_ts,
                        "modified_by": row_in.modified_by,
                        "field_name": "uncertain",
                        "new_value": Null,
                        "old_value": "1",
                        "attach_id": Null
                    }))
                if row_in.new_value == None and row_in.old_value == None:
                    Log.note("[Bug {{bug_id}}]: Nothing added or removed. Skipping update.", bug_id=self.currBugID)
                    return

            # Treat timestamps as int values
            new_value = convert.value2int(row_in.new_value) if row_in.field_name.endswith("_ts") else row_in.new_value

            # Determine where we are in the bug processing workflow
            if row_in._merge_order == 1:
                self.processSingleValueTableItem(row_in.field_name, new_value)
            elif row_in._merge_order == 2:
                self.processMultiValueTableItem(row_in.field_name, new_value)
            elif row_in._merge_order == 7:
                self.processAttachmentsTableItem(row_in)
            elif row_in._merge_order == 8:
                self.processFlagsTableItem(row_in)
            elif row_in._merge_order == 9:
                self.processBugsActivitiesTableItem(row_in)
            else:
                Log.warning("Unhandled merge_order: {{order|quote}}", order=row_in._merge_order)

        except Exception as e:
            Log.warning("Problem processing row: {{row}}", row=row_in, cause=e)
        finally:
            if row_in._merge_order > 1 and self.currBugState.created_ts == None:
                Log.note("[Bug {{bug_id}}]: PROBLEM expecting a created_ts (did you install the timezone database into your MySQL instance?)", bug_id=self.currBugID)

            for b in self.currBugState.blocked:
                if isinstance(b, text_type):
                    Log.note("[Bug {{bug_id}}]: PROBLEM blocked contains a text value", bug_id=self.currBugID)
            self.prev_row = row_in

    @staticmethod
    def uid(bug_id, modified_ts):
        if modified_ts == None:
            Log.error("modified_ts can not be Null")

        return text_type(bug_id) + "_" + text_type(modified_ts)[0:-3]
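
    # For example (a sketch; modified_ts is in milliseconds, so the last three
    # digits are dropped to leave seconds):
    #     BugHistoryParser.uid(12345, 1350000000000) == "12345_1350000000"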

    def startNewBug(self, row_in):
        self.prevBugID = row_in.bug_id
        self.bugVersions = FlatList()
        self.bugVersionsMap = Data()
        self.currActivity = Data()
        self.currBugAttachmentsMap = {}
        self.currBugState = Data(
            _id=BugHistoryParser.uid(row_in.bug_id, row_in.modified_ts),
            bug_id=row_in.bug_id,
            modified_ts=row_in.modified_ts,
            modified_by=row_in.modified_by,
            reported_by=row_in.modified_by,
            attachments=[],
            flags=[]
        )

        # WE FORCE ADD ALL SETS, AND WE WILL scrub() THEM OUT LATER IF NOT USED
        for f in MULTI_FIELDS:
            self.currBugState[f] = set()

        if row_in._merge_order != 1:
            # Problem: No entry found in the 'bugs' table.
            Log.warning("Current bugs table record not found for bug_id: {{bug_id}} (merge order should have been 1, but was {{_merge_order}})", **row_in)

    def processSingleValueTableItem(self, field_name, new_value):
        self.currBugState[field_name] = self.canonical(field_name, new_value)

    def processMultiValueTableItem(self, field_name, new_value):
        if field_name in NUMERIC_FIELDS:
            new_value = int(new_value)
        try:
            self.currBugState[field_name].add(new_value)
            return Null
        except Exception as e:
            Log.warning(
                "Unable to push {{value}} to array field {{field}} on bug {{bug_id}} current value: {{curr_value}}",
                value=new_value,
                field=field_name,
                bug_id=self.currBugID,
                curr_value=self.currBugState[field_name],
                cause=e
            )

    def processAttachmentsTableItem(self, row_in):
        currActivityID = BugHistoryParser.uid(self.currBugID, row_in.modified_ts)
        if currActivityID != self.prevActivityID:
            self.prevActivityID = currActivityID

            self.currActivity = Data(
                _id=currActivityID,
                modified_ts=row_in.modified_ts,
                modified_by=row_in.modified_by,
                changes=[{
                    "field_name": "attachment_added",
                    "attach_id": row_in.attach_id
                }]
            )

            if not self.currActivity.modified_ts:
                Log.error("should not happen")
            self.bugVersions.append(self.currActivity)
            self.bugVersionsMap[currActivityID] = self.currActivity

        att = self.currBugAttachmentsMap.get(row_in.attach_id)
        if att is None:
            att = Data(
                attach_id=row_in.attach_id,
                modified_ts=row_in.modified_ts,
                created_ts=row_in.created_ts,
                modified_by=row_in.modified_by,
                flags=[]
            )
            self.currBugAttachmentsMap[row_in.attach_id] = att

        att["created_ts"] = MIN([row_in.modified_ts, att["created_ts"]])
        if row_in.field_name == "created_ts" and row_in.new_value == None:
            pass
        else:
            att[row_in.field_name] = row_in.new_value

    def processFlagsTableItem(self, row_in):
        flag = parse_flag(row_in.new_value, row_in.modified_ts, row_in.modified_by)
        if row_in.attach_id != None:
            if self.currBugAttachmentsMap.get(row_in.attach_id) == None:
                if DEBUG_MISSING_ATTACHMENTS:
                    Log.note(
                        "[Bug {{bug_id}}]: Unable to find attachment {{attach_id}} for bug_id {{bug_id}}",
                        attach_id=row_in.attach_id,
                        bug_id=self.currBugID
                    )
            else:
                self.currBugAttachmentsMap[row_in.attach_id].flags.append(flag)
        else:
            self.currBugState.flags.append(flag)

    def processBugsActivitiesTableItem(self, row_in):
        if self.currBugState.created_ts == None:
            Log.error("must have created_ts")

        if row_in.field_name == "flagtypes_name":
            row_in.field_name = "flags"

        multi_field_new_value = parseMultiField(row_in.field_name, row_in.new_value)
        multi_field_old_value = parseMultiField(row_in.field_name, row_in.old_value)

        currActivityID = BugHistoryParser.uid(self.currBugID, row_in.modified_ts)
        if currActivityID != self.prevActivityID:
            self.currActivity = self.bugVersionsMap[currActivityID]
            if self.currActivity == None:
                self.currActivity = Data(
                    _id=currActivityID,
                    modified_ts=row_in.modified_ts,
                    modified_by=row_in.modified_by,
                    changes=[]
                )
                if not self.currActivity.modified_ts:
                    Log.error("should not happen")
                self.bugVersions.append(self.currActivity)

            self.prevActivityID = currActivityID

        if row_in.attach_id != None:
            attachment = self.currBugAttachmentsMap.get(row_in.attach_id)
            if attachment == None:
                # THIS HAPPENS WHEN ATTACHMENT IS PRIVATE
                pass
            else:
                if row_in.field_name == "flags":
                    total = attachment[row_in.field_name]
                    total = self.processFlags(total, multi_field_old_value, multi_field_new_value, row_in.modified_ts, row_in.modified_by, "attachment", attachment)
                    attachment[row_in.field_name] = total
                elif row_in.field_name in MULTI_FIELDS:
                    total = attachment[row_in.field_name]
                    # Can have both added and removed values.
                    total = self.removeValues(total, multi_field_new_value, "added", row_in.field_name, "attachment", attachment)
                    total = self.addValues(total, multi_field_old_value, "removed attachment", row_in.field_name, attachment)
                    attachment[row_in.field_name] = total
                else:
                    attachment[row_in.field_name] = row_in.old_value
                    self.currActivity.changes.append({
                        "field_name": row_in.field_name,
                        "new_value": row_in.new_value,
                        "old_value": row_in.old_value,
                        "attach_id": row_in.attach_id
                    })

        else:
            if row_in.field_name == "flags":
                # PROBLEM: WHEN GOING BACK IN HISTORY, AND THE ADDED VALUE IS NOT FOUND IN THE CURRENT
                # STATE, IT IS STILL RECORDED (see above self.currActivity.changes.append...). THIS MEANS
                # WHEN GOING THROUGH THE CHANGES IN ORDER THE VALUE WILL EXIST, BUT IT SHOULD NOT
                total = self.currBugState[row_in.field_name]
                total = self.processFlags(total, multi_field_old_value, multi_field_new_value, row_in.modified_ts, row_in.modified_by, "bug", self.currBugState)
                self.currBugState[row_in.field_name] = total
            elif row_in.field_name in MULTI_FIELDS:
                # PROBLEM: WHEN GOING BACK IN HISTORY, AND THE ADDED VALUE IS NOT FOUND IN THE CURRENT
                # STATE, IT IS STILL RECORDED (see above self.currActivity.changes.append...). THIS MEANS
                # WHEN GOING THROUGH THE CHANGES IN ORDER THE VALUE WILL EXIST, BUT IT SHOULD NOT
                total = self.currBugState[row_in.field_name]
                # Can have both added and removed values.
                total = self.removeValues(total, multi_field_new_value, "added", row_in.field_name, "currBugState", self.currBugState)
                total = self.addValues(total, multi_field_old_value, "removed bug", row_in.field_name, self.currBugState)
                self.currBugState[row_in.field_name] = total
            elif row_in.field_name in DIFF_FIELDS:
                diff = row_in.new_value
                expected_value = self.currBugState[row_in.field_name]
                try:
                    old_value = ApplyDiff(self.currBugID, row_in.modified_ts, expected_value, diff, reverse=True)
                    self.currBugState[row_in.field_name] = old_value
                    self.currActivity.changes.append({
                        "field_name": row_in.field_name,
                        "new_value": expected_value,
                        "old_value": old_value,
                        "attach_id": row_in.attach_id
                    })
                except Exception as e:
                    Log.warning(
                        "[Bug {{bug_id}}]: PROBLEM Unable to process {{field_name}} diff:\n{{diff|indent}}",
                        bug_id=self.currBugID,
                        field_name=row_in.field_name,
                        diff=diff,
                        cause=e
                    )
            elif row_in.field_name in LONG_FIELDS:
                new_value = row_in.new_value
                curr_value = self.currBugState[row_in.field_name]
                try:
                    old_value = LongField(self.currBugID, row_in.modified_ts, curr_value, row_in.old_value)
                    self.currBugState[row_in.field_name] = old_value
                    self.currActivity.changes.append({
                        "field_name": row_in.field_name,
                        "new_value": curr_value,
                        "old_value": old_value,
                        "attach_id": row_in.attach_id
                    })
                except Exception as e:
                    Log.warning(
                        "[Bug {{bug_id}}]: PROBLEM Unable to process {{field_name}} text:\n{{text|indent}}",
                        bug_id=self.currBugID,
                        field_name=row_in.field_name,
                        text=new_value,
                        cause=e
                    )
            else:
                old_value = self.canonical(row_in.field_name, row_in.old_value)

                if DEBUG_CHANGES and row_in.field_name not in KNOWN_INCONSISTENT_FIELDS:
                    expected_value = self.canonical(row_in.field_name, self.currBugState[row_in.field_name])
                    new_value = self.canonical(row_in.field_name, row_in.new_value)

                    if text_type(new_value) != text_type(expected_value):
                        if row_in.field_name in EMAIL_FIELDS:
                            if (is_integer(new_value) or is_integer(expected_value)) and row_in.modified_ts <= 927814152000:
                                pass  # BEFORE 1999-05-27 14:09:12 THE qa_contact FIELD WAS A NUMBER, NOT THE EMAIL
                            elif not new_value or not expected_value:
                                pass
                            else:
                                pass
                                # WE CAN NOT ASSUME WE FOUND AN ALIAS WITH JUST A SINGLE MISMATCH
                                # self.alias_analyzer.add_alias(lost=new_value, found=expected_value)
                        else:
                            # RECORD INCONSISTENCIES, MAYBE WE WILL FIND PATTERNS
                            expected_list = FIELDS_CHANGED[row_in.field_name][literal_field(text_type(new_value))]
                            if expected_value not in expected_list:
                                # expected_list += [expected_value]
                                # File("expected_values.json").write(value2json(FIELDS_CHANGED, pretty=True))

                                Log.note(
                                    "[Bug {{bug_id}}]: PROBLEM inconsistent change at {{timestamp}}: {{field}} was {{expecting|quote}} got {{observed|quote}}",
                                    bug_id=self.currBugID,
                                    timestamp=row_in.modified_ts,
                                    field=row_in.field_name,
                                    expecting=expected_value,
                                    observed=new_value
                                )

                # WE DO NOT ATTEMPT TO CHANGE THE VALUES IN HISTORY TO BE CONSISTENT WITH THE FUTURE
                self.currActivity.changes.append({
                    "field_name": row_in.field_name,
                    "new_value": self.currBugState[row_in.field_name],
                    "old_value": old_value,
                    "attach_id": row_in.attach_id
                })
                self.currBugState[row_in.field_name] = old_value

    def populateIntermediateVersionObjects(self):
        # Make sure the self.bugVersions are in descending order by modification time.
        # They could be mixed because of attachment activity
        self.bugVersions = jx.sort(self.bugVersions, [
            {"field": "modified_ts", "sort": -1}
        ])

        # Tracks the previous distinct value for field
        prevValues = {}
        currVersion = Null
        # Prime the while loop with an empty next version so our first iteration outputs the initial bug state
        nextVersion = Data(_id=self.currBugState._id, changes=[])

        # A monotonically increasing version number (useful for debugging)
        self.bug_version_num = 1

        # continue if there are more bug versions, or there is one final nextVersion
        while nextVersion:
            try:
                currVersion = nextVersion
                if self.bugVersions:
                    try:
                        nextVersion = self.bugVersions.pop()  # Oldest version
                        if nextVersion.modified_ts > self.settings.end_time:
                            if DEBUG_STATUS:
                                Log.note(
                                    "[Bug {{bug_id}}]: Not outputting {{_id}} - it is after self.end_time ({{end_time|datetime}})",
                                    _id=nextVersion._id,
                                    end_time=self.settings.end_time,
                                    bug_id=self.currBugState.bug_id
                                )
                            nextVersion = Null
                    except Exception as e:
                        Log.error("problem", cause=e)
                else:
                    nextVersion = Null

                if DEBUG_STATUS:
                    Log.note("[Bug {{bug_id}}]: Populating JSON for version {{id}}", {
                        "id": currVersion._id,
                        "bug_id": self.currBugState.bug_id
                    })
                # Decide whether to merge this bug activity into the current state (without emitting
                # a separate JSON document). This addresses the case where an attachment is created
                # at exactly the same time as the bug itself.
                # Effectively, we combine all the changes for a given timestamp into the last one.
                mergeBugVersion = False
                if nextVersion != None and currVersion._id == nextVersion._id:
                    if DEBUG_STATUS:
                        Log.note(
                            "[Bug {{bug_id}}]: Merge mode: activated {{id}}",
                            id=self.currBugState._id,
                            bug_id=self.currBugState.bug_id
                        )
                    mergeBugVersion = True

                # Link this version to the next one (if there is a next one)
                self.currBugState.expires_on = coalesce(nextVersion.modified_ts, MAX_TIMESTAMP)

                # Copy all attributes from the current version into self.currBugState
                for propName, propValue in currVersion.items():
                    self.currBugState[propName] = propValue
                # self.currBugState.previous_values = self.currBugState.previous_values.copy()

                # Now walk self.currBugState forward in time by applying the changes from currVersion
                # BE SURE TO APPLY REMOVES BEFORE ADDS, JUST IN CASE BOTH HAPPENED TO ONE FIELD
                changes = jx.sort(currVersion.changes, ["attach_id", "field_name", {"field": "old_value", "sort": -1}, "new_value"])
                self.currBugState.changes = currVersion.changes = changes

                for c, change in enumerate(changes):
                    if change.old_value == change.new_value and not change.attach_id:
                        # THIS HAPPENS FOR LONG FIELDS AND DIFF FIELDS
                        changes[c] = Null
                        continue
                    if c + 1 < len(changes):
                        # PACK ADDS AND REMOVES TO SINGLE CHANGE TO MATCH ORIGINAL
                        next_change = changes[c + 1]
                        if change.attach_id == next_change.attach_id and change.field_name == next_change.field_name:
                            if change.new_value == next_change.old_value:
                                next_change.old_value = change.old_value
                                changes[c] = Null
                                continue

                            if not is_null(change.old_value) and is_null(next_change.old_value):
                                next_change.old_value = change.old_value
                                change.old_value = Null
                            elif not is_null(change.new_value) and is_null(next_change.new_value):
                                next_change.new_value = change.new_value
                                change.new_value = Null

                    if (
                        is_null(change.new_value) and
                        is_null(change.old_value) and
                        change.field_name != "attachment_added"
                    ):
                        changes[c] = Null
                        continue

                    target = self.currBugState
                    targetName = "currBugState"
                    attach_id = change.attach_id
                    if attach_id != None:
                        # Handle the special change record that signals the creation of the attachment
                        if change.field_name == "attachment_added":
                            # This change only exists when the attachment has been added to the map, so no missing case needed.
                            att = self.currBugAttachmentsMap[attach_id]
                            self.currBugState.attachments.append(att)
                            continue
                        else:
                            # Attachment change
                            target = self.currBugAttachmentsMap.get(attach_id)
                            targetName = "attachment"
                            if target == None:
                                if DEBUG_MISSING_ATTACHMENTS:
                                    Log.note("[Bug {{bug_id}}]: Encountered a change to missing attachment: {{change}}", {
                                        "bug_id": self.currBugState.bug_id,
                                        "change": change
                                    })

                                # treat it as a change to the main bug instead :(
                                target = self.currBugState
                                targetName = "currBugState"

                    if change.field_name == "flags":
                        self.processFlagChange(target, change, currVersion.modified_ts, currVersion.modified_by)
                    elif change.field_name in MULTI_FIELDS:
                        a = target[change.field_name]
                        multi_field_value = change.new_value
                        multi_field_value_removed = change.old_value

                        # This was a deletion, find and delete the value(s)
                        a = self.removeValues(a, multi_field_value_removed, "removed", change.field_name, targetName, target)
                        # Handle addition(s) (if any)
                        a = self.addValues(a, multi_field_value, "added", change.field_name, target)
                        target[change.field_name] = a
                    else:
                        # Simple field change.
                        # Track the previous value
                        # Single-value field has changed in bug or attachment
                        # Make sure it's actually changing. We seem to get change
                        # entries for attachments that show the current field value.
                        if target[change.field_name] != change.new_value:
                            self.setPrevious(target, change.field_name, target[change.field_name], currVersion.modified_ts)

                        target[change.field_name] = change.new_value

                self.currBugState.bug_version_num = self.bug_version_num

                if not mergeBugVersion:
                    # This is not a "merge", so output a row for this bug version.
                    self.bug_version_num += 1
                    state = normalize(self.currBugState)

                    try:
                        value2json(state)
                    except Exception as e:
                        Log.error("problem with {{bug}}", bug=state.bug_id, cause=e)

                    if DEBUG_STATUS:
                        Log.note("[Bug {{bug_state.bug_id}}]: v{{bug_state.bug_version_num}} (id = {{bug_state.id}})", bug_state=state)
                    self.output.add({"id": state.id, "value": state})  # ES EXPECTED FORMAT
                else:
                    if DEBUG_STATUS:
                        Log.note("[Bug {{bug_state.bug_id}}]: Merging a change with the same timestamp = {{bug_state._id}}: {{bug_state}}", bug_state=currVersion)
            finally:
                if self.currBugState.blocked == None:
                    Log.note("[Bug {{bug_id}}]: expecting a blocked value", bug_id=currVersion.bug_id)
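
    # For example, the packing step in populateIntermediateVersionObjects above
    # collapses this adjacent pair of change records (a sketch):
    #     {"field_name": "cc", "old_value": {"a@x.com"}, "new_value": set()}
    #     {"field_name": "cc", "old_value": set(), "new_value": {"b@x.com"}}
    # into the single record that matches the original bugs_activity row:
    #     {"field_name": "cc", "old_value": {"a@x.com"}, "new_value": {"b@x.com"}}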

    def findFlag(self, flag_list, flag):
        for f in flag_list:
            if (
                f.request_type and flag.request_type and
                deformat(f.request_type) == deformat(flag.request_type) and
                f.request_status == flag.request_status and
                (
                    (f.request_status != '?' and self.email_alias(f.modified_by) == self.email_alias(flag.modified_by)) or
                    (f.request_status == '?' and self.email_alias(f.requestee) == self.email_alias(flag.requestee))
                )
            ):
                return f

        for f in flag_list:
            if f.value == flag.value:
                return f  # PROBABLY NEVER HAPPENS; IF THE FLAG CAN'T BE MATCHED, IT'S BECAUSE IT CAN'T BE PARSED, WHICH IS BECAUSE IT HAS BEEN CHOPPED OFF BY THE 255 CHAR LIMIT IN THE BUGS_ACTIVITY TABLE

        # BUGS_ACTIVITY HAS LOTS OF GARBAGE (255 CHAR LIMIT WILL CUT OFF REVIEW REQUEST LISTS)
        # TRY A LESS STRICT MATCH
        for f in flag_list:
            min_len = min(len(f.value), len(flag.value))
            if f.value[:min_len] == flag.value[:min_len]:
                return f

        return Null

    def processFlagChange(self, target, change, modified_ts, modified_by):
        target.flags = listwrap(target.flags)

        added_flags, change.new_value = change.new_value, set(c.value for c in change.new_value)
        removed_flags, change.old_value = change.old_value, set(c.value for c in change.old_value)

        # First, mark any removed flags as straight-up deletions.
        for removed_flag in removed_flags:
            existing_flag = self.findFlag(target.flags, removed_flag)

            if existing_flag:
                # Carry forward some previous values:
                existing_flag["previous_modified_ts"] = existing_flag["modified_ts"]
                existing_flag["modified_ts"] = modified_ts
                if existing_flag["modified_by"] != modified_by:
                    existing_flag["previous_modified_by"] = existing_flag["modified_by"]
                    existing_flag["modified_by"] = modified_by

                # Add changed stuff:
                existing_flag["previous_status"] = removed_flag["request_status"]
                existing_flag["request_status"] = "d"
                existing_flag["previous_value"] = removed_flag.value
                existing_flag["value"] = Null  # SPECIAL INDICATOR FOR DELETED FLAG
                # request_type stays the same.
                # requestee stays the same.

                duration_ms = existing_flag["modified_ts"] - existing_flag["previous_modified_ts"]
                # existing_flag["duration_days"] = math.floor(duration_ms / (1000.0 * 60 * 60 * 24))  # TODO: REMOVE floor
            else:
                self.findFlag(target.flags, removed_flag)  # REPEAT THE SEARCH (NO EFFECT; CONVENIENT WHEN STEPPING WITH A DEBUGGER)
                Log.note(
                    "[Bug {{bug_id}}]: PROBLEM: Did not find removed FLAG {{removed}} in {{existing}}",
                    removed=removed_flag.value,
                    existing=target.flags,
                    bug_id=self.currBugState.bug_id
                )

        # See if we can align any of the added flags with previous deletions.
        # If so, try to match them up with a "dangling" removed flag
        for added_flag in added_flags:
            candidates = wrap([
                unwrap(element)
                for element in target.flags
                if (
                    element["value"] == None  # SPECIAL INDICATOR FOR DELETED FLAG
                    and added_flag["request_type"] == element["request_type"]
                    and added_flag["request_status"] != element["previous_status"]  # Skip "r?(dre@mozilla)" -> "r?(mark@mozilla)"
                )
            ])

            if not candidates:
                # No matching candidate. Totally new flag.
                target.flags.append(added_flag)
                continue

            chosen_one = candidates[0]
            if len(candidates) > 1:
                # Multiple matches - use the best one.
                if DEBUG_FLAG_MATCHES:
                    Log.note(
                        "[Bug {{bug_id}}]: Matched added flag {{flag}} to multiple removed flags {{candidates}}. Finding the best...",
                        flag=added_flag,
                        candidates=candidates,
                        bug_id=self.currBugState.bug_id
                    )

                matched_ts = [
                    element
                    for element in candidates
                    if added_flag.modified_ts == element.modified_ts
                ]

                matched_req = [
                    element
                    for element in candidates
                    if self.email_alias(added_flag["modified_by"]) == self.email_alias(element["requestee"])
                ]

                if not matched_ts and not matched_req:
                    # No matching candidate. Totally new flag.
                    target.flags.append(added_flag)
                    continue
                elif len(matched_ts) == 1 or (not matched_req and matched_ts):
                    chosen_one = matched_ts[0]
                    if DEBUG_FLAG_MATCHES:
                        Log.note(
                            "[Bug {{bug_id}}]: Matching on modified_ts:\n{{best|indent}}",
                            bug_id=self.currBugState.bug_id,
                            best=chosen_one
                        )
                elif not matched_ts and matched_req:
                    chosen_one = matched_req[0]  # PICK ANY
                    if DEBUG_FLAG_MATCHES:
                        Log.note(
                            "[Bug {{bug_id}}]: Matching on requestee",
                            bug_id=self.currBugState.bug_id,
                            best=chosen_one
                        )
                else:
                    matched_both = [
                        element
                        for element in candidates
                        if added_flag.modified_ts == element.modified_ts and self.email_alias(added_flag["modified_by"]) == self.email_alias(element["requestee"])
                    ]

                    if matched_both:
                        if DEBUG_FLAG_MATCHES:
                            Log.note("[Bug {{bug_id}}]: Matching on modified_ts and requestee fixed it", bug_id=self.currBugState.bug_id)
                        chosen_one = matched_both[0]  # PICK ANY
                    else:
                        if DEBUG_FLAG_MATCHES:
                            Log.note("[Bug {{bug_id}}]: Matching on modified_ts fixed it", bug_id=self.currBugState.bug_id)
                        chosen_one = matched_ts[0]
            else:
                # Obvious case - matched exactly one.
                if DEBUG_STATUS:
                    Log.note(
                        "[Bug {{bug_id}}]: Matched added flag {{added}} to removed flag {{removed}}",
                        added=added_flag,
                        removed=chosen_one,
                        bug_id=self.currBugState.bug_id
                    )

            if chosen_one != None:
                for f in ["value", "request_status", "requestee"]:
                    chosen_one[f] = coalesce(added_flag[f], chosen_one[f])

            # We need to avoid later adding this flag twice, since we rolled an add into a delete.

    def setPrevious(self, dest, field_name, previous_value, change_ts):
        if dest["previous_values"] == None:
            dest["previous_values"] = {}
        pv = dest["previous_values"]

        if USE_PREVIOUS_VALUE_OBJECTS:
            prev_field_name = field_name + ".value"
            caField = field_name + ".end_time"
            ctField = field_name + ".start_time"
            ddField = Null
        else:
            prev_field_name = field_name + "_value"
            caField = field_name + "_change_away_ts"
            ctField = field_name + "_change_to_ts"
            ddField = field_name + "_duration_days"

        pv[prev_field_name] = previous_value
        # If we have a previous change for this field, then use the
        # change-away time as the new change-to time.
        if pv[caField] != None:
            pv[ctField] = pv[caField]
        else:
            # Otherwise, this is the first change for this field, so
            # use the creation timestamp.
            pv[ctField] = dest["created_ts"]

        pv[caField] = change_ts
        try:
            duration_ms = pv[caField] - pv[ctField]
            pv[ddField] = math.floor(duration_ms / (1000.0 * 60 * 60 * 24))
        except Exception as e:
            Log.error("problem computing duration", cause=e)
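
    # For example, with USE_PREVIOUS_VALUE_OBJECTS == False, a change away from
    # status "NEW" at time T records (a sketch):
    #     previous_values["status_value"] = "NEW"
    #     previous_values["status_change_to_ts"] = <created_ts, or the previous change-away time>
    #     previous_values["status_change_away_ts"] = T
    #     previous_values["status_duration_days"] = floor((T - change_to_ts) / 86400000.0)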

    def addValues(self, total, add, valueType, field_name, target):
        if not add:
            return total
        # Log.note("[Bug {{bug_id}}]: Adding " + valueType + " " + fieldName + " values:" + value2json(someValues))
        if field_name == "flags":
            Log.error("use processFlags")
        else:
            diff = add - total
            removed = total & add

            # WE CAN NOT REMOVE VALUES WE KNOW TO BE THERE AFTER
            if removed and (field_name != 'cc' or DEBUG_CC_CHANGES) and field_name not in KNOWN_MISSING_KEYWORDS:
                Log.note(
                    "[Bug {{bug_id}}]: PROBLEM: Found {{type}} {{field_name}} value: (Removing {{removed}} can not result in {{existing}})",
                    bug_id=target.bug_id,
                    type=valueType,
                    field_name=field_name,
                    removed=removed,
                    existing=target[field_name]
                )

            if valueType != "added" and diff:
                self.currActivity.changes.append({
                    "field_name": field_name,
                    "new_value": set(),
                    "old_value": diff,
                    "attach_id": target.attach_id
                })

            return total | add

    def removeValues(self, total, remove, valueType, field_name, arrayDesc, target):
        if field_name == "flags":
            Log.error("use processFlags")
        elif field_name == "cc":
            # MAP CANONICAL TO EXISTING (BETWEEN map_* AND THE ALIAS ANALYZER WE HAVE A BIJECTION)
            map_total = inverse({t: self.email_alias(t) for t in total})
            map_remove = inverse({r: self.email_alias(r) for r in remove})
            # CANONICAL VALUES
            c_total = set(map_total.keys())
            c_remove = set(map_remove.keys())

            removed = c_total & c_remove
            diff = c_remove - c_total
            output = c_total - c_remove

            if not target.uncertain:
                if diff and DEBUG_CC_CHANGES:
                    Log.note("[Bug {{bug_id}}]: PROBLEM: Unable to find CC:\n{{missing|indent}}\nnot in:\n{{existing|indent}}\ncurrent alias info:\n{{candidates|indent}}", {
                        "type": valueType,
                        "object": arrayDesc,
                        "field_name": field_name,
                        "missing": jx.sort(jx.map2set(diff, map_remove)),
                        "existing": jx.sort(total),
                        "candidates": {d: self.email_alias(d) for d in diff},
                        "bug_id": self.currBugID
                    })
            else:
                # PATTERN MATCH EMAIL ADDRESSES
                # self.cc_list_ok = False
                for lost in diff:
                    best_score = 0.3
                    best = Null
                    for found in output:
                        score = MIN([
                            strings.edit_distance(found, lost),
                            strings.edit_distance(found.split("@")[0], lost.split("@")[0]),
                            strings.edit_distance(map_total[found][0], lost),
                            strings.edit_distance(map_total[found][0].split("@")[0], lost.split("@")[0])
                        ])
                        if score < best_score:
                            # best_score = score
                            best = found

                    if best != Null:
                        if DEBUG_CC_CHANGES:
                            Log.note("[Bug {{bug_id}}]: UNCERTAIN ALIAS FOUND: {{lost}} == {{found}}", {
                                "lost": lost,
                                "found": best,
                                "bug_id": self.currBugID
                            })
                        # DO NOT SAVE THE ALIAS, IT MAY BE WRONG
                        removed.add(best)
                        output.discard(best)
                    elif DEBUG_CC_CHANGES:
                        Log.note("[Bug {{bug_id}}]: PROBLEM Unable to pattern match {{type}} value: {{object}}.{{field_name}}: ({{missing}}" + " not in : {{existing}})", {
                            "type": valueType,
                            "object": arrayDesc,
                            "field_name": field_name,
                            "missing": lost,
                            "existing": total,
                            "bug_id": self.currBugID
                        })

            if valueType == "added":
                # DURING WALK BACK IN TIME, WE POPULATE THE changes
                try:
                    if removed - set(map_total.keys()):
                        Log.error("problem with alias finding:\n" +
                                  "map_total={{map_total}}\n" +
                                  "map_remove={{map_remove}}\n" +
                                  "c_total={{c_total}}\n" +
                                  "c_remove={{c_remove}}\n" +
                                  "removed={{removed}}\n" +
                                  "diff={{diff}}\n" +
                                  "output={{output}}\n", {
                            "map_total": map_total,
                            "c_total": c_total,
                            "map_remove": map_remove,
                            "c_remove": c_remove,
                            "removed": removed,
                            "diff": diff,
                            "output": output
                        })
                    final_removed = jx.map2set(removed, map_total)
                    if final_removed:
                        self.currActivity.changes.append({
                            "field_name": field_name,
                            "new_value": final_removed,
                            "old_value": set(),
                            "attach_id": target.attach_id
                        })
                except Exception as e:
                    Log.error("issues", cause=e)

            return jx.map2set(output, map_total)
        else:
            removed = total & remove
            diff = remove - total
            output = total - remove

            if valueType == "added" and removed:
                self.currActivity.changes.append({
                    "field_name": field_name,
                    "new_value": removed,
                    "old_value": set(),
                    "attach_id": target.attach_id
                })

            if diff and field_name not in ['blocked', 'dependson']:  # HAPPENS BECAUSE OF MISSING PRIVATE BUGS
                Log.note("[Bug {{bug_id}}]: PROBLEM Unable to find {{type}} value in {{object}}.{{field_name}}: (All {{missing}}" + " not in : {{existing}})", {
                    "bug_id": target.bug_id,
                    "type": valueType,
                    "object": arrayDesc,
                    "field_name": field_name,
                    "missing": diff,
                    "existing": total
                })
                if field_name == "keywords":
                    KNOWN_MISSING_KEYWORDS.update(diff)

            return output

    def processFlags(self, total, old_values, new_values, modified_ts, modified_by, target_type, target):
        added_values = []  # FOR SOME REASON, REMOVAL BY OBJECT DOES NOT WORK, SO WE USE THIS LIST OF STRING VALUES
        for v in new_values:
            flag = parse_flag(v, modified_ts, modified_by)

            if flag.request_type == None:
                Log.note("[Bug {{bug_id}}]: PROBLEM Unable to parse flag {{flag}} (caused by 255 char limit?)", {
                    "flag": convert.value2quote(flag.value),
                    "bug_id": self.currBugID
                })
                continue

            found = self.findFlag(total, flag)
            if found:
                before = len(total)
                total.remove(found)
                after = len(total)
                if before != after + 1:
                    Log.error("flag was not removed from the list")
                # total = wrap([unwrap(a) for a in total if tuple(a.items()) != tuple(found.items())])  # COMPARE DICTS
                added_values.append(flag)
            else:
                Log.note(
                    "[Bug {{bug_id}}]: PROBLEM Unable to find {{type}} FLAG: {{object}}.{{field_name}}: (All {{missing}}" + " not in : {{existing}})",
                    type=target_type,
                    object=coalesce(target.attach_id, target.bug_id),
                    field_name="flags",
                    missing=v,
                    existing=total,
                    bug_id=self.currBugID
                )

        if added_values:
            self.currActivity.changes.append({
                "field_name": "flags",
                "new_value": added_values,
                "old_value": [],
                "attach_id": target.attach_id
            })

        if old_values:
            removed_values = [
                parse_flag(v, modified_ts, modified_by)
                for v in old_values
            ]
            total.extend(removed_values)

            self.currActivity.changes.append({
                "field_name": "flags",
                "new_value": [],
                "old_value": removed_values,
                "attach_id": target.attach_id
            })

        return total

    def canonical(self, field, value):
        try:
            if value in NULL_VALUES:
                return None
            elif field in EMAIL_FIELDS:
                return self.email_alias(value)
            elif field in TIME_FIELDS:
                value = long(Date(value).unix) * 1000
            elif field in NUMERIC_FIELDS:
                value = value2number(value)

            # candidates = FIELDS_CHANGED[field][literal_field(str(value))]
            # if candidates == None:
            #     return value
            # elif len(candidates) == 1:
            #     return candidates[0]
            # else:
            return value
        except Exception:
            return value

    def email_alias(self, name):
        return self.alias_analyzer.get_canonical(name)


def parse_flag(flag, modified_ts, modified_by):
    flagParts = Data(
        modified_ts=modified_ts,
        modified_by=modified_by,
        value=flag
    )

    matches = FLAG_PATTERN.match(flag)
    if matches:
        flagParts.request_type = matches.group(1)
        flagParts.request_status = matches.group(2)
        if matches.start(3) != -1 and len(matches.group(3)) > 2:
            flagParts.requestee = matches.group(3)[1:-1]

    return flagParts
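
# For example (a sketch of the parse):
#     f = parse_flag("review?(mreid@mozilla.com)", modified_ts, modified_by)
#     f.request_type == "review"
#     f.request_status == "?"
#     f.requestee == "mreid@mozilla.com"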


def parseMultiField(name, value):
    if name == "flags":
        if value == None:
            return []
        else:
            return list(s.strip() for s in value.split(",") if s.strip() != "")
    elif value == None:
        return set()
    elif isinstance(value, (list, set)):
        Log.error("do not parse lists")
    elif name in MULTI_FIELDS:
        if name in NUMERIC_FIELDS:
            return set(int(s.strip()) for s in value.split(",") if s.strip() != "")
        else:
            return set(s.strip() for s in value.split(",") if s.strip() != "")

    return {value}
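
# For example (a sketch, assuming "keywords" is listed in MULTI_FIELDS and
# "blocked" in both MULTI_FIELDS and NUMERIC_FIELDS):
#     parseMultiField("keywords", "regression, crash") == {"regression", "crash"}
#     parseMultiField("blocked", "55161, 55162") == {55161, 55162}
#     parseMultiField("flags", None) == []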


def deformat(value):
    if value == None:
        Log.error("not expected")
    return value.lower().replace(u"\u2011", u"-")


def is_null(value):
    if value == None:
        return True
    if isinstance(value, (set, list)):
        return len(value) == 0
    return False


class ApplyDiff(object):

    def __init__(self, bug_id, timestamp, text, diff, reverse=None):
        """
        THE BUGZILLA DIFF IS ACROSS MULTIPLE RECORDS, THEY MUST BE APPENDED TO MAKE THE DIFF
        :param timestamp: DATABASE bug_activity TIMESTAMP THAT WILL BE THE SAME FOR ALL IN A HUNK
        :param text: THE ORIGINAL TEXT (OR A PROMISE OF TEXT)
        :param diff: THE PARTIAL DIFF
        :param reverse: DIRECTION TO APPLY THE DIFF
        :return: A PROMISE TO RETURN THE diff APPLIED TO THE text
        """
        self.bug_id = bug_id
        self.timestamp = timestamp
        self._text = coalesce(text, "")
        self._diff = diff
        self.reverse = reverse
        self.parent = None
        self.result = None

        if isinstance(text, ApplyDiff):
            if text.timestamp != timestamp:
                # DIFFERENT DIFF
                self._text = text_type(text)  # ACTUALIZE THE EFFECTS OF THE OTHER DIFF
            else:
                # CHAIN THE DIFF
                text.parent = self
                text.parent.result = None  # JUST IN CASE THIS HAS BEEN ACTUALIZED

    @property
    def text(self):
        if isinstance(self._text, ApplyDiff):
            return self._text.text
        else:
            return self._text

    @property
    def diff(self):
        # WHEN GOING BACKWARDS IN TIME, THE DIFF WILL ARRIVE IN REVERSE ORDER
        # LUCKY THAT THE STACK OF ApplyDiff REVERSES THE REVERSE ORDER
        if isinstance(self._text, ApplyDiff):
            return self._diff + self._text.diff
        else:
            return self._diff

    def __data__(self):
        output = text_type(self)
        return output if output else None

    def __gt__(self, other):
        return text_type(self) > other

    def __lt__(self, other):
        return text_type(self) < other

    def __eq__(self, other):
        if other == None:
            return False  # DO NOT ACTUALIZE
        try:
            return text_type(self) == other
        except Exception as e:
            e = Except.wrap(e)
            Log.warning("problem comparing diff for bug {{bug}}", bug=self.bug_id, cause=e)
            return False

    def __unicode__(self):
        if self.parent:
            return text_type(self.parent)

        text = self.text
        diff = self.diff
        if self.result == None:
            try:
                new_text = apply_diff(coalesce(text, "").split("\n"), diff.split("\n"), reverse=self.reverse, verify=DEBUG_DIFF)
                self.result = "\n".join(new_text)
            except Exception as e:
                e = Except.wrap(e)
                self.result = "<ERROR>"
                Log.warning("problem applying diff for bug {{bug}}", bug=self.bug_id, cause=e)

        return self.result

    if PY2:
        def __str__(self):
            return self.__unicode__().encode('utf8')
    else:
        __str__ = __unicode__
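
# A sketch of how the chaining above is used: partial diff records that share a
# bugs_activity timestamp are stacked, and text_type() actualizes the result:
#     d1 = ApplyDiff(bug_id, ts, original_text, diff_part_2, reverse=True)  # arrives first
#     d2 = ApplyDiff(bug_id, ts, d1, diff_part_1, reverse=True)  # same ts, so it chains
#     text_type(d2)  # applies (diff_part_1 + diff_part_2) to original_text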


class LongField(object):

    def __init__(self, bug_id, timestamp, next_value, text):
        """
        THE BUGZILLA LONG FIELDS ARE ACROSS MULTIPLE RECORDS, THEY MUST BE APPENDED
        :param timestamp: DATABASE bug_activity TIMESTAMP THAT WILL BE THE SAME FOR ALL IN A HUNK
        :param next_value: THE ORIGINAL TEXT (OR A PROMISE OF TEXT)
        :param text: THE PARTIAL CONTENT
        :return: A PROMISE TO RETURN THE FULL TEXT
        """
        self.bug_id = bug_id
        self.timestamp = timestamp
        self.value = text
        self.prev_value = None
        self.next_value = None

        if isinstance(next_value, LongField) and next_value.timestamp == timestamp:
            # CHAIN THE PARTS
            self.next_value = next_value
            next_value.prev_value = self

    @property
    def text(self):
        # WHEN GOING BACKWARDS IN TIME, THE PARTS WILL ARRIVE IN REVERSE ORDER
        # LUCKY THAT THE CHAIN OF LongField REVERSES THE REVERSE ORDER
        if self.next_value is not None:
            return self.value + self.next_value.text
        else:
            return self.value

    def __data__(self):
        return text_type(self)

    def __gt__(self, other):
        return text_type(self) > text_type(other)

    def __lt__(self, other):
        return text_type(self) < text_type(other)

    def __eq__(self, other):
        if other == None:
            return False  # DO NOT ACTUALIZE
        return text_type(self) == text_type(other)

    def __str__(self):
        if self.prev_value:
            return str(self.prev_value)
        return self.text  # ASSEMBLE ALL THE PARTS, NOT JUST THIS ONE

    def __unicode__(self):
        if self.prev_value:
            return text_type(self.prev_value)
        return self.text  # ASSEMBLE ALL THE PARTS, NOT JUST THIS ONE
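
# A sketch of how LongField stitches parts back together: parts for the same
# timestamp arrive in reverse order, each new part wrapping the previous promise:
#     p1 = LongField(bug_id, ts, current_value, "end of text")   # arrives first
#     p2 = LongField(bug_id, ts, p1, "start of text, ")          # same ts, so it chains
#     text_type(p2) == "start of text, end of text"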


# ENSURE WE REGISTER THIS PROMISE AS A STRING
meta_columns._merge_order['ApplyDiff'] = 6
meta_columns._merge_order['LongField'] = 6
python_type_to_json_type[ApplyDiff] = STRING
python_type_to_json_type[LongField] = STRING
python_type_to_json_type['ApplyDiff'] = STRING
python_type_to_json_type['LongField'] = STRING