Bug 1321787 - Autoclassify through the TextLogError rather than through the FailureLine (#2179)

This commit is contained in:
jgraham 2017-03-04 20:58:31 +00:00 коммит произвёл GitHub
Родитель 76fc09fd1e
Коммит 3c03bb9fba
17 изменённых файлов: 1333 добавлений и 331 удалений

Просмотреть файл

@ -1,23 +1,19 @@
from datetime import (datetime,
timedelta)
from treeherder.autoclassify.detectors import (ManualDetector,
TestFailureDetector)
from treeherder.autoclassify.matchers import (CrashSignatureMatcher,
ElasticSearchTestMatcher,
PreciseTestMatcher,
time_window)
PreciseTestMatcher)
from treeherder.autoclassify.tasks import autoclassify
from treeherder.model.models import (BugJobMap,
ClassifiedFailure,
FailureMatch,
JobNote,
TextLogError,
TextLogErrorMatch,
TextLogErrorMetadata)
from .utils import (crash_line,
create_failure_lines,
create_text_log_errors,
create_lines,
log_line,
register_detectors,
register_matchers,
@ -37,16 +33,6 @@ def do_autoclassify(job, test_failure_lines, matchers, status="testfailed"):
item.refresh_from_db()
def create_lines(test_job, lines):
error_lines = create_text_log_errors(test_job, lines)
failure_lines = create_failure_lines(test_job, lines)
for error_line, failure_line in zip(error_lines, failure_lines):
TextLogErrorMetadata.objects.create(text_log_error=error_line,
failure_line=failure_line)
return error_lines, failure_lines
def test_classify_test_failure(text_log_errors_failure_lines,
classified_failures,
test_job_2):
@ -120,7 +106,7 @@ def test_autoclassify_update_job_classification(failure_lines, classified_failur
def test_autoclassify_no_update_job_classification(test_job, test_job_2,
failure_lines,
text_log_errors_failure_lines,
classified_failures):
lines = [(test_line, {})]
@ -134,13 +120,18 @@ def test_autoclassify_no_update_job_classification(test_job, test_job_2,
assert JobNote.objects.filter(job=test_job_2).count() == 0
def test_autoclassified_after_manual_classification(test_user, test_job_2,
failure_lines, failure_classifications):
def test_autoclassified_after_manual_classification(test_user,
test_job_2,
text_log_errors_failure_lines,
failure_classifications):
register_detectors(ManualDetector, TestFailureDetector)
lines = [(test_line, {})]
test_error_lines, test_failure_lines = create_lines(test_job_2, lines)
BugJobMap.objects.create(job=test_job_2,
bug_id=1234,
user=test_user)
JobNote.objects.create(job=test_job_2,
failure_classification_id=4,
user=test_user,
@ -186,9 +177,9 @@ def test_autoclassified_no_update_after_manual_classification_2(test_user, test_
register_detectors(ManualDetector, TestFailureDetector)
# Too many failure lines
test_failure_lines = create_failure_lines(test_job_2,
[(log_line, {}),
(test_line, {"subtest": "subtest2"})])
test_error_lines, test_failure_lines = create_lines(test_job_2,
[(log_line, {}),
(test_line, {"subtest": "subtest2"})])
JobNote.objects.create(job=test_job_2,
failure_classification_id=4,
@ -214,9 +205,9 @@ def test_classify_skip_ignore(test_job_2,
failure_lines[1].best_classification = None
failure_lines[1].save()
test_failure_lines = create_failure_lines(test_job_2,
[(test_line, {}),
(test_line, {"subtest": "subtest2"})])
test_error_lines, test_failure_lines = create_lines(test_job_2,
[(test_line, {}),
(test_line, {"subtest": "subtest2"})])
do_autoclassify(test_job_2, test_failure_lines, [PreciseTestMatcher])
@ -231,14 +222,14 @@ def test_classify_skip_ignore(test_job_2,
def test_classify_es(test_job_2, failure_lines, classified_failures):
test_failure_lines = create_failure_lines(test_job_2,
[(test_line, {}),
(test_line, {"message": "message2"}),
(test_line, {"message": "message 1.2"}),
(test_line, {"message": "message 0x1F"}),
(test_line, {"subtest": "subtest3"}),
(test_line, {"status": "TIMEOUT"}),
(test_line, {"expected": "ERROR"})])
test_error_lines, test_failure_lines = create_lines(test_job_2,
[(test_line, {}),
(test_line, {"message": "message2"}),
(test_line, {"message": "message 1.2"}),
(test_line, {"message": "message 0x1F"}),
(test_line, {"subtest": "subtest3"}),
(test_line, {"status": "TIMEOUT"}),
(test_line, {"expected": "ERROR"})])
do_autoclassify(test_job_2, test_failure_lines, [ElasticSearchTestMatcher])
@ -253,9 +244,9 @@ def test_classify_es(test_job_2, failure_lines, classified_failures):
def test_classify_multiple(test_job_2, failure_lines, classified_failures):
test_failure_lines = create_failure_lines(test_job_2,
[(test_line, {}),
(test_line, {"message": "message 1.2"})])
test_error_lines, test_failure_lines = create_lines(test_job_2,
[(test_line, {}),
(test_line, {"message": "message 1.2"})])
expected_classified_precise = [test_failure_lines[0]]
expected_classified_fuzzy = [test_failure_lines[1]]
@ -273,20 +264,24 @@ def test_classify_multiple(test_job_2, failure_lines, classified_failures):
def test_classify_crash(test_repository, test_job, test_job_2, test_matcher):
failure_lines_ref = create_failure_lines(test_job,
[(crash_line, {})])
error_lines_ref, failure_lines_ref = create_lines(test_job,
[(crash_line, {})])
failure_lines = create_failure_lines(test_job_2,
[(crash_line, {}),
(crash_line, {"test": "test1"}),
(crash_line, {"signature": "signature1"}),
(crash_line, {"signature": None})])
error_lines, failure_lines = create_lines(test_job_2,
[(crash_line, {}),
(crash_line, {"test": "test1"}),
(crash_line, {"signature": "signature1"}),
(crash_line, {"signature": None})])
classified_failure = ClassifiedFailure.objects.create()
FailureMatch.objects.create(failure_line=failure_lines_ref[0],
classified_failure=classified_failure,
matcher=test_matcher.db_object,
score=1.0)
TextLogErrorMatch.objects.create(text_log_error=error_lines_ref[0],
classified_failure=classified_failure,
matcher=test_matcher.db_object,
score=1.0)
do_autoclassify(test_job_2, failure_lines, [CrashSignatureMatcher])
expected_classified = failure_lines[0:2]
@ -297,22 +292,3 @@ def test_classify_crash(test_repository, test_job, test_job_2, test_matcher):
for item in expected_unclassified:
assert item.classified_failures.count() == 0
def test_classify_test_failure_window(failure_lines, classified_failures):
failure_lines[0].created = datetime.now() - timedelta(days=2)
failure_lines[0].save()
failure_matches = FailureMatch.objects.all()
failure_matches[1].score = 0.5
failure_matches[1].save()
best_match = time_window(FailureMatch.objects.all(), timedelta(days=1), 0,
lambda x: x.score)
assert best_match == failure_matches[1]
best_match = time_window(FailureMatch.objects.all(), timedelta(days=1), None,
lambda x: x.score)
assert best_match == failure_matches[1]

Просмотреть файл

@ -3,15 +3,31 @@ import datetime
from mozlog.formatters.tbplformatter import TbplFormatter
from treeherder.model.models import (FailureLine,
Job,
MatcherManager,
TextLogError,
TextLogErrorMetadata,
TextLogStep)
from treeherder.model.search import refresh_all
test_line = {"action": "test_result", "test": "test1", "subtest": "subtest1",
"status": "FAIL", "expected": "PASS", "message": "message1"}
log_line = {"action": "log", "level": "ERROR", "message": "message1"}
crash_line = {"action": "crash", "signature": "signature"}
crash_line = {"action": "crash", "signature": "signature", "test": "test1"}
def create_lines(test_job, lines):
error_lines = create_text_log_errors(test_job, lines)
failure_lines = create_failure_lines(test_job, lines)
for error_line, failure_line in zip(error_lines, failure_lines):
TextLogErrorMetadata.objects.create(text_log_error=error_line,
failure_line=failure_line)
test_job.autoclassify_status = Job.CROSSREFERENCED
test_job.save()
return error_lines, failure_lines
def create_failure_lines(job, failure_line_list,
@ -46,6 +62,8 @@ def get_data(base_data, updates):
elif data["action"] == "log":
if data["level"] not in ("ERROR", "CRITICAL"):
return
elif data["action"] == "crash":
pass
else:
return
return data

Просмотреть файл

@ -52,13 +52,12 @@ def test_update_autoclassification_bug(test_job, test_job_2,
# Job 1 has two failure lines so nothing should be updated
assert test_job.update_autoclassification_bug(1234) is None
failure_lines = create_failure_lines(test_job_2,
[(test_line, {})])
failure_lines[0].best_classification = classified_failures[0]
failure_lines[0].save()
classified_failures[0].bug_number = None
lines = [(item, {}) for item in FailureLine.objects.filter(job_guid=test_job_2.guid).values()]
create_text_log_errors(test_job_2, lines)
lines = [(test_line, {})]
create_failure_lines(test_job_2, lines)
error_lines = create_text_log_errors(test_job_2, lines)
error_lines[0].mark_best_classification(classified_failures[0])
assert classified_failures[0].bug_number is None
assert test_job_2.update_autoclassification_bug(1234) == classified_failures[0]
classified_failures[0].refresh_from_db()

Просмотреть файл

@ -433,7 +433,9 @@ def test_text_log_steps_and_errors(webapp, test_job):
'search_terms': ['failure 1'],
'bugs': {'open_recent': [], 'all_others': []}
},
'metadata': None
'metadata': None,
'matches': [],
'classified_failures': []
},
{
'id': 2,
@ -444,7 +446,9 @@ def test_text_log_steps_and_errors(webapp, test_job):
'search_terms': ['failure 2'],
'bugs': {'open_recent': [], 'all_others': []}
},
'metadata': None
'metadata': None,
'matches': [],
'classified_failures': []
}
],
'finished': '1970-01-01T00:03:20',
@ -494,7 +498,9 @@ def test_text_log_errors(webapp, test_job):
'search_terms': ['failure 1'],
'bugs': {'open_recent': [], 'all_others': []}
},
'metadata': None
'metadata': None,
'matches': [],
'classified_failures': []
},
{
'id': 2,
@ -505,7 +511,9 @@ def test_text_log_errors(webapp, test_job):
'search_terms': ['failure 2'],
'bugs': {'open_recent': [], 'all_others': []}
},
'metadata': None
'metadata': None,
'matches': [],
'classified_failures': []
}
]

Просмотреть файл

@ -0,0 +1,628 @@
from django.core.urlresolvers import reverse
from rest_framework.test import APIClient
from tests.autoclassify.utils import (create_failure_lines,
create_text_log_errors,
test_line)
from treeherder.autoclassify.detectors import ManualDetector
from treeherder.model.models import (BugJobMap,
ClassifiedFailure,
FailureLine,
Job,
JobNote,
Matcher,
MatcherManager,
TextLogError,
TextLogErrorMetadata)
from treeherder.model.search import TestFailureLine
def test_get_error(text_log_errors_failure_lines):
"""
test getting a single failure line
"""
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
resp = client.get(
reverse("text-log-error-detail", kwargs={"pk": text_log_errors[0].id}))
assert resp.status_code == 200
data = resp.json()
assert isinstance(data, object)
exp_error_keys = ["id", "line", "line_number", "matches",
"classified_failures", "bug_suggestions", "metadata"]
assert set(data.keys()) == set(exp_error_keys)
exp_meta_keys = ["text_log_error", "failure_line", "best_classification",
"best_is_verified"]
assert set(data["metadata"].keys()) == set(exp_meta_keys)
def test_update_error_verify(test_repository,
text_log_errors_failure_lines,
classified_failures,
test_user):
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
client.force_authenticate(user=test_user)
failure_line = failure_lines[0]
error_line = text_log_errors[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified is False
body = {"best_classification": classified_failures[0].id}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
failure_line.refresh_from_db()
error_line.metadata.refresh_from_db()
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified
es_line = TestFailureLine.get(failure_line.id, routing=failure_line.test)
assert es_line.best_classification == classified_failures[0].id
assert es_line.best_is_verified
def test_update_error_replace(test_repository,
text_log_errors_failure_lines,
classified_failures,
test_user):
MatcherManager.register_detector(ManualDetector)
client = APIClient()
client.force_authenticate(user=test_user)
text_log_errors, failure_lines = text_log_errors_failure_lines
failure_line = failure_lines[0]
error_line = text_log_errors[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified is False
body = {"best_classification": classified_failures[1].id}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
failure_line.refresh_from_db()
error_line.metadata.refresh_from_db()
assert failure_line.best_classification == classified_failures[1]
assert failure_line.best_is_verified
assert len(failure_line.classified_failures.all()) == 2
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[1]
assert error_line.metadata.best_is_verified
expected_matcher = Matcher.objects.get(name="ManualDetector")
assert failure_line.matches.get(classified_failure_id=classified_failures[1].id).matcher == expected_matcher
assert error_line.matches.get(classified_failure_id=classified_failures[1].id).matcher == expected_matcher
def test_update_error_mark_job(test_job,
text_log_errors_failure_lines,
classified_failures,
test_user):
text_log_errors, failure_lines = text_log_errors_failure_lines
MatcherManager.register_detector(ManualDetector)
client = APIClient()
client.force_authenticate(user=test_user)
classified_failures[1].bug_number = 1234
classified_failures[1].save()
for text_log_error, failure_line in zip(text_log_errors, failure_lines):
assert failure_line.best_is_verified is False
assert text_log_error.metadata.best_is_verified is False
body = {"best_classification": classified_failures[1].id}
resp = client.put(reverse("text-log-error-detail", kwargs={"pk": text_log_error.id}),
body, format="json")
assert resp.status_code == 200
failure_line.refresh_from_db()
text_log_error.metadata.refresh_from_db()
assert failure_line.best_classification == classified_failures[1]
assert failure_line.best_is_verified
assert text_log_error.metadata.best_classification == classified_failures[1]
assert text_log_error.metadata.best_is_verified
assert test_job.is_fully_verified()
# should only be one, will assert if that isn't the case
note = JobNote.objects.get(job=test_job)
assert note.failure_classification.id == 4
assert note.user == test_user
job_bugs = BugJobMap.objects.filter(job=test_job)
assert job_bugs.count() == 1
assert job_bugs[0].bug_id == 1234
def test_update_error_mark_job_with_human_note(test_job,
text_log_errors_failure_lines,
classified_failures, test_user):
text_log_errors, failure_lines = text_log_errors_failure_lines
MatcherManager.register_detector(ManualDetector)
client = APIClient()
client.force_authenticate(user=test_user)
JobNote.objects.create(job=test_job,
failure_classification_id=4,
user=test_user,
text="note")
for error_line in text_log_errors:
body = {"best_classification": classified_failures[1].id}
resp = client.put(reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
assert test_job.is_fully_verified()
# should only be one, will assert if that isn't the case
note = JobNote.objects.get(job=test_job)
assert note.failure_classification.id == 4
assert note.user == test_user
def test_update_error_line_mark_job_with_auto_note(test_job,
mock_autoclassify_jobs_true,
text_log_errors_failure_lines,
classified_failures,
test_user):
text_log_errors, failure_lines = text_log_errors_failure_lines
MatcherManager.register_detector(ManualDetector)
client = APIClient()
client.force_authenticate(user=test_user)
JobNote.objects.create(job=test_job,
failure_classification_id=7,
text="note")
for text_log_error in text_log_errors:
body = {"best_classification": classified_failures[1].id}
resp = client.put(reverse("text-log-error-detail", kwargs={"pk": text_log_error.id}),
body, format="json")
assert resp.status_code == 200
assert test_job.is_fully_verified()
notes = JobNote.objects.filter(job=test_job).order_by('-created')
assert notes.count() == 2
assert notes[0].failure_classification.id == 4
assert notes[0].user == test_user
assert notes[0].text == ''
assert notes[1].failure_classification.id == 7
assert not notes[1].user
assert notes[1].text == "note"
def test_update_errors(mock_autoclassify_jobs_true,
test_repository,
text_log_errors_failure_lines,
classified_failures,
eleven_jobs_stored,
test_user):
jobs = (Job.objects.get(id=1), Job.objects.get(id=2))
MatcherManager.register_detector(ManualDetector)
client = APIClient()
client.force_authenticate(user=test_user)
lines = [(test_line, {}),
(test_line, {"subtest": "subtest2"})]
new_failure_lines = create_failure_lines(jobs[1], lines)
new_text_log_errors = create_text_log_errors(jobs[1], lines)
for text_log_error, failure_line in zip(new_text_log_errors,
new_failure_lines):
TextLogErrorMetadata.objects.create(text_log_error=text_log_error,
failure_line=failure_line)
failure_lines = FailureLine.objects.filter(
job_guid__in=[job.guid for job in jobs]).all()
text_log_errors = TextLogError.objects.filter(
step__job__in=jobs).all()
for text_log_error, failure_line in zip(text_log_errors, failure_lines):
assert text_log_error.metadata.best_is_verified is False
assert failure_line.best_is_verified is False
body = [{"id": failure_line.id,
"best_classification": classified_failures[1].id}
for failure_line in failure_lines]
resp = client.put(reverse("text-log-error-list"), body, format="json")
assert resp.status_code == 200
for text_log_error, failure_line in zip(text_log_errors, failure_lines):
text_log_error.metadata.refresh_from_db()
failure_line.refresh_from_db()
assert failure_line.best_classification == classified_failures[1]
assert failure_line.best_is_verified
assert text_log_error.metadata.best_classification == classified_failures[1]
assert text_log_error.metadata.best_is_verified
for job in jobs:
assert job.is_fully_verified()
# will assert if we don't have exactly one job, which is what we want
note = JobNote.objects.get(job=job)
assert note.failure_classification.id == 4
assert note.user == test_user
def test_update_error_ignore(test_job, text_log_errors_failure_lines,
classified_failures, test_user):
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
client.force_authenticate(user=test_user)
MatcherManager.register_detector(ManualDetector)
text_log_error = text_log_errors[0]
failure_line = failure_lines[0]
assert text_log_error.metadata.best_classification == classified_failures[0]
assert text_log_error.metadata.best_is_verified is False
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
body = {"project": test_job.repository.name,
"best_classification": None}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": text_log_error.id}),
body, format="json")
assert resp.status_code == 200
failure_line.refresh_from_db()
text_log_error.metadata.refresh_from_db()
assert failure_line.best_classification is None
assert failure_line.best_is_verified
assert text_log_error.metadata.best_classification is None
assert text_log_error.metadata.best_is_verified
def test_update_error_all_ignore_mark_job(test_job,
mock_autoclassify_jobs_true,
text_log_errors_failure_lines,
classified_failures,
test_user):
text_log_errors, failure_lines = text_log_errors_failure_lines
MatcherManager.register_detector(ManualDetector)
client = APIClient()
client.force_authenticate(user=test_user)
job_failure_lines = [line for line in failure_lines if
line.job_guid == test_job.guid]
job_text_log_errors = [error for error in text_log_errors if
error.step.job == test_job]
for error_line, failure_line in zip(job_text_log_errors, job_failure_lines):
error_line.best_is_verified = False
error_line.best_classification = None
failure_line.best_is_verified = False
failure_line.best_classification = None
assert JobNote.objects.count() == 0
for error_line, failure_line in zip(job_text_log_errors, job_failure_lines):
assert error_line.metadata.best_is_verified is False
assert failure_line.best_is_verified is False
body = {"best_classification": None}
resp = client.put(reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
error_line.metadata.refresh_from_db()
failure_line.refresh_from_db()
assert error_line.metadata.best_classification is None
assert error_line.metadata.best_is_verified
assert failure_line.best_classification is None
assert failure_line.best_is_verified
assert test_job.is_fully_verified()
assert JobNote.objects.count() == 1
def test_update_error_partial_ignore_mark_job(test_job,
mock_autoclassify_jobs_true,
text_log_errors_failure_lines,
classified_failures,
test_user):
text_log_errors, failure_lines = text_log_errors_failure_lines
MatcherManager.register_detector(ManualDetector)
client = APIClient()
client.force_authenticate(user=test_user)
for i, (error_line, failure_line) in enumerate(zip(text_log_errors, failure_lines)):
assert error_line.metadata.best_is_verified is False
assert failure_line.best_is_verified is False
body = {"best_classification": None if i == 0 else classified_failures[0].id}
resp = client.put(reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
error_line.metadata.refresh_from_db()
failure_line.refresh_from_db()
if i == 0:
assert error_line.metadata.best_classification is None
assert failure_line.best_classification is None
else:
assert error_line.metadata.best_classification == classified_failures[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified
assert test_job.is_fully_verified()
# will assert if we don't have exactly one note for this job, which is
# what we want
note = JobNote.objects.get(job=test_job)
assert note.failure_classification.id == 4
assert note.user == test_user
def test_update_error_verify_bug(test_repository,
text_log_errors_failure_lines,
classified_failures,
test_user):
MatcherManager.register_detector(ManualDetector)
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
client.force_authenticate(user=test_user)
failure_line = failure_lines[0]
error_line = text_log_errors[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified is False
classified_failures[0].bug_number = 1234
classified_failures[0].save()
body = {"bug_number": classified_failures[0].bug_number}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
failure_line.refresh_from_db()
error_line.metadata.refresh_from_db()
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified
es_line = TestFailureLine.get(failure_line.id, routing=failure_line.test)
assert es_line.best_classification == classified_failures[0].id
assert es_line.best_is_verified
def test_update_error_verify_new_bug(test_repository,
text_log_errors_failure_lines,
classified_failures,
test_user):
MatcherManager.register_detector(ManualDetector)
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
client.force_authenticate(user=test_user)
failure_line = failure_lines[0]
error_line = text_log_errors[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified is False
assert 78910 not in [item.bug_number for item in classified_failures]
body = {"bug_number": 78910}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
failure_line.refresh_from_db()
error_line.metadata.refresh_from_db()
assert failure_line.best_classification not in classified_failures
assert failure_line.best_classification.bug_number == 78910
assert failure_line.best_is_verified
assert error_line.metadata.best_classification not in classified_failures
assert error_line.metadata.best_is_verified
assert error_line.metadata.best_classification.bug_number == 78910
def test_update_error_verify_ignore_now(test_repository,
text_log_errors_failure_lines,
classified_failures,
test_user):
MatcherManager.register_detector(ManualDetector)
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
client.force_authenticate(user=test_user)
failure_line = failure_lines[0]
error_line = text_log_errors[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified is False
assert 78910 not in [item.bug_number for item in classified_failures]
body = {}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
failure_line.refresh_from_db()
error_line.metadata.refresh_from_db()
assert failure_line.best_classification is None
assert failure_line.best_is_verified
assert error_line.metadata.best_classification is None
assert error_line.metadata.best_is_verified
def test_update_error_change_bug(test_repository,
text_log_errors_failure_lines,
classified_failures,
test_user):
MatcherManager.register_detector(ManualDetector)
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
client.force_authenticate(user=test_user)
failure_line = failure_lines[0]
error_line = text_log_errors[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified is False
assert 78910 not in [item.bug_number for item in classified_failures]
body = {"best_classification": classified_failures[0].id,
"bug_number": 78910}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
classified_failures[0].refresh_from_db()
failure_line.refresh_from_db()
error_line.metadata.refresh_from_db()
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_classification.bug_number == 78910
assert failure_line.best_is_verified
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified
assert error_line.metadata.best_classification.bug_number == 78910
def test_update_error_bug_change_cf(test_repository,
text_log_errors_failure_lines,
classified_failures,
test_user):
MatcherManager.register_detector(ManualDetector)
text_log_errors, failure_lines = text_log_errors_failure_lines
client = APIClient()
client.force_authenticate(user=test_user)
failure_line = failure_lines[0]
error_line = text_log_errors[0]
assert failure_line.best_classification == classified_failures[0]
assert failure_line.best_is_verified is False
assert error_line.metadata.failure_line == failure_line
assert error_line.metadata.best_classification == classified_failures[0]
assert error_line.metadata.best_is_verified is False
assert 78910 not in [item.bug_number for item in classified_failures]
classified_failures[1].bug_number = 78910
classified_failures[1].save()
body = {"best_classification": classified_failures[0].id,
"bug_number": 78910}
resp = client.put(
reverse("text-log-error-detail", kwargs={"pk": error_line.id}),
body, format="json")
assert resp.status_code == 200
classified_failures[1].refresh_from_db()
failure_line.refresh_from_db()
error_line.metadata.refresh_from_db()
assert failure_line.best_classification == classified_failures[1]
assert failure_line.best_classification.bug_number == 78910
assert failure_line.best_is_verified
assert error_line.metadata.best_classification == classified_failures[1]
assert error_line.metadata.best_is_verified
assert error_line.metadata.best_classification.bug_number == 78910
assert ClassifiedFailure.objects.count() == len(classified_failures) - 1

Просмотреть файл

@ -4,6 +4,8 @@ from rest_framework.test import APIClient
from treeherder.model.models import (BugJobMap,
FailureLine,
JobNote,
TextLogError,
TextLogErrorMetadata,
TextLogSummary)
@ -136,6 +138,7 @@ def test_put_verify_job(webapp, test_repository, test_job, text_summary_lines, t
client = APIClient()
client.force_authenticate(user=test_user)
TextLogErrorMetadata.objects.filter(text_log_error__step__job=test_job).update(best_is_verified=True)
FailureLine.objects.filter(job_guid=test_job.guid).update(best_is_verified=True)
text_summary_lines = TextLogSummary.objects.filter(job_guid=test_job.guid).get().lines.all()

Просмотреть файл

@ -3,10 +3,12 @@ from collections import defaultdict
from django.db.utils import IntegrityError
from treeherder.model.models import (FailureLine,
from treeherder.model.models import (ClassifiedFailure,
FailureMatch,
Job,
JobNote,
Matcher,
TextLogError,
TextLogErrorMatch)
logger = logging.getLogger(__name__)
@ -21,66 +23,85 @@ def match_errors(job):
# Only try to autoclassify where we have a failure status; sometimes there can be
# error lines even in jobs marked as passing.
if job.autoclassify_status < Job.CROSSREFERENCED:
logger.error("Tried to autoclassify job %i without crossreferenced error lines" % job.id)
return
if job.autoclassify_status == Job.AUTOCLASSIFIED:
logger.error("Tried to autoclassify job %i which was already autoclassified" % job.id)
return
if job.result not in ["testfailed", "busted", "exception"]:
return
unmatched_failures = set(FailureLine.objects.unmatched_for_job(job))
unmatched_errors = set(TextLogError.objects.unmatched_for_job(job))
if not unmatched_failures:
if not unmatched_errors:
logger.info("Skipping autoclassify of job %i because it has no unmatched errors" % job.id)
return
matches, all_matched = find_matches(unmatched_failures)
update_db(job, matches, all_matched)
try:
matches, all_matched = find_matches(unmatched_errors)
update_db(job, matches, all_matched)
except:
logger.error("Autoclassification of job %s failed" % job.id)
job.autoclassify_status = Job.FAILED
raise
else:
logger.debug("Autoclassification of job %s suceeded" % job.id)
job.autoclassify_status = Job.AUTOCLASSIFIED
finally:
job.save(update_fields=['autoclassify_status'])
def find_matches(unmatched_failures):
def find_matches(unmatched_errors):
all_matches = set()
for matcher in Matcher.objects.registered_matchers():
matches = matcher(unmatched_failures)
matches = matcher(unmatched_errors)
for match in matches:
logger.info("Matched failure %i with intermittent %i" %
(match.failure_line.id, match.classified_failure.id))
logger.info("Matched error %i with intermittent %i" %
(match.text_log_error.id, match.classified_failure_id))
all_matches.add((matcher.db_object, match))
if match.score >= AUTOCLASSIFY_GOOD_ENOUGH_RATIO:
unmatched_failures.remove(match.failure_line)
unmatched_errors.remove(match.text_log_error)
if not unmatched_failures:
if not unmatched_errors:
break
return all_matches, len(unmatched_failures) == 0
return all_matches, len(unmatched_errors) == 0
def update_db(job, matches, all_matched):
matches_by_failure_line = defaultdict(set)
for item in matches:
matches_by_failure_line[item[1].failure_line].add(item)
matches_by_error = defaultdict(set)
classified_failures = {item.id: item for item in
ClassifiedFailure.objects.filter(
id__in=[match.classified_failure_id for _, match in matches])}
for matcher, match in matches:
classified_failure = classified_failures[match.classified_failure_id]
matches_by_error[match.text_log_error].add((matcher, match, classified_failure))
for failure_line, matches in matches_by_failure_line.iteritems():
for matcher, match in matches:
for text_log_error, matches in matches_by_error.iteritems():
for (matcher, match, classified_failure) in matches:
try:
FailureMatch.objects.create(
TextLogErrorMatch.objects.create(
score=match.score,
matcher=matcher,
classified_failure=match.classified_failure,
failure_line=failure_line)
if failure_line.error:
TextLogErrorMatch.objects.create(
classified_failure=classified_failure,
text_log_error=match.text_log_error)
if match.text_log_error.metadata and match.text_log_error.metadata.failure_line:
FailureMatch.objects.create(
score=match.score,
matcher=matcher,
classified_failure=match.classified_failure,
text_log_error=failure_line.error)
classified_failure=classified_failure,
failure_line=match.text_log_error.metadata.failure_line)
except IntegrityError:
logger.warning(
"Tried to create duplicate match for failure line %i with matcher %i and classified_failure %i" %
(failure_line.id, matcher.id, match.classified_failure.id))
best_match = failure_line.best_automatic_match(AUTOCLASSIFY_CUTOFF_RATIO)
"Tried to create duplicate match for TextLogError %i with matcher %i and classified_failure %i" %
(text_log_error.id, matcher.id, classified_failure.id))
best_match = text_log_error.best_automatic_match(AUTOCLASSIFY_CUTOFF_RATIO)
if best_match:
failure_line.best_classification = best_match.classified_failure
failure_line.save(update_fields=['best_classification'])
if failure_line.error:
failure_line.error.metadata.best_classification = best_match.classified_failure
failure_line.error.metadata.save(update_fields=['best_classification'])
text_log_error.mark_best_classification(classified_failure)
if all_matched:
if job.is_fully_autoclassified():

Просмотреть файл

@ -25,11 +25,14 @@ class Detector(object):
class TestFailureDetector(Detector):
    """Detector that flags text log errors whose crossreferenced FailureLine
    records an unexpected test result."""

    def __call__(self, text_log_errors):
        """Return indices (into ``text_log_errors``) of detectable lines.

        Only errors carrying metadata with a linked FailureLine can be
        inspected; all others are skipped.
        """
        rv = []
        # Pair each error with its position so the returned indices refer to
        # the caller's original list even after filtering.
        with_failure_lines = [(i, item) for (i, item) in enumerate(text_log_errors)
                              if item.metadata and item.metadata.failure_line]
        for i, text_log_error in with_failure_lines:
            failure = text_log_error.metadata.failure_line
            if (failure.action == "test_result" and failure.test and failure.status and
                    failure.expected):
                rv.append(i)
        return rv
@ -37,7 +40,7 @@ class TestFailureDetector(Detector):
class ManualDetector(Detector):
    """Small hack; this ensures that there's a matcher object indicating that a match
    was by manual association, but which never automatically matches any lines"""

    def __call__(self, text_log_errors):
        # Never match automatically; matches attributed to this detector are
        # created explicitly when a user classifies a job.
        return []

Просмотреть файл

@ -3,8 +3,6 @@ import time
from abc import (ABCMeta,
abstractmethod)
from collections import namedtuple
from datetime import (datetime,
timedelta)
from difflib import SequenceMatcher
from django.conf import settings
@ -12,16 +10,15 @@ from django.db.models import Q
from elasticsearch_dsl.query import Match as ESMatch
from treeherder.autoclassify.autoclassify import AUTOCLASSIFY_GOOD_ENOUGH_RATIO
from treeherder.model.models import (ClassifiedFailure,
FailureLine,
FailureMatch,
MatcherManager)
from treeherder.model.models import (MatcherManager,
TextLogError,
TextLogErrorMatch)
from treeherder.model.search import (TestFailureLine,
es_connected)
logger = logging.getLogger(__name__)
# A single autoclassification result: the error line that matched, the id of
# the ClassifiedFailure it matched against, and the match confidence score.
Match = namedtuple('Match', ['text_log_error', 'classified_failure_id', 'score'])
class Matcher(object):
@ -37,83 +34,134 @@ class Matcher(object):
def __init__(self, db_object):
self.db_object = db_object
def __call__(self, text_log_errors):
rv = []
for text_log_error in text_log_errors:
match = self.match(text_log_error)
if match:
rv.append(match)
return rv
def match(self, text_log_error):
best_match = self.query_best(text_log_error)
if best_match:
classified_failure_id, score = best_match
logger.debug("Matched using %s" % self.__class__.__name__)
return Match(text_log_error,
classified_failure_id,
score)
@abstractmethod
def __call__(self, failure_lines):
def query_best(self, text_log_error):
pass
# Matches whose error's metadata was verified as having NO classification are
# excluded from every matcher's candidate set.
ignored_line = (Q(text_log_error___metadata__best_classification=None) &
                Q(text_log_error___metadata__best_is_verified=True))


class id_window(object):
    """Decorator for ``Matcher.query_best`` implementations.

    Runs the decorated function's candidate queryset(s) over successive
    windows of TextLogError ids (newest first) so a single unbounded query is
    never issued, stopping early once a good-enough match is found or the
    time budget is exhausted.
    """

    def __init__(self, size, time_budget):
        # Number of TextLogError ids covered by each query window.
        self.size = size
        # Total budget across all windows, in milliseconds (None = unlimited).
        self.time_budget_ms = time_budget

    def __call__(self, f):
        outer = self

        def inner(self, text_log_error):
            queries = f(self, text_log_error)
            if not queries:
                return
            for item in queries:
                # Each item is either a bare queryset, or a
                # (queryset, score_multiplier) pair where the multiplier is a
                # (numerator, denominator) rational applied to the raw score.
                if isinstance(item, tuple):
                    query, score_multiplier = item
                else:
                    query = item
                    score_multiplier = (1, 1)
                result = outer.run(query, score_multiplier)
                if result:
                    return result

        inner.__name__ = f.__name__
        inner.__doc__ = f.__doc__
        return inner

    def run(self, query, score_multiplier):
        """Evaluate ``query`` window-by-window; return the best
        (classified_failure_id, score) found, or None."""
        matches = []
        time_budget = self.time_budget_ms / 1000. if self.time_budget_ms is not None else None
        t0 = time.time()

        # Walk the id space from the newest error downwards.
        upper_cutoff = (TextLogError.objects
                        .order_by('-id')
                        .values_list('id', flat=True)[0])
        count = 0
        while upper_cutoff > 0:
            count += 1
            lower_cutoff = max(upper_cutoff - self.size, 0)
            window_queryset = query.filter(
                text_log_error__id__range=(lower_cutoff, upper_cutoff))
            logger.debug("[time_window] Queryset: %s" % window_queryset.query)
            match = window_queryset.first()
            if match is not None:
                score = match.score * score_multiplier[0] / score_multiplier[1]
                matches.append((match, score))
                if score >= AUTOCLASSIFY_GOOD_ENOUGH_RATIO:
                    break
            upper_cutoff -= self.size

            if time_budget is not None and time.time() - t0 > time_budget:
                # Putting the condition at the end of the loop ensures that we always
                # run it once, which is useful for testing
                break

        logger.debug("[time_window] Used %i queries" % count)

        if matches:
            # Highest score wins; ties broken by the newest classified failure.
            matches.sort(key=lambda x: (-x[1], -x[0].classified_failure_id))
            best = matches[0]
            return best[0].classified_failure_id, best[1]

        return None
def with_failure_lines(f):
    """Decorator for matcher/detector ``__call__`` implementations: narrows
    the incoming text log errors to those that have metadata with a linked
    FailureLine before delegating to the wrapped function."""
    def inner(self, text_log_errors):
        usable = [error for error in text_log_errors
                  if error.metadata and error.metadata.failure_line]
        return f(self, usable)
    inner.__name__ = f.__name__
    inner.__doc__ = f.__doc__
    return inner
class PreciseTestMatcher(Matcher):
    """Matcher that looks for existing failures with identical tests and
    identical error message."""

    @with_failure_lines
    def __call__(self, text_log_errors):
        return super(PreciseTestMatcher, self).__call__(text_log_errors)

    @id_window(size=20000,
               time_budget=500)
    def query_best(self, text_log_error):
        """Return candidate querysets of prior matches whose crossreferenced
        FailureLine has the same test, subtest, status, expected value and
        message (excluding this error's own job and ignored lines)."""
        failure_line = text_log_error.metadata.failure_line
        logger.debug("Looking for test match in failure %d" % failure_line.id)

        if failure_line.action != "test_result" or failure_line.message is None:
            return

        return [(TextLogErrorMatch.objects
                 .filter(text_log_error___metadata__failure_line__action="test_result",
                         text_log_error___metadata__failure_line__test=failure_line.test,
                         text_log_error___metadata__failure_line__subtest=failure_line.subtest,
                         text_log_error___metadata__failure_line__status=failure_line.status,
                         text_log_error___metadata__failure_line__expected=failure_line.expected,
                         text_log_error___metadata__failure_line__message=failure_line.message)
                 .exclude(ignored_line |
                          Q(text_log_error__step__job=text_log_error.step.job))
                 .order_by("-score", "-classified_failure"))]
class ElasticSearchTestMatcher(Matcher):
@ -126,79 +174,68 @@ class ElasticSearchTestMatcher(Matcher):
self.calls = 0
@es_connected(default=[])
def __call__(self, failure_lines):
rv = []
self.lines += len(failure_lines)
for failure_line in failure_lines:
if failure_line.action != "test_result" or not failure_line.message:
logger.debug("Skipped elasticsearch matching")
continue
match = ESMatch(message={"query": failure_line.message[:1024],
"type": "phrase"})
search = (TestFailureLine.search()
.filter("term", test=failure_line.test)
.filter("term", status=failure_line.status)
.filter("term", expected=failure_line.expected)
.filter("exists", field="best_classification")
.query(match))
if failure_line.subtest:
search = search.filter("term", subtest=failure_line.subtest)
try:
self.calls += 1
resp = search.execute()
except:
logger.error("Elastic search lookup failed: %s %s %s %s %s",
failure_line.test, failure_line.subtest, failure_line.status,
failure_line.expected, failure_line.message)
raise
scorer = MatchScorer(failure_line.message)
matches = [(item, item.message) for item in resp]
best_match = scorer.best_match(matches)
if best_match:
logger.debug("Matched using elastic search test matcher")
rv.append(Match(failure_line,
ClassifiedFailure.objects.get(
id=best_match[1].best_classification),
best_match[0]))
return rv
@with_failure_lines
def __call__(self, text_log_errors):
return super(ElasticSearchTestMatcher, self).__call__(text_log_errors)
def query_best(self, text_log_error):
failure_line = text_log_error.metadata.failure_line
if failure_line.action != "test_result" or not failure_line.message:
logger.debug("Skipped elasticsearch matching")
return
match = ESMatch(message={"query": failure_line.message[:1024],
"type": "phrase"})
search = (TestFailureLine.search()
.filter("term", test=failure_line.test)
.filter("term", status=failure_line.status)
.filter("term", expected=failure_line.expected)
.filter("exists", field="best_classification")
.query(match))
if failure_line.subtest:
search = search.filter("term", subtest=failure_line.subtest)
try:
self.calls += 1
resp = search.execute()
except:
logger.error("Elastic search lookup failed: %s %s %s %s %s",
failure_line.test, failure_line.subtest, failure_line.status,
failure_line.expected, failure_line.message)
raise
scorer = MatchScorer(failure_line.message)
matches = [(item, item.message) for item in resp]
best_match = scorer.best_match(matches)
if best_match:
return (best_match[1].best_classification, best_match[0])
class CrashSignatureMatcher(Matcher):
    """Matcher that looks for crashes with identical signature"""

    @with_failure_lines
    def __call__(self, text_log_errors):
        return super(CrashSignatureMatcher, self).__call__(text_log_errors)

    @id_window(size=20000,
               time_budget=250)
    def query_best(self, text_log_error):
        """Return candidate querysets for crashes sharing this line's
        signature; same-test matches are tried first at full score, then any
        test at a reduced (8/10) score."""
        failure_line = text_log_error.metadata.failure_line

        if (failure_line.action != "crash" or
            failure_line.signature is None or
            failure_line.signature == "None"):
            return

        matching_failures = (TextLogErrorMatch.objects
                             .filter(text_log_error___metadata__failure_line__action="crash",
                                     text_log_error___metadata__failure_line__signature=failure_line.signature)
                             .exclude(ignored_line |
                                      Q(text_log_error__step__job=text_log_error.step.job))
                             .select_related('text_log_error',
                                             'text_log_error___metadata')
                             .order_by("-score", "-classified_failure"))

        return [matching_failures.filter(text_log_error___metadata__failure_line__test=failure_line.test),
                (matching_failures, (8, 10))]
class MatchScorer(object):

Просмотреть файл

@ -6,6 +6,7 @@ from django.db import (IntegrityError,
from mozlog.formatters.tbplformatter import TbplFormatter
from treeherder.model.models import (FailureLine,
Job,
TextLogError,
TextLogErrorMetadata,
TextLogSummary,
@ -14,7 +15,6 @@ from treeherder.model.models import (FailureLine,
logger = logging.getLogger(__name__)
@transaction.atomic
def crossreference_job(job):
    """Populate the TextLogSummary and TextLogSummaryLine tables for a
    job. Specifically this function tries to match the
    unstructured error lines of the job with its structured failure lines.

    Returns True when the job has both error lines and failure lines
    available for matching, False otherwise.
    """
    try:
        if job.autoclassify_status >= Job.CROSSREFERENCED:
            # Already crossreferenced; just report whether there is anything
            # for the autoclassifier to work with.
            logger.debug("Job %i already crossreferenced" % job.id)
            return (TextLogError.objects
                    .filter(step__job=job)
                    .exists() and
                    FailureLine.objects
                    .filter(job_guid=job.guid)
                    .exists())
        rv = _crossreference(job)
        job.autoclassify_status = Job.CROSSREFERENCED
        job.save(update_fields=['autoclassify_status'])
        return rv
    except IntegrityError:
        logger.warning("IntegrityError crossreferencing error lines for job %s" % job.id)
        return False
@transaction.atomic
@ -43,7 +55,7 @@ def _crossreference(job):
text_log_errors = TextLogError.objects.filter(
step__job=job).order_by('line_number')
# If we don't have failure lines and text log errors nothing will happen
# If we don't have both failure lines and text log errors nothing will happen
# so return early
if not (failure_lines.exists() and text_log_errors.exists()):
return False

Просмотреть файл

@ -96,5 +96,8 @@ def crossreference_error_lines(job_id, priority):
autoclassify.apply_async(
args=[job_id],
routing_key="autoclassify.%s" % priority)
elif not settings.AUTOCLASSIFY_JOBS:
job.autoclassify_status = Job.SKIPPED
job.save(update_fields=['autoclassify_status'])
else:
logger.debug("Job %i didn't have any crossreferenced lines, skipping autoclassify " % job_id)

Просмотреть файл

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
# Original SQL:
#
# BEGIN;
# --
# -- Add field autoclassify_status to job
# --
# ALTER TABLE `job` ADD COLUMN `autoclassify_status` integer DEFAULT 0 NOT NULL;
# ALTER TABLE `job` ALTER COLUMN `autoclassify_status` DROP DEFAULT;
# COMMIT;
class Migration(migrations.Migration):
    """Add Job.autoclassify_status via raw SQL, with state_operations keeping
    django's in-memory model state in sync with the manual DDL."""

    dependencies = [
        ('model', '0004_duplicate_failure_classifications'),
    ]

    operations = [migrations.RunSQL(
        sql="""
        --
        -- Add field autoclassify_status to job
        --
        SET FOREIGN_KEY_CHECKS=0;
        ALTER TABLE `job` ADD COLUMN `autoclassify_status` integer DEFAULT 0 NOT NULL;
        ALTER TABLE `job` ALTER COLUMN `autoclassify_status` DROP DEFAULT;
        """,
        # NOTE(review): FOREIGN_KEY_CHECKS=0 is never reset in the forward
        # SQL — confirm this matches the project's other raw-SQL migrations.
        reverse_sql="""
        --
        -- Drop field autoclassify_status from job
        --
        ALTER TABLE `job` DROP COLUMN `autoclassify_status` CASCADE;
        """,
        state_operations=[
            migrations.AddField(
                model_name='job',
                name='autoclassify_status',
                field=models.IntegerField(default=0, choices=[(0, 'pending'), (1, 'crossreferenced'), (2, 'autoclassified'), (3, 'skipped'), (255, 'failed')]),
            ),
        ])]

Просмотреть файл

@ -7,7 +7,6 @@ import time
from collections import (OrderedDict,
defaultdict)
from hashlib import sha1
from itertools import chain
from django.conf import settings
from django.contrib.auth.models import User
@ -667,6 +666,7 @@ class Job(models.Model):
repository = models.ForeignKey(Repository)
guid = models.CharField(max_length=50, unique=True)
project_specific_id = models.PositiveIntegerField(null=True)
autoclassify_status = models.IntegerField(choices=AUTOCLASSIFY_STATUSES, default=PENDING)
coalesced_to_guid = models.CharField(max_length=50, null=True,
default=None)
@ -736,45 +736,31 @@ class Job(models.Model):
action="truncated").count() > 0:
return False
classified_failure_lines_count = FailureLine.objects.filter(
best_classification__isnull=False,
job_guid=self.guid).count()
classified_error_count = TextLogError.objects.filter(
_metadata__best_classification__isnull=False,
step__job=self).count()
if classified_failure_lines_count == 0:
if classified_error_count == 0:
return False
from treeherder.model.error_summary import get_filtered_error_lines
return classified_failure_lines_count == len(get_filtered_error_lines(self))
return classified_error_count == len(get_filtered_error_lines(self))
def is_fully_verified(self):
if FailureLine.objects.filter(job_guid=self.guid,
action="truncated").count() > 0:
logger.error("Job %s truncated storage of FailureLines" % self.guid)
return False
# Line is not fully verified if there are either structured failure lines
# with no best failure, or unverified unstructured lines not associated with
# a structured line
unverified_failure_lines = FailureLine.objects.filter(
best_is_verified=False,
job_guid=self.guid).count()
unverified_errors = TextLogError.objects.filter(
_metadata__best_is_verified=False,
step__job=self).count()
if unverified_failure_lines:
logger.error("Job %s has unverified FailureLines" % self.guid)
if unverified_errors:
logger.error("Job %r has unverified TextLogErrors" % self)
return False
unverified_text_lines = TextLogSummaryLine.objects.filter(
verified=False,
failure_line=None,
summary__job_guid=self.guid).count()
if unverified_text_lines:
logger.error("Job %s has unverified TextLogSummary" % self.guid)
return False
logger.info("Job %s is fully verified" % self.guid)
logger.info("Job %r is fully verified" % self)
return True
def update_after_verification(self, user):
@ -798,13 +784,13 @@ class Job(models.Model):
def get_manual_classification_line(self):
"""
Return the FailureLine from a job if it can be manually classified as a side effect
Return the TextLogError from a job if it can be manually classified as a side effect
of the overall job being classified.
Otherwise return None.
"""
try:
failure_lines = [FailureLine.objects.get(job_guid=self.guid)]
except (FailureLine.DoesNotExist, FailureLine.MultipleObjectsReturned):
text_log_error = TextLogError.objects.get(step__job=self)
except (TextLogError.DoesNotExist, TextLogError.MultipleObjectsReturned):
return None
# Only propagate the classification if there is exactly one unstructured failure
@ -815,19 +801,20 @@ class Job(models.Model):
# Check that some detector would match this. This is being used as an indication
# that the autoclassifier will be able to work on this classification
if not any(detector(failure_lines)
if not any(detector([text_log_error])
for detector in Matcher.objects.registered_detectors()):
return None
return failure_lines[0]
return text_log_error
def update_autoclassification_bug(self, bug_number):
failure_line = self.get_manual_classification_line()
text_log_error = self.get_manual_classification_line()
if failure_line is None:
if text_log_error is None:
return
classification = failure_line.best_classification
classification = (text_log_error.metadata.best_classification if text_log_error.metadata
else None)
if classification and classification.bug_number is None:
return classification.set_bug(bug_number)
@ -975,18 +962,18 @@ class JobNoteManager(models.Manager):
# Only insert bugs for verified failures since these are automatically
# mirrored to ES and the mirroring can't be undone
classified_failures = ClassifiedFailure.objects.filter(
best_for_lines__job_guid=job.guid,
best_for_lines__best_is_verified=True)
bug_numbers = set(ClassifiedFailure.objects
.filter(best_for_errors__text_log_error__step__job=job,
best_for_errors__best_is_verified=True)
.exclude(bug_number=None)
.values_list('bug_number', flat=True))
text_log_summary_lines = TextLogSummaryLine.objects.filter(
summary__job_guid=job.guid, verified=True).exclude(
bug_number=None)
bug_numbers = {item.bug_number
for item in chain(classified_failures,
text_log_summary_lines)
if item.bug_number}
# Legacy
bug_numbers |= set(TextLogSummaryLine.objects
.filter(summary__job_guid=job.guid,
verified=True)
.exclude(bug_number=None)
.values_list('bug_number', flat=True))
for bug_number in bug_numbers:
BugJobMap.objects.get_or_create(job=job,
@ -1045,12 +1032,34 @@ class JobNote(models.Model):
self.job.save()
# if a manually filed job, update the autoclassification information
if self.user:
if self.failure_classification.name in [
"intermittent", "intermittent needs filing"]:
failure_line = self.job.get_manual_classification_line()
if failure_line:
failure_line.update_autoclassification()
if not self.user:
return
if self.failure_classification.name not in [
"intermittent", "intermittent needs filing"]:
return
text_log_error = self.job.get_manual_classification_line()
if not text_log_error:
return
bug_numbers = set(BugJobMap.objects
.filter(job=self.job)
.values_list('bug_id', flat=True))
existing_bugs = set(ClassifiedFailure.objects
.filter(error_matches__text_log_error=text_log_error)
.values_list('bug_number', flat=True))
add_bugs = (bug_numbers - existing_bugs)
if not add_bugs:
return
manual_detector = Matcher.objects.get(name="ManualDetector")
for bug_number in add_bugs:
classification, _ = text_log_error.set_classification(manual_detector,
bug_number=bug_number)
if len(add_bugs) == 1 and not existing_bugs:
text_log_error.mark_best_classification_verified(classification)
def save(self, *args, **kwargs):
super(JobNote, self).save(*args, **kwargs)
@ -1067,24 +1076,6 @@ class JobNote(models.Model):
self.who)
class FailureLineManager(models.Manager):
def unmatched_for_job(self, job):
return FailureLine.objects.filter(
job_guid=job.guid,
repository=job.repository,
classified_failures=None,
)
def for_jobs(self, *jobs, **filters):
failures = FailureLine.objects.filter(
job_guid__in=[item.guid for item in jobs],
**filters)
failures_by_job = defaultdict(list)
for item in failures:
failures_by_job[item.job_guid].append(item)
return failures_by_job
class FailureLine(models.Model):
# We make use of prefix indicies for several columns in this table which
# can't be expressed in django syntax so are created with raw sql in migrations.
@ -1129,9 +1120,6 @@ class FailureLine(models.Model):
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
objects = FailureLineManager()
# TODO: add indexes once we know which queries will be typically executed
class Meta:
db_table = 'failure_line'
index_together = (
@ -1199,7 +1187,9 @@ class FailureLine(models.Model):
return classification, new_link
def mark_best_classification_verified(self, classification):
if classification not in self.classified_failures.all():
if (classification and
classification.id not in self.classified_failures.values_list('id', flat=True)):
logger.debug("Adding new classification to TextLogError")
manual_detector = Matcher.objects.get(name="ManualDetector")
self.set_classification(manual_detector, classification=classification)
@ -1302,23 +1292,26 @@ class ClassifiedFailure(models.Model):
# ON matches.classified_failure_id = <other.id> AND
# matches.failure_line_id = failure_match.failue_line_id
delete_ids = []
for match in self.matches.all():
try:
existing = FailureMatch.objects.get(classified_failure=other,
failure_line=match.failure_line)
if match.score > existing.score:
existing.score = match.score
existing.save()
delete_ids.append(match.id)
except FailureMatch.DoesNotExist:
match.classified_failure = other
match.save()
FailureMatch.objects.filter(id__in=delete_ids).delete()
for Match, key, matches in [(TextLogErrorMatch, "text_log_error",
self.error_matches.all()),
(FailureMatch, "failure_line",
self.matches.all())]:
for match in matches:
kwargs = {key: getattr(match, key)}
existing = Match.objects.filter(classified_failure=other, **kwargs)
if existing:
for existing_match in existing:
if match.score > existing_match.score:
existing_match.score = match.score
existing_match.save()
delete_ids.append(match.id)
else:
match.classified_failure = other
match.save()
Match.objects.filter(id__in=delete_ids).delete()
FailureLine.objects.filter(best_classification=self).update(best_classification=other)
self.delete()
# TODO: add indexes once we know which queries will be typically executed
class Meta:
db_table = 'classified_failure'
@ -1510,6 +1503,32 @@ class TextLogStep(models.Model):
'finished_line_number')
class TextLogErrorManager(models.Manager):
    """Manager adding job-centric lookup helpers for TextLogError."""

    def unmatched_for_job(self, job):
        """Return a the text log errors for a specific job that have
        no associated ClassifiedFailure.

        :param job: Job associated with the text log errors"""
        unmatched = TextLogError.objects.filter(step__job=job,
                                                classified_failures=None)
        return unmatched.prefetch_related('step', '_metadata', '_metadata__failure_line')

    def for_jobs(self, *jobs, **filters):
        """Return a dict of {job: [text log errors]} for a set of jobs, filtered by
        caller-provided django filters.

        :param jobs: Jobs associated with the text log errors
        :param filters: filters to apply to text log errors"""
        lines_by_job = defaultdict(list)
        for error_line in TextLogError.objects.filter(step__job__in=jobs, **filters):
            lines_by_job[error_line.step.job].append(error_line)
        return lines_by_job
class TextLogError(models.Model):
"""
A detected error line in the textual (unstructured) log
@ -1520,6 +1539,8 @@ class TextLogError(models.Model):
line = models.TextField()
line_number = models.PositiveIntegerField()
objects = TextLogErrorManager()
class Meta:
db_table = "text_log_error"
unique_together = ('step', 'line_number')
@ -1538,6 +1559,91 @@ class TextLogError(models.Model):
from treeherder.model import error_summary
return error_summary.bug_suggestions_line(self)
def best_automatic_match(self, min_score=0):
return (TextLogErrorMatch.objects
.filter(text_log_error__id=self.id,
score__gt=min_score)
.order_by("-score",
"-classified_failure_id")
.select_related('classified_failure')
.first())
    def set_classification(self, matcher, classification=None, bug_number=None,
                           mark_best=False):
        """Associate this error line with a ClassifiedFailure.

        When no ``classification`` is supplied, one is looked up or created
        from ``bug_number``, or a brand new ClassifiedFailure is created.

        :param matcher: Matcher responsible for this association
        :param classification: existing ClassifiedFailure to link (optional)
        :param bug_number: bug used to get_or_create a classification when
            none was passed (optional)
        :param mark_best: when True, also record the classification as this
            error's best classification
        :returns: tuple of (classification, new TextLogErrorMatch link)
        """
        with transaction.atomic():
            if classification is None:
                if bug_number:
                    classification, _ = ClassifiedFailure.objects.get_or_create(
                        bug_number=bug_number)
                else:
                    classification = ClassifiedFailure.objects.create()

            # Direct association, so link with the maximal score of 1.
            new_link = TextLogErrorMatch(
                text_log_error=self,
                classified_failure=classification,
                matcher=matcher,
                score=1)
            new_link.save()

            if self.metadata and self.metadata.failure_line:
                # Mirror the match onto the legacy FailureLine-based match
                # table so both representations stay in sync.
                new_link_failure = FailureMatch(
                    failure_line=self.metadata.failure_line,
                    classified_failure=classification,
                    matcher=matcher,
                    score=1)
                new_link_failure.save()

            if mark_best:
                self.mark_best_classification(classification)

        return classification, new_link
    def mark_best_classification(self, classification):
        """Record ``classification`` as this error's best classification,
        creating the metadata row if needed, and propagate the choice to the
        crossreferenced FailureLine (including its elasticsearch mirror)."""
        if self.metadata is None:
            TextLogErrorMetadata.objects.create(
                text_log_error=self,
                best_classification=classification)
        else:
            self.metadata.best_classification = classification
            self.metadata.save(update_fields=['best_classification'])

        # NOTE(review): if the metadata row was just created above,
        # self.metadata may still be the cached None from the reverse
        # one-to-one relation — confirm against the ORM's relation caching.
        if self.metadata.failure_line:
            self.metadata.failure_line.best_classification = classification
            self.metadata.failure_line.save(update_fields=['best_classification'])
            self.metadata.failure_line.elastic_search_insert()
    def mark_best_classification_verified(self, classification):
        """Record ``classification`` as this error's human-verified best
        classification, creating a manual-detector match first when the
        classification was not already associated with this error, and
        propagate the result to the crossreferenced FailureLine (including
        its elasticsearch mirror)."""
        if classification not in self.classified_failures.all():
            manual_detector = Matcher.objects.get(name="ManualDetector")
            self.set_classification(manual_detector, classification=classification)

        if self.metadata is None:
            TextLogErrorMetadata.objects.create(text_log_error=self,
                                                best_classification=classification,
                                                best_is_verified=True)
        else:
            self.metadata.best_classification = classification
            self.metadata.best_is_verified = True
            self.metadata.save()

        # NOTE(review): as in mark_best_classification, self.metadata may be
        # a stale cached None when the row was created just above — verify.
        if self.metadata.failure_line:
            self.metadata.failure_line.best_classification = classification
            self.metadata.failure_line.best_is_verified = True
            self.metadata.failure_line.save()
            self.metadata.failure_line.elastic_search_insert()
    def update_autoclassification(self):
        """
        If a job is manually classified and has a single line in the logs matching a single
        TextLogError, but the TextLogError has not matched any ClassifiedFailure, add a
        new match due to the manual classification.
        """
        manual_detector = Matcher.objects.get(name="ManualDetector")

        # Create a fresh classification attributed to the manual detector and
        # immediately record it as the verified best classification.
        classification, _ = self.set_classification(manual_detector)
        self.mark_best_classification_verified(classification)
class TextLogErrorMetadata(models.Model):
"""Optional, mutable, data that can be associated with a TextLogError."""

Просмотреть файл

@ -261,6 +261,9 @@ class JobsViewSet(viewsets.ViewSet):
except ObjectDoesNotExist:
pass
status_map = {k: v for k, v in Job.AUTOCLASSIFY_STATUSES}
resp["autoclassify_status"] = status_map[job.autoclassify_status]
return Response(resp)
def list(self, request, project):
@ -542,6 +545,11 @@ class JobsViewSet(viewsets.ViewSet):
status=HTTP_404_NOT_FOUND)
textlog_errors = (TextLogError.objects
.filter(step__job=job)
.select_related("_metadata",
"_metadata__failure_line")
.prefetch_related("classified_failures",
"matches",
"matches__matcher")
.order_by('id'))
return Response(serializers.TextLogErrorSerializer(textlog_errors,
many=True,

Просмотреть файл

@ -216,11 +216,16 @@ class FailureLineNoStackSerializer(serializers.ModelSerializer):
class TextLogErrorMetadataSerializer(serializers.ModelSerializer):
    """Serializes TextLogErrorMetadata, embedding the crossreferenced
    FailureLine (sans stack fields) as a read-only nested object."""
    failure_line = FailureLineNoStackSerializer(read_only=True)

    class Meta:
        model = models.TextLogErrorMetadata
        fields = '__all__'
class TextLogErrorSerializer(serializers.ModelSerializer):
matches = FailureMatchSerializer(many=True)
classified_failures = ClassifiedFailureSerializer(many=True)
bug_suggestions = NoOpSerializer(read_only=True)
metadata = TextLogErrorMetadataSerializer(read_only=True)

Просмотреть файл

@ -0,0 +1,127 @@
import logging
from django.db import transaction
from rest_framework import viewsets
from rest_framework.response import Response
from rest_framework.status import (HTTP_200_OK,
HTTP_400_BAD_REQUEST,
HTTP_404_NOT_FOUND)
from treeherder.model.models import (ClassifiedFailure,
TextLogError)
from treeherder.webapp.api import (pagination,
serializers)
from treeherder.webapp.api.utils import as_dict
logger = logging.getLogger(__name__)
class TextLogErrorViewSet(viewsets.ModelViewSet):
    """API endpoint for text log errors, supporting single and bulk
    classification updates."""
    serializer_class = serializers.TextLogErrorSerializer
    # Prefetch everything the serializer renders so a listing avoids
    # per-row follow-up queries.
    queryset = TextLogError.objects.prefetch_related("classified_failures",
                                                     "matches",
                                                     "matches__matcher",
                                                     "_metadata",
                                                     "_metadata__failure_line").all()
    pagination_class = pagination.IdPagination

    @transaction.atomic
    def _update(self, data, user, many=True):
        """Apply classification updates to one or more text log errors.

        :param data: iterable of dicts, each with an "id" key and optionally
                     "best_classification" and/or "bug_number".
        :param user: user performing the verification; passed through to
                     ``update_after_verification``.
        :param many: when False, serialize just the first updated line.
        :return: ``(body, http_status)`` tuple; on failure ``body`` is a plain
                 error-message string.
        """
        ids = []
        error_line_ids = set()
        classification_ids = set()
        bug_number_classifications = {}

        for item in data:
            line_id = item.get("id")
            if line_id is None:
                return "No text log error id provided", HTTP_400_BAD_REQUEST
            try:
                line_id = int(line_id)
            except (ValueError, TypeError):
                # TypeError covers non-numeric JSON values (lists, dicts, None
                # inside nested structures) that int() rejects.
                return "Text log error id was not an integer", HTTP_400_BAD_REQUEST

            error_line_ids.add(line_id)

            classification_id = item.get("best_classification")
            if classification_id is not None:
                classification_ids.add(classification_id)

            bug_number = item.get("bug_number")
            # A bare bug number (no explicit classification) gets its own
            # ClassifiedFailure, created at most once per distinct bug number.
            if (not classification_id and
                bug_number is not None and
                bug_number not in bug_number_classifications):
                bug_number_classifications[bug_number], _ = (
                    ClassifiedFailure.objects.get_or_create(bug_number=bug_number))

            ids.append((line_id, classification_id, bug_number))

        error_lines = as_dict(
            TextLogError.objects
            .prefetch_related('classified_failures')
            .filter(id__in=error_line_ids), "id")

        if len(error_lines) != len(error_line_ids):
            missing = error_line_ids - set(error_lines.keys())
            # The ids are ints; stringify before joining to avoid a TypeError.
            return ("No text log error with id: {0}".format(
                        ", ".join(str(item) for item in missing)),
                    HTTP_404_NOT_FOUND)

        classifications = as_dict(
            ClassifiedFailure.objects.filter(id__in=classification_ids), "id")

        if len(classifications) != len(classification_ids):
            missing = classification_ids - set(classifications.keys())
            return ("No classification with id: {0}".format(
                        ", ".join(str(item) for item in missing)),
                    HTTP_404_NOT_FOUND)

        for line_id, classification_id, bug_number in ids:
            # Lazy %-style args: the message is only formatted if DEBUG
            # logging is enabled.
            logger.debug("line_id: %s, classification_id: %s, bug_number: %s",
                         line_id, classification_id, bug_number)
            error_line = error_lines[line_id]
            if classification_id is not None:
                logger.debug("Using classification id")
                classification = classifications[classification_id]
                if bug_number is not None and bug_number != classification.bug_number:
                    logger.debug("Updating classification bug number")
                    classification = classification.set_bug(bug_number)
            elif bug_number is not None:
                logger.debug("Using bug number")
                classification = bug_number_classifications[bug_number]
            else:
                logger.debug("Using null classification")
                classification = None

            error_line.mark_best_classification_verified(classification)
            error_line.step.job.update_after_verification(user)

        # Force failure line to be reloaded, including .classified_failures
        rv = (TextLogError.objects
              .prefetch_related('classified_failures')
              .filter(id__in=error_line_ids))

        if not many:
            rv = rv[0]

        return (serializers.TextLogErrorSerializer(rv, many=many).data,
                HTTP_200_OK)

    def update(self, request, pk=None):
        """Update the classification of a single text log error."""
        data = {"id": pk}
        # .items() (not the Python-2-only .iteritems()) keeps this working on
        # both Python 2 and 3.
        for k, v in request.data.items():
            if k not in data:
                data[k] = v

        body, status = self._update([data], request.user, many=False)
        return Response(body, status=status)

    def update_many(self, request):
        """Update the classification of multiple text log errors at once."""
        body, status = self._update(request.data, request.user, many=True)

        if status == HTTP_404_NOT_FOUND:
            # 404 doesn't make sense for updating many since the path is always
            # valid, so if we get an invalid id instead return 400
            status = HTTP_400_BAD_REQUEST

        return Response(body, status=status)

Просмотреть файл

@ -18,6 +18,7 @@ from treeherder.webapp.api import (artifact,
resultset,
runnable_jobs,
seta,
text_log_error,
text_log_summary,
text_log_summary_line)
@ -122,6 +123,9 @@ default_router.register(r'failure-line', failureline.FailureLineViewSet,
default_router.register(r'classified-failure',
classifiedfailure.ClassifiedFailureViewSet,
base_name='classified-failure')
default_router.register(r'text-log-error',
text_log_error.TextLogErrorViewSet,
base_name='text-log-error')
default_router.register(r'text-log-summary',
text_log_summary.TextLogSummaryViewSet,
base_name='text-log-summary')