зеркало из https://github.com/mozilla/treeherder.git
convert matchers to functions
Now that matchers are all confined to a single method it's easier to handle them as functions.
This commit is contained in:
Родитель
cf28cd2b29
Коммит
0cb2387540
|
@ -0,0 +1,8 @@
|
|||
from treeherder.autoclassify.autoclassify import get_matchers
|
||||
|
||||
|
||||
def test_get_matchers():
    """get_matchers() yields three callables, each named ``*_matcher``."""
    found = list(get_matchers())

    assert len(found) == 3
    for matcher in found:
        assert matcher.__name__.endswith('_matcher')
|
|
@ -1,7 +1,7 @@
|
|||
from treeherder.autoclassify.autoclassify import match_errors
|
||||
from treeherder.autoclassify.matchers import (CrashSignatureMatcher,
|
||||
ElasticSearchTestMatcher,
|
||||
PreciseTestMatcher)
|
||||
from treeherder.autoclassify.matchers import (crash_signature_matcher,
|
||||
elasticsearch_matcher,
|
||||
precise_matcher)
|
||||
from treeherder.model.models import (BugJobMap,
|
||||
ClassifiedFailure,
|
||||
JobNote,
|
||||
|
@ -39,7 +39,7 @@ def test_classify_test_failure(text_log_errors_failure_lines,
|
|||
(test_line, {"message": "message2"})]
|
||||
test_error_lines, test_failure_lines = create_lines(test_job_2, lines)
|
||||
|
||||
do_autoclassify(test_job_2, test_failure_lines, [PreciseTestMatcher])
|
||||
do_autoclassify(test_job_2, test_failure_lines, [precise_matcher])
|
||||
|
||||
expected_classified = test_error_lines[:2], test_failure_lines[:2]
|
||||
expected_unclassified = test_error_lines[2:], test_failure_lines[2:]
|
||||
|
@ -65,7 +65,7 @@ def test_no_autoclassify_job_success(text_log_errors_failure_lines,
|
|||
(test_line, {"message": "message2"})]
|
||||
test_error_lines, test_failure_lines = create_lines(test_job_2, lines)
|
||||
|
||||
do_autoclassify(test_job_2, test_failure_lines, [PreciseTestMatcher], status="success")
|
||||
do_autoclassify(test_job_2, test_failure_lines, [precise_matcher], status="success")
|
||||
|
||||
expected_classified = [], []
|
||||
expected_unclassified = test_error_lines, test_failure_lines
|
||||
|
@ -89,7 +89,7 @@ def test_autoclassify_update_job_classification(failure_lines, classified_failur
|
|||
lines = [(test_line, {})]
|
||||
_, test_failure_lines = create_lines(test_job_2, lines)
|
||||
|
||||
do_autoclassify(test_job_2, test_failure_lines, [PreciseTestMatcher])
|
||||
do_autoclassify(test_job_2, test_failure_lines, [precise_matcher])
|
||||
|
||||
assert JobNote.objects.filter(job=test_job_2).count() == 1
|
||||
|
||||
|
@ -107,7 +107,7 @@ def test_autoclassify_no_update_job_classification(test_job, test_job_2,
|
|||
line="Some error that isn't in the structured logs",
|
||||
line_number=2)
|
||||
|
||||
do_autoclassify(test_job_2, test_failure_lines, [PreciseTestMatcher])
|
||||
do_autoclassify(test_job_2, test_failure_lines, [precise_matcher])
|
||||
|
||||
assert JobNote.objects.filter(job=test_job_2).count() == 0
|
||||
|
||||
|
@ -200,7 +200,7 @@ def test_classify_skip_ignore(test_job_2,
|
|||
[(test_line, {}),
|
||||
(test_line, {"subtest": "subtest2"})])
|
||||
|
||||
do_autoclassify(test_job_2, test_failure_lines, [PreciseTestMatcher])
|
||||
do_autoclassify(test_job_2, test_failure_lines, [precise_matcher])
|
||||
|
||||
expected_classified = test_failure_lines[:1]
|
||||
expected_unclassified = test_failure_lines[1:]
|
||||
|
@ -222,7 +222,7 @@ def test_classify_es(test_job_2, failure_lines, classified_failures):
|
|||
(test_line, {"status": "TIMEOUT"}),
|
||||
(test_line, {"expected": "ERROR"})])
|
||||
|
||||
do_autoclassify(test_job_2, test_failure_lines, [ElasticSearchTestMatcher])
|
||||
do_autoclassify(test_job_2, test_failure_lines, [elasticsearch_matcher])
|
||||
|
||||
expected_classified = test_failure_lines[:4]
|
||||
expected_unclassified = test_failure_lines[4:]
|
||||
|
@ -242,16 +242,16 @@ def test_classify_multiple(test_job_2, failure_lines, classified_failures):
|
|||
expected_classified_precise = [test_failure_lines[0]]
|
||||
expected_classified_fuzzy = [test_failure_lines[1]]
|
||||
|
||||
do_autoclassify(test_job_2, test_failure_lines, [PreciseTestMatcher,
|
||||
ElasticSearchTestMatcher])
|
||||
do_autoclassify(test_job_2, test_failure_lines, [precise_matcher,
|
||||
elasticsearch_matcher])
|
||||
|
||||
for actual, expected in zip(expected_classified_precise, classified_failures):
|
||||
assert list(actual.error.classified_failures.values_list('id', flat=True)) == [expected.id]
|
||||
assert actual.error.matches.first().matcher_name == "PreciseTestMatcher"
|
||||
assert actual.error.matches.first().matcher_name == "precise_matcher"
|
||||
|
||||
for actual, expected in zip(expected_classified_fuzzy, classified_failures):
|
||||
assert list(actual.error.classified_failures.values_list('id', flat=True)) == [expected.id]
|
||||
assert actual.error.matches.first().matcher_name == "ElasticSearchTestMatcher"
|
||||
assert actual.error.matches.first().matcher_name == "elasticsearch_matcher"
|
||||
|
||||
|
||||
def test_classify_crash(test_repository, test_job, test_job_2, test_matcher):
|
||||
|
@ -269,7 +269,7 @@ def test_classify_crash(test_repository, test_job, test_job_2, test_matcher):
|
|||
classified_failure=classified_failure,
|
||||
matcher_name=test_matcher.__class__.__name__,
|
||||
score=1.0)
|
||||
do_autoclassify(test_job_2, failure_lines, [CrashSignatureMatcher])
|
||||
do_autoclassify(test_job_2, failure_lines, [crash_signature_matcher])
|
||||
|
||||
expected_classified = failure_lines[0:2]
|
||||
expected_unclassified = failure_lines[2:]
|
||||
|
|
|
@ -2,8 +2,8 @@ from decimal import Decimal
|
|||
|
||||
from first import first
|
||||
|
||||
from treeherder.autoclassify.matchers import (PreciseTestMatcher,
|
||||
score_matches)
|
||||
from treeherder.autoclassify.matchers import precise_matcher
|
||||
from treeherder.autoclassify.utils import score_matches
|
||||
from treeherder.model.models import (FailureLine,
|
||||
TextLogErrorMatch,
|
||||
TextLogErrorMetadata)
|
||||
|
@ -12,10 +12,10 @@ from .utils import (create_failure_lines,
|
|||
create_text_log_errors)
|
||||
|
||||
|
||||
def test_precise_test_matcher_with_matches(classified_failures):
|
||||
def test_precise_matcher_with_matches(classified_failures):
|
||||
tle = TextLogErrorMatch.objects.first().text_log_error
|
||||
|
||||
results = PreciseTestMatcher().query_best(tle)
|
||||
results = precise_matcher(tle)
|
||||
score, classified_failure_id = first(results)
|
||||
|
||||
match = tle.matches.first()
|
||||
|
@ -23,7 +23,7 @@ def test_precise_test_matcher_with_matches(classified_failures):
|
|||
assert score == match.score
|
||||
|
||||
|
||||
def test_precise_test_matcher_without_matches(test_job, test_matcher):
|
||||
def test_precise_matcher_without_matches(test_job, test_matcher):
|
||||
# create an error log group to match against
|
||||
data1 = {
|
||||
'action': 'test_result',
|
||||
|
@ -50,7 +50,7 @@ def test_precise_test_matcher_without_matches(test_job, test_matcher):
|
|||
TextLogErrorMetadata.objects.create(text_log_error=tle1, failure_line=failure_line1)
|
||||
TextLogErrorMetadata.objects.create(text_log_error=tle2, failure_line=failure_line2)
|
||||
|
||||
output = PreciseTestMatcher().query_best(tle2)
|
||||
output = precise_matcher(tle2)
|
||||
assert output is None # we should have no matches
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import inspect
|
||||
import logging
|
||||
|
||||
from django.db.utils import IntegrityError
|
||||
|
@ -6,7 +7,6 @@ from first import first
|
|||
|
||||
from treeherder.model.models import (Job,
|
||||
JobNote,
|
||||
Matcher,
|
||||
TextLogError,
|
||||
TextLogErrorMatch)
|
||||
|
||||
|
@ -18,6 +18,26 @@ AUTOCLASSIFY_CUTOFF_RATIO = 0.7
|
|||
AUTOCLASSIFY_GOOD_ENOUGH_RATIO = 0.9
|
||||
|
||||
|
||||
def get_matchers():
    """
    Yield the matcher functions found in treeherder.autoclassify.matchers.

    A matcher is any function in that module whose name ends in
    ``_matcher``. Filtering on the suffix is currently overkill, but it
    protects against an unwary engineer adding helper functions to the
    matchers module that shouldn't be treated as matchers.
    """
    from . import matchers as matchers_module

    def _looks_like_matcher(candidate):
        # Only plain functions qualify, and only when named "<something>_matcher".
        if not inspect.isfunction(candidate):
            return False
        return candidate.__name__.endswith("_matcher")

    for _name, matcher_func in inspect.getmembers(matchers_module, _looks_like_matcher):
        yield matcher_func
|
||||
|
||||
|
||||
def match_errors(job, matchers=None):
|
||||
# Only try to autoclassify where we have a failure status; sometimes there can be
|
||||
# error lines even in jobs marked as passing.
|
||||
|
@ -42,7 +62,7 @@ def match_errors(job, matchers=None):
|
|||
return
|
||||
|
||||
if matchers is None:
|
||||
matchers = Matcher.__subclasses__()
|
||||
matchers = get_matchers()
|
||||
|
||||
try:
|
||||
matches = list(find_best_matches(errors, matchers))
|
||||
|
@ -89,18 +109,16 @@ def find_all_matches(text_log_error, matchers):
|
|||
|
||||
Returns *unsaved* TextLogErrorMatch instances.
|
||||
"""
|
||||
for matcher_class in matchers:
|
||||
matcher = matcher_class()
|
||||
|
||||
for matcher_func in matchers:
|
||||
matches = matcher_func(text_log_error)
|
||||
# matches: iterator of (score, ClassifiedFailure.id)
|
||||
matches = matcher.query_best(text_log_error)
|
||||
if not matches:
|
||||
continue
|
||||
|
||||
for score, classified_failure_id in matches:
|
||||
yield TextLogErrorMatch(
|
||||
score=score,
|
||||
matcher_name=matcher.__class__.__name__,
|
||||
matcher_name=matcher_func.__name__,
|
||||
classified_failure_id=classified_failure_id,
|
||||
text_log_error=text_log_error,
|
||||
)
|
||||
|
|
|
@ -1,15 +1,12 @@
|
|||
from __future__ import division
|
||||
|
||||
import logging
|
||||
from abc import (ABCMeta,
|
||||
abstractmethod)
|
||||
from difflib import SequenceMatcher
|
||||
from itertools import chain
|
||||
|
||||
import newrelic.agent
|
||||
from django.conf import settings
|
||||
from django.db.models import Q
|
||||
from six import add_metaclass
|
||||
|
||||
from treeherder.model.models import TextLogErrorMatch
|
||||
from treeherder.services.elasticsearch import search
|
||||
|
@ -21,179 +18,164 @@ from .utils import (score_matches,
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@add_metaclass(ABCMeta)
|
||||
class Matcher(object):
|
||||
"""Parent class for Matchers, providing the interface for query_best"""
|
||||
@abstractmethod
|
||||
def query_best(self, text_log_error):
|
||||
"""All child classes must implement this method."""
|
||||
pass
|
||||
@newrelic.agent.function_trace()
def precise_matcher(text_log_error):
    """
    Query for TextLogErrorMatches identical to matches of the given TextLogError.

    Only failure lines with action ``test_result`` and a non-None message
    are considered. Candidates are existing TextLogErrorMatch rows whose
    failure line has the same test, subtest, status, expected and message.

    Returns None when the failure line is not eligible or when no candidate
    row exists; otherwise returns an iterable of
    (score, classified_failure_id) tuples.
    """
    failure_line = text_log_error.metadata.failure_line
    logger.debug("Looking for test match in failure %d", failure_line.id)

    if failure_line.action != "test_result" or failure_line.message is None:
        return

    f = {
        'text_log_error___metadata__failure_line__action': 'test_result',
        'text_log_error___metadata__failure_line__test': failure_line.test,
        'text_log_error___metadata__failure_line__subtest': failure_line.subtest,
        'text_log_error___metadata__failure_line__status': failure_line.status,
        'text_log_error___metadata__failure_line__expected': failure_line.expected,
        'text_log_error___metadata__failure_line__message': failure_line.message
    }
    # Rows to exclude: those with no best classification that are either
    # marked verified or belong to the same job as the error being matched.
    qwargs = (
        Q(text_log_error___metadata__best_classification=None)
        & (Q(text_log_error___metadata__best_is_verified=True)
           | Q(text_log_error__step__job=text_log_error.step.job))
    )
    qs = (TextLogErrorMatch.objects.filter(**f)
                                   .exclude(qwargs)
                                   .order_by('-score', '-classified_failure'))

    # Ask the database whether any row matches instead of truth-testing the
    # QuerySet: truth-testing would load the whole (potentially very large)
    # result set into memory just to answer yes or no.
    if not qs.exists():
        return

    # chunk through the QuerySet because it could potentially be very large
    # time bound each call to the scoring function to avoid job timeouts
    # returns an iterable of (score, classified_failure_id) tuples
    chunks = chunked_qs_reverse(qs, chunk_size=20000)
    return chain.from_iterable(time_boxed(score_matches, chunks, time_budget=500))
|
||||
|
||||
|
||||
class PreciseTestMatcher(Matcher):
|
||||
"""Matcher that looks for existing failures with identical tests and identical error message."""
|
||||
@newrelic.agent.function_trace()
|
||||
def query_best(self, text_log_error):
|
||||
"""Query for TextLogErrorMatches identical to matches of the given TextLogError."""
|
||||
failure_line = text_log_error.metadata.failure_line
|
||||
logger.debug("Looking for test match in failure %d", failure_line.id)
|
||||
@newrelic.agent.function_trace()
|
||||
def elasticsearch_matcher(text_log_error):
|
||||
"""
|
||||
Query Elasticsearch and score the results.
|
||||
|
||||
if failure_line.action != "test_result" or failure_line.message is None:
|
||||
return
|
||||
Uses a filtered search checking test, status, expected, and the message
|
||||
as a phrase query with non-alphabet tokens removed.
|
||||
"""
|
||||
if not settings.ELASTICSEARCH_URL:
|
||||
return []
|
||||
|
||||
f = {
|
||||
'text_log_error___metadata__failure_line__action': 'test_result',
|
||||
'text_log_error___metadata__failure_line__test': failure_line.test,
|
||||
'text_log_error___metadata__failure_line__subtest': failure_line.subtest,
|
||||
'text_log_error___metadata__failure_line__status': failure_line.status,
|
||||
'text_log_error___metadata__failure_line__expected': failure_line.expected,
|
||||
'text_log_error___metadata__failure_line__message': failure_line.message
|
||||
}
|
||||
qwargs = (
|
||||
Q(text_log_error___metadata__best_classification=None)
|
||||
& (Q(text_log_error___metadata__best_is_verified=True)
|
||||
| Q(text_log_error__step__job=text_log_error.step.job))
|
||||
)
|
||||
qs = (TextLogErrorMatch.objects.filter(**f)
|
||||
.exclude(qwargs)
|
||||
.order_by('-score', '-classified_failure'))
|
||||
failure_line = text_log_error.metadata.failure_line
|
||||
|
||||
if not qs:
|
||||
return
|
||||
if failure_line.action != "test_result" or not failure_line.message:
|
||||
logger.debug("Skipped elasticsearch matching")
|
||||
return
|
||||
|
||||
# chunk through the QuerySet because it could potentially be very large
|
||||
# time bound each call to the scoring function to avoid job timeouts
|
||||
# returns an iterable of (score, classified_failure_id) tuples
|
||||
chunks = chunked_qs_reverse(qs, chunk_size=20000)
|
||||
return chain.from_iterable(time_boxed(score_matches, chunks, time_budget=500))
|
||||
filters = [
|
||||
{'term': {'test': failure_line.test}},
|
||||
{'term': {'status': failure_line.status}},
|
||||
{'term': {'expected': failure_line.expected}},
|
||||
{'exists': {'field': 'best_classification'}}
|
||||
]
|
||||
if failure_line.subtest:
|
||||
query = filters.append({'term': {'subtest': failure_line.subtest}})
|
||||
|
||||
|
||||
class ElasticSearchTestMatcher(Matcher):
|
||||
"""Looks for existing failures using Elasticsearch."""
|
||||
@newrelic.agent.function_trace()
|
||||
def query_best(self, text_log_error):
|
||||
"""
|
||||
Query Elasticsearch and score the results.
|
||||
|
||||
Uses a filtered search checking test, status, expected, and the message
|
||||
as a phrase query with non-alphabet tokens removed.
|
||||
"""
|
||||
if not settings.ELASTICSEARCH_URL:
|
||||
return []
|
||||
|
||||
failure_line = text_log_error.metadata.failure_line
|
||||
|
||||
if failure_line.action != "test_result" or not failure_line.message:
|
||||
logger.debug("Skipped elasticsearch matching")
|
||||
return
|
||||
|
||||
filters = [
|
||||
{'term': {'test': failure_line.test}},
|
||||
{'term': {'status': failure_line.status}},
|
||||
{'term': {'expected': failure_line.expected}},
|
||||
{'exists': {'field': 'best_classification'}}
|
||||
]
|
||||
if failure_line.subtest:
|
||||
query = filters.append({'term': {'subtest': failure_line.subtest}})
|
||||
|
||||
query = {
|
||||
'query': {
|
||||
'bool': {
|
||||
'filter': filters,
|
||||
'must': [{
|
||||
'match_phrase': {
|
||||
'message': failure_line.message[:1024],
|
||||
},
|
||||
}],
|
||||
},
|
||||
query = {
|
||||
'query': {
|
||||
'bool': {
|
||||
'filter': filters,
|
||||
'must': [{
|
||||
'match_phrase': {
|
||||
'message': failure_line.message[:1024],
|
||||
},
|
||||
}],
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
try:
|
||||
results = search(query)
|
||||
except Exception:
|
||||
logger.error("Elasticsearch lookup failed: %s %s %s %s %s",
|
||||
failure_line.test, failure_line.subtest, failure_line.status,
|
||||
failure_line.expected, failure_line.message)
|
||||
raise
|
||||
try:
|
||||
results = search(query)
|
||||
except Exception:
|
||||
logger.error("Elasticsearch lookup failed: %s %s %s %s %s",
|
||||
failure_line.test, failure_line.subtest, failure_line.status,
|
||||
failure_line.expected, failure_line.message)
|
||||
raise
|
||||
|
||||
if len(results) > 1:
|
||||
args = (
|
||||
text_log_error.id,
|
||||
failure_line.id,
|
||||
len(results),
|
||||
)
|
||||
logger.info('text_log_error=%i failure_line=%i Elasticsearch produced %i results' % args)
|
||||
newrelic.agent.record_custom_event('es_matches', {
|
||||
'num_results': len(results),
|
||||
'text_log_error_id': text_log_error.id,
|
||||
'failure_line_id': failure_line.id,
|
||||
})
|
||||
|
||||
scorer = MatchScorer(failure_line.message)
|
||||
matches = [(item, item['message']) for item in results]
|
||||
best_match = scorer.best_match(matches)
|
||||
if not best_match:
|
||||
return
|
||||
|
||||
score, es_result = best_match
|
||||
# TODO: score all results and return
|
||||
# TODO: just return results with score above cut off?
|
||||
return [(score, es_result['best_classification'])]
|
||||
|
||||
|
||||
class CrashSignatureMatcher(Matcher):
|
||||
"""Matcher that looks for crashes with identical signature."""
|
||||
@newrelic.agent.function_trace()
|
||||
def query_best(self, text_log_error):
|
||||
"""
|
||||
Query for TextLogErrorMatches with the same crash signature.
|
||||
|
||||
Produces two queries, first checking if the same test produces matches
|
||||
and secondly checking without the same test but lowering the produced
|
||||
scores.
|
||||
"""
|
||||
failure_line = text_log_error.metadata.failure_line
|
||||
|
||||
if (failure_line.action != "crash" or
|
||||
failure_line.signature is None or
|
||||
failure_line.signature == "None"):
|
||||
return
|
||||
|
||||
f = {
|
||||
'text_log_error___metadata__failure_line__action': 'crash',
|
||||
'text_log_error___metadata__failure_line__signature': failure_line.signature,
|
||||
}
|
||||
qwargs = (
|
||||
Q(text_log_error___metadata__best_classification=None)
|
||||
& (Q(text_log_error___metadata__best_is_verified=True)
|
||||
| Q(text_log_error__step__job=text_log_error.step.job))
|
||||
if len(results) > 1:
|
||||
args = (
|
||||
text_log_error.id,
|
||||
failure_line.id,
|
||||
len(results),
|
||||
)
|
||||
qs = (TextLogErrorMatch.objects.filter(**f)
|
||||
.exclude(qwargs)
|
||||
.select_related('text_log_error', 'text_log_error___metadata')
|
||||
.order_by('-score', '-classified_failure'))
|
||||
logger.info('text_log_error=%i failure_line=%i Elasticsearch produced %i results' % args)
|
||||
newrelic.agent.record_custom_event('es_matches', {
|
||||
'num_results': len(results),
|
||||
'text_log_error_id': text_log_error.id,
|
||||
'failure_line_id': failure_line.id,
|
||||
})
|
||||
|
||||
size = 20000
|
||||
time_budget = 500
|
||||
scorer = MatchScorer(failure_line.message)
|
||||
matches = [(item, item['message']) for item in results]
|
||||
best_match = scorer.best_match(matches)
|
||||
if not best_match:
|
||||
return
|
||||
|
||||
# See if we can get any matches when filtering by the same test
|
||||
first_attempt = qs.filter(text_log_error___metadata__failure_line__test=failure_line.test)
|
||||
chunks = chunked_qs_reverse(first_attempt, chunk_size=size)
|
||||
scored_matches = chain.from_iterable(time_boxed(score_matches, chunks, time_budget))
|
||||
if scored_matches:
|
||||
return scored_matches
|
||||
score, es_result = best_match
|
||||
# TODO: score all results and return
|
||||
# TODO: just return results with score above cut off?
|
||||
return [(score, es_result['best_classification'])]
|
||||
|
||||
# try again without filtering to the test but applying a .8 score multiplyer
|
||||
chunks = chunked_qs_reverse(qs, chunk_size=size)
|
||||
scored_matches = chain.from_iterable(time_boxed(
|
||||
score_matches,
|
||||
chunks,
|
||||
time_budget,
|
||||
score_multiplier=(8, 10),
|
||||
))
|
||||
|
||||
@newrelic.agent.function_trace()
def crash_signature_matcher(text_log_error):
    """
    Query for TextLogErrorMatches with the same crash signature.

    Produces two queries, first checking if the same test produces matches
    and secondly checking without the same test but lowering the produced
    scores.

    Returns None when the failure line is not a crash or has no usable
    signature (None or the literal string "None"); otherwise returns an
    iterator over the scored matches, which may be empty.
    """
    failure_line = text_log_error.metadata.failure_line

    if (failure_line.action != "crash" or
            failure_line.signature is None or
            failure_line.signature == "None"):
        return

    f = {
        'text_log_error___metadata__failure_line__action': 'crash',
        'text_log_error___metadata__failure_line__signature': failure_line.signature,
    }
    # Rows to exclude: those with no best classification that are either
    # marked verified or belong to the same job as the error being matched.
    qwargs = (
        Q(text_log_error___metadata__best_classification=None)
        & (Q(text_log_error___metadata__best_is_verified=True)
           | Q(text_log_error__step__job=text_log_error.step.job))
    )
    qs = (TextLogErrorMatch.objects.filter(**f)
                                   .exclude(qwargs)
                                   .select_related('text_log_error', 'text_log_error___metadata')
                                   .order_by('-score', '-classified_failure'))

    size = 20000
    time_budget = 500

    # See if we can get any matches when filtering by the same test
    first_attempt = qs.filter(text_log_error___metadata__failure_line__test=failure_line.test)
    chunks = chunked_qs_reverse(first_attempt, chunk_size=size)
    scored_matches = chain.from_iterable(time_boxed(score_matches, chunks, time_budget))

    # An iterator object is always truthy, so `if scored_matches:` could
    # never fall through to the second query. Peek at the first item to
    # learn whether the same-test query produced anything, then put it back
    # in front of the remaining items so the caller still gets everything.
    nothing = object()
    first_match = next(scored_matches, nothing)
    if first_match is not nothing:
        return chain([first_match], scored_matches)

    # try again without filtering to the test but applying a .8 score multiplier
    chunks = chunked_qs_reverse(qs, chunk_size=size)
    scored_matches = chain.from_iterable(time_boxed(
        score_matches,
        chunks,
        time_budget,
        score_multiplier=(8, 10),
    ))
    return scored_matches
|
||||
|
||||
|
||||
class MatchScorer(object):
|
||||
"""Simple scorer for similarity of strings based on python's difflib SequenceMatcher."""
|
||||
|
|
Загрузка…
Ссылка в новой задаче