зеркало из https://github.com/mozilla/treeherder.git
Fix bug suggestions with PostgreSQL (#7988)
* Use trigram similarity instead of FTS
* Skip escaping special characters
* Order results by match ranking on Postgres

---------

Co-authored-by: Bastien Abadie <bastien@nextcairn.com>
Co-authored-by: Sebastian Hengst <aryx.github@gmx-topmail.de>
This commit is contained in:
Родитель
ccd8a1bc25
Коммит
931e8bbaa7
|
@ -1,10 +1,11 @@
|
|||
import pytest
|
||||
from unittest.mock import patch
|
||||
|
||||
from tests.test_utils import add_log_response
|
||||
from treeherder.etl.jobs import store_job_data
|
||||
from treeherder.etl.push import store_push_data
|
||||
from treeherder.model.error_summary import get_error_summary
|
||||
from treeherder.model.models import Job, TextLogError
|
||||
from treeherder.model.error_summary import get_error_summary, bug_suggestions_line
|
||||
from treeherder.model.models import Job, TextLogError, Bugscache
|
||||
|
||||
from ..sampledata import SampleData
|
||||
|
||||
|
@ -62,3 +63,86 @@ def test_create_error_summary(
|
|||
)
|
||||
for failure_line in bug_suggestions:
|
||||
assert set(failure_line.keys()) == expected_keys
|
||||
|
||||
|
||||
@pytest.mark.django_db
@patch(
    "treeherder.model.error_summary.get_error_search_term_and_path",
    return_value={
        "search_term": ["browser_dbg-pretty-print-inline-scripts.js"],
        "path_end": "devtools/client/debugger/test/mochitest/browser_dbg-pretty-print-inline-scripts.js",
    },
)
def test_bug_suggestion_line(
    search_mock, failure_classifications, jobs_with_local_log, sample_push, test_repository
):
    """
    Check that the similarity between the search term (often a test name)
    extracted from the failure line and each bug summary drives the ranking
    of bug suggestions.

    Should every bug end up with the same rank, the expected bug would be
    missing from the query result because its ID is higher than the others.
    """
    tracked_bug_id = 1775819
    tracked_keywords = "intermittent-failure,regression,test-verify-fail"
    tracked_whiteboard = "[retriggered][stockwell unknown]"
    tracked_summary = (
        "Intermittent devtools/client/debugger/test/mochitest/browser_dbg-pretty-print-inline-scripts.js "
        "| single tracking bug"
    )
    last_modified = "2010-01-01 00:00:00"

    # Store a push and mark all of its jobs as failed tests.
    store_push_data(test_repository, sample_push)
    for job_blob in jobs_with_local_log:
        job_blob["job"]["result"] = "testfailed"
        job_blob["revision"] = sample_push[0]["revision"]
    store_job_data(test_repository, jobs_with_local_log)

    stored_job = Job.objects.get(id=1)

    # The one bug whose summary matches the mocked search term.
    Bugscache.objects.create(
        id=tracked_bug_id,
        status="2",
        keywords=tracked_keywords,
        whiteboard=tracked_whiteboard,
        summary=tracked_summary,
        modified=last_modified,
    )

    # Add 50 other bugs with lower IDs.
    # The bug suggestions query returns at most 50 rows ordered by match rank,
    # so if every row were given the same (wrong) rank, e.g. 0, the related
    # suggestion created above would be pushed out of the result set.
    unrelated_bugs = []
    for index in range(50):
        unrelated_bugs.append(
            Bugscache(
                id=100 + index,
                status="2",
                keywords="intermittent-failure,intermittent-testcase",
                summary=(
                    f"Intermittent devtools/client/debugger/test/mochitest/browser_unrelated-{index}.js "
                    "| single tracking bug"
                ),
                modified=last_modified,
            )
        )
    Bugscache.objects.bulk_create(unrelated_bugs)

    first_error = stored_job.text_log_error.first()
    suggestions, _ = bug_suggestions_line(
        first_error,
        project=stored_job.repository,
        logdate=stored_job.submit_time,
        term_cache={},
        line_cache={str(stored_job.submit_time.date()): {}},
        revision=stored_job.push.revision,
    )

    expected_open_recent = [
        {
            "crash_signature": "",
            "dupe_of": None,
            "id": tracked_bug_id,
            "keywords": tracked_keywords,
            "resolution": "",
            "status": "2",
            "whiteboard": tracked_whiteboard,
            "summary": tracked_summary,
        }
    ]
    assert suggestions["bugs"]["open_recent"] == expected_open_recent
|
||||
|
|
|
@ -135,7 +135,7 @@
|
|||
{
|
||||
"status": "NEW",
|
||||
"id": 1054669,
|
||||
"summary": "Intermittent test_switch_frame.py TestSwitchFrame.test_should_be_able_to_carry_on_working_if_the_frame_is_deleted_from_under_us | TimeoutException: TimeoutException: Connection timed out",
|
||||
"summary": "Intermittent test_switch_frame.py TestSwitchFrame.test_should_be_able_to_carry_on_working_if_the_frame_is_deleted_from_under_us | TimeoutException",
|
||||
"dupe_of": null,
|
||||
"duplicates": [],
|
||||
"cf_crash_signature": "",
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
# Generated by Django 4.1.13 on 2024-03-25 16:15
|
||||
|
||||
from django.db import migrations
|
||||
from django.contrib.postgres.operations import TrigramExtension
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Run Django's ``TrigramExtension`` operation after ``0030_group_durations``."""

    dependencies = [("model", "0030_group_durations")]

    operations = [
        TrigramExtension(),
    ]
|
|
@ -12,12 +12,12 @@ warnings.filterwarnings("ignore", category=DeprecationWarning, module="newrelic"
|
|||
import newrelic.agent
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
|
||||
from django.core.cache import cache
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.core.validators import MinLengthValidator
|
||||
from django.db import models, transaction
|
||||
from django.db.models import Count, Max, Min, Q, Subquery
|
||||
from django.contrib.postgres.search import TrigramSimilarity
|
||||
from django.db.utils import ProgrammingError
|
||||
from django.forms import model_to_dict
|
||||
from django.utils import timezone
|
||||
|
@ -248,11 +248,11 @@ class Bugscache(models.Model):
|
|||
def search(cls, search_term):
|
||||
max_size = 50
|
||||
|
||||
# Do not wrap a string in quotes to search as a phrase;
|
||||
# see https://bugzilla.mozilla.org/show_bug.cgi?id=1704311
|
||||
search_term_fulltext = cls.sanitized_search_term(search_term)
|
||||
|
||||
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
|
||||
# Do not wrap a string in quotes to search as a phrase;
|
||||
# see https://bugzilla.mozilla.org/show_bug.cgi?id=1704311
|
||||
search_term_fulltext = cls.sanitized_search_term(search_term)
|
||||
|
||||
# Substitute escape and wildcard characters, so the search term is used
|
||||
# literally in the LIKE statement.
|
||||
search_term_like = (
|
||||
|
@ -275,12 +275,16 @@ class Bugscache(models.Model):
|
|||
[search_term_fulltext, search_term_like, max_size],
|
||||
)
|
||||
else:
|
||||
# On PostgreSQL we can use the full text search features
|
||||
vector = SearchVector("summary")
|
||||
query = SearchQuery(search_term_fulltext)
|
||||
recent_qs = Bugscache.objects.annotate(rank=SearchRank(vector, query)).order_by(
|
||||
"-rank", "id"
|
||||
)[0:max_size]
|
||||
# On PostgreSQL we can use the ORM directly, but NOT the full text search
|
||||
# as the ranking algorithm expects English words, not paths
|
||||
# So we use standard pattern matching AND trigram similarity to compare sequences of characters
|
||||
# instead of words
|
||||
# Django already escapes special characters, so we do not need to handle that here
|
||||
recent_qs = (
|
||||
Bugscache.objects.filter(summary__icontains=search_term)
|
||||
.annotate(similarity=TrigramSimilarity("summary", search_term))
|
||||
.order_by("-similarity")[0:max_size]
|
||||
)
|
||||
|
||||
exclude_fields = ["modified", "processed_update"]
|
||||
try:
|
||||
|
|
Загрузка…
Ссылка в новой задаче