Bug 1719848 - search for test file name during generation of bug suggestions must match it at the start of the file name (#7206)

There are short or common test file names like 001.html. When Treeherder tries
to generate bug suggestions for such a file, it searches the summaries of the
intermittent bugs for 001.html, which also matches other-test-001.html. If too
many bugs are returned (>20), Treeherder won't suggest any bugs.

By matching on path boundaries (/, \), whitespace ( ) and list separators (,),
the other test files won't be matched. Because adding these rules to the SQL
query is slower than filtering the false positives out afterwards, the latter
method gets applied. This leaves the risk that the SQL query will not return
all matches (it is limited to 50 rows); this has to be reevaluated if it turns
into an issue.
This commit is contained in:
Sebastian Hengst 2021-07-14 02:33:26 +02:00 коммит произвёл GitHub
Родитель 4f0e881d00
Коммит 54be39dc7f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 119 добавлений и 4 удалений

Просмотреть файл

@ -10,8 +10,8 @@ def test_bz_api_process(mock_bugzilla_api_request):
process.run()
# the number of rows inserted should equal to the number of bugs
assert Bugscache.objects.count() == 17
assert Bugscache.objects.count() == 25
# test that a second ingestion of the same bugs doesn't insert new rows
process.run()
assert Bugscache.objects.count() == 17
assert Bugscache.objects.count() == 25

Просмотреть файл

@ -50,6 +50,11 @@ BUG_SEARCHES = (
("should not be match_d", []),
("should not be match%d", []),
("should not be matche=d", []),
("standalone-without-folder.html", [1690234]),
("slash-folder.html", [1690235]),
("backslash.html", [1690236]),
("listitem-001.html", [1690345]),
("match-at-start.html", [1690456]),
)

Просмотреть файл

@ -176,6 +176,94 @@
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690123,
"summary": "Intermittent different-standalone-without-folder.html | whitespace separated and trailing end of file name: no match",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690234,
"summary": "Intermittent standalone-without-folder.html | whitespace separated and full file name: match",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690124,
"summary": "Intermittent mock-folder-1/different-start-slash-folder.html | slash no match",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690235,
"summary": "Intermittent mock-folder-2/slash-folder.html | slash match",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690125,
"summary": "Intermittent mock-folder-3\\different-start-backslash.html | backslash no match",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690236,
"summary": "Intermittent mock-folder-4\\backslash.html | backslash match",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690345,
"summary": "Intermittent listitem-000.html,listitem-001.html,listitem-002.html | in comma separated list",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 1690456,
"summary": "match-at-start.html | match at start",
"cf_crash_signature": "",
"keywords": ["intermittent-failure"],
"resolution": "",
"op_sys": "Unspecified",
"last_change_time": "2016-07-25T01:04:15Z",
"whiteboard": "text"
},
{
"status": "NEW",
"id": 100,

Просмотреть файл

@ -273,7 +273,18 @@ class Bugscache(models.Model):
)
try:
open_recent = [model_to_dict(item, exclude=["modified"]) for item in recent_qs]
open_recent_match_string = [
model_to_dict(item, exclude=["modified"]) for item in recent_qs
]
open_recent = [
match
for match in open_recent_match_string
if match["summary"].startswith(search_term)
or "/" + search_term in match["summary"]
or " " + search_term in match["summary"]
or "\\" + search_term in match["summary"]
or "," + search_term in match["summary"]
]
except ProgrammingError as e:
newrelic.agent.record_exception()
logger.error(
@ -298,7 +309,18 @@ class Bugscache(models.Model):
)
try:
all_others = [model_to_dict(item, exclude=["modified"]) for item in all_others_qs]
all_others_match_string = [
model_to_dict(item, exclude=["modified"]) for item in all_others_qs
]
all_others = [
match
for match in all_others_match_string
if match["summary"].startswith(search_term)
or "/" + search_term in match["summary"]
or " " + search_term in match["summary"]
or "\\" + search_term in match["summary"]
or "," + search_term in match["summary"]
]
except ProgrammingError as e:
newrelic.agent.record_exception()
logger.error(