Bug 1698534 - Go back to properly doing exact matching for quick suggest, and rewrite flatten logic. r=daleharvey

The matching logic can be simplified since we want exact matches and not prefix
matches. There's no need to confirm that the query string is in a candidate
result's list of keywords because by its nature the process of looking up the
query in the tree confirms that the query is equal to some keyword. If the
lookup finds a result, then necessarily the query matches a keyword.

I changed lookup from recursive to iterative. It might be a little faster and
it's not really harder to understand.

I also moved the full keyword computation out of the keyword tree and into
UrlbarQuickSuggest. It was a little hacky how KeywordTree accessed
`UrlbarQuickSuggest._results` during its lookup, and it forced the test to have
to set up `UrlbarQuickSuggest._results`. And big picture, it's not the keyword
tree's job to compute the full keyword/suggestion.

While working on all of this, `test_flatten` started failing when it calls
`basicChecks` because the flattened tree wasn't correct, so I rewrote and
simplified the flatten routine. It's no longer necessary for `RESULT_KEY` to be
in a map all by itself. Instead, it's treated like an ordinary char in the
context of flattening, so now it's more like a sentinel or suffix than a key.

Finally, I removed the "test1" and "test2" data in the test because I added them
when I thought they tested bug 1697678, but they don't actually. I don't think
they're testing anything useful.

Depends on D108683

Differential Revision: https://phabricator.services.mozilla.com/D108564
This commit is contained in:
Drew Willcoxon 2021-03-16 23:46:43 +00:00
Родитель 7d9b571b77
Коммит ce91914029
2 изменённых файлов: 272 добавлений и 222 удалений

Просмотреть файл

@ -69,11 +69,14 @@ class Suggestions {
async query(phrase) {
log.info("Handling query for", phrase);
phrase = phrase.toLowerCase();
let match = this._tree.get(phrase);
if (!match.result || !this._results.has(match.result)) {
let resultID = this._tree.get(phrase);
if (resultID === null) {
return null;
}
let result = this._results.get(resultID);
if (!result) {
return null;
}
let result = this._results.get(match.result);
let d = new Date();
let pad = number => number.toString().padStart(2, "0");
let date =
@ -81,7 +84,7 @@ class Suggestions {
`${pad(d.getDate())}${pad(d.getHours())}`;
let icon = await this.fetchIcon(result.icon);
return {
fullKeyword: match.fullKeyword,
fullKeyword: this.getFullKeyword(phrase, result.keywords),
title: result.title,
url: result.url.replace("%YYYYMMDDHH%", date),
click_url: result.click_url.replace("%YYYYMMDDHH%", date),
@ -94,6 +97,58 @@ class Suggestions {
};
}
/**
* Gets the full keyword (i.e., suggestion) for a result and query. The data
* doesn't include full keywords, so we make our own based on the result's
* keyword phrases and a particular query. We use two heuristics:
*
* (1) Find the first keyword phrase that has more words than the query. Use
* its first `queryWords.length` words as the full keyword. e.g., if the
* query is "moz" and `result.keywords` is ["moz", "mozi", "mozil",
* "mozill", "mozilla", "mozilla firefox"], pick "mozilla firefox", pop
* off the "firefox" and use "mozilla" as the full keyword.
* (2) If there isn't any keyword phrase with more words, then pick the
* longest phrase. e.g., pick "mozilla" in the previous example (assuming
* the "mozilla firefox" phrase isn't there). That might be the query
* itself.
*
* @param {string} query
* The query string that matched `result`.
* @param {array} keywords
* An array of result keywords.
* @returns {string}
* The full keyword.
*/
getFullKeyword(query, keywords) {
let longerPhrase;
let trimmedQuery = query.trim();
let queryWords = trimmedQuery.split(" ");
for (let phrase of keywords) {
if (phrase.startsWith(query)) {
let trimmedPhrase = phrase.trim();
let phraseWords = trimmedPhrase.split(" ");
// As an exception to (1), if the query ends with a space, then look for
// phrases with one more word so that the suggestion includes a word
// following the space.
let extra = query.endsWith(" ") ? 1 : 0;
let len = queryWords.length + extra;
if (len < phraseWords.length) {
// We found a phrase with more words.
return phraseWords.slice(0, len).join(" ");
}
if (
query.length < phrase.length &&
(!longerPhrase || longerPhrase.length < trimmedPhrase.length)
) {
// We found a longer phrase with the same number of words.
longerPhrase = trimmedPhrase;
}
}
}
return longerPhrase || trimmedQuery;
}
/*
* Called if a Urlbar preference is changed.
*/
@ -244,109 +299,41 @@ class KeywordTree {
*
* @param {string} query
* The query string.
* @returns {object}
* The match object: { result, fullKeyword }, where `result` is the matching
* result ID and `fullKeyword` is the suggestion. If there is no match,
* then `result` will be null and `fullKeyword` will not be defined.
* @returns {*}
* The matching result in the tree or null if there isn't a match.
*/
get(query) {
query = query.trimStart();
let match = this._getMatch(query, this.tree, "");
if (!match) {
return { result: null };
}
let result = UrlbarQuickSuggest._results.get(match.resultID);
if (!result) {
return { result: null };
}
let longerPhrase;
let trimmedQuery = query.trim();
let queryWords = trimmedQuery.split(" ");
// We need to determine a suggestion for the result (called `fullKeyword` in
// the returned object). The data doesn't include suggestions, so we'll
// need to make our own based on the keyword phrases in the result. First,
// find a decent matching phrase. We'll use two heuristics:
//
// (1) Find the first phrase that has more words than the query. Use its
// first `queryWords.length` words as the suggestion. e.g., if the
// query is "moz" and `result.keywords` is ["moz", "mozi", "mozil",
// "mozill", "mozilla", "mozilla firefox"], pick "mozilla firefox", pop
// off the "firefox" and use "mozilla" as the suggestion.
// (2) If there isn't any phrase with more words, then pick the longest
// phrase. e.g., pick "mozilla" in the previous example (assuming the
// "mozilla firefox" phrase isn't there).
for (let phrase of result.keywords) {
if (phrase.startsWith(query)) {
let trimmedPhrase = phrase.trim();
let phraseWords = trimmedPhrase.split(" ");
// As an exception to (1), if the query ends with a space, then look for
// phrases with one more word so that the suggestion includes a word
// following the space.
let extra = query.endsWith(" ") ? 1 : 0;
let len = queryWords.length + extra;
if (len < phraseWords.length) {
// We found a phrase with more words.
return {
result: match.resultID,
fullKeyword: phraseWords.slice(0, len).join(" "),
};
}
if (
query.length < phrase.length &&
(!longerPhrase || longerPhrase.length < trimmedPhrase.length)
) {
// We found a longer phrase with the same number of words.
longerPhrase = trimmedPhrase;
}
}
}
return {
result: match.resultID,
fullKeyword: longerPhrase || trimmedQuery,
};
}
/**
* Recursively looks up a match in the tree.
*
* @param {string} query
* The query string.
* @param {Map} node
* The node to start the search at.
* @param {string} phrase
* The current phrase key path through the tree.
* @returns {object}
* The match object: { resultID }, or null if no match was found.
*/
_getMatch(query, node, phrase) {
for (const [key, child] of node.entries()) {
if (key == RESULT_KEY) {
continue;
}
let newPhrase = phrase + key;
let len = Math.min(newPhrase.length, query.length);
if (newPhrase.substring(0, len) == query.substring(0, len)) {
// The new phrase is a prefix of the query or vice versa.
let resultID = child.get(RESULT_KEY);
if (resultID !== undefined) {
// This child has a result. Look it up to see if its keyword phrases
// match the query.
let result = UrlbarQuickSuggest._results.get(resultID);
if (result?.keywords.includes(query)) {
return { resultID };
query = query.trimStart() + RESULT_KEY;
let node = this.tree;
let phrase = "";
while (phrase.length < query.length) {
// First, assume the tree isn't flattened and try to look up the next char
// in the query.
let key = query[phrase.length];
let child = node.get(key);
if (!child) {
// Not found, so fall back to looking through all of the node's keys.
key = null;
for (let childKey of node.keys()) {
let childPhrase = phrase + childKey;
if (childPhrase == query.substring(0, childPhrase.length)) {
key = childKey;
break;
}
}
// Recurse into descendants and return any match.
let match = this._getMatch(query, child, newPhrase);
if (match) {
return match;
if (!key) {
return null;
}
child = node.get(key);
}
node = child;
phrase += key;
}
return null;
if (phrase.length != query.length) {
return null;
}
// At this point, `node` is the found result.
return node;
}
/*
@ -357,34 +344,35 @@ class KeywordTree {
* be flattened to ["he", ["llo"]].
*/
flatten() {
for (let key of Array.from(this.tree.keys())) {
this._flatten(this.tree, key);
}
this._flatten("", this.tree, null);
}
_flatten(parent, key) {
let tree = parent.get(key);
let keys = Array.from(tree.keys()).filter(k => k != RESULT_KEY);
let result = tree.get(RESULT_KEY);
if (keys.length == 1) {
let childKey = keys[0];
let child = tree.get(childKey);
let childResult = child.get(RESULT_KEY);
if (result == childResult) {
let newKey = key + childKey;
parent.set(newKey, child);
parent.delete(key);
this._flatten(parent, newKey);
} else {
this._flatten(tree, childKey);
}
} else {
for (let k of keys) {
this._flatten(tree, k);
/**
* Recursive flatten() helper.
*
* @param {string} key
* The key for `node` in `parent`.
* @param {Map} node
* The currently visited node.
* @param {Map} parent
* The parent of `node`, or null if `node` is the root.
*/
_flatten(key, node, parent) {
// Flatten the node's children. We need to store node.entries() in an array
// rather than iterating over them directly because _flatten() can modify
// them during iteration.
for (let [childKey, child] of [...node.entries()]) {
if (childKey != RESULT_KEY) {
this._flatten(childKey, child, node);
}
}
// If the node has a single child, then replace the node in `parent` with
// the child.
if (node.size == 1 && parent) {
parent.delete(key);
let childKey = [...node.keys()][0];
parent.set(key + childKey, node.get(childKey));
}
}
/*

Просмотреть файл

@ -12,23 +12,14 @@ XPCOMUtils.defineLazyModuleGetters(this, {
let data = [
{
term: "helzo foo",
keywords: ["hel", "helz", "helzo", "helzo f", "helzo fo"],
keywords: ["hel", "helz", "helzo", "helzo f", "helzo fo", "helzo foo"],
},
{
term: "helzo bar",
keywords: ["helzo b", "helzo ba"],
},
{
term: "test1",
keywords: ["aaa", "aaab", "aaa111", "aaab111"],
},
{
term: "test2",
keywords: ["aaa", "aaab", "aaa222", "aaab222"],
keywords: ["helzo b", "helzo ba", "helzo bar"],
},
// These two "test10" and "test11" objects must be in the following order to
// test bug 1697678.
// test10 and test11 must be in the following order to test bug 1697678.
{ term: "test10", keywords: ["kids sn", "kids sneakers"] },
{ term: "test11", keywords: ["ki", "kin", "kind", "kindl", "kindle"] },
@ -39,63 +30,143 @@ let data = [
{ term: "test13", keywords: ["webs", "websit"] },
];
let dataByTerm = data.reduce((map, record) => {
map[record.term] = record;
return map;
}, {});
function basicChecks(tree) {
UrlbarQuickSuggest._results = new Map();
for (let { term, keywords } of data) {
UrlbarQuickSuggest._results.set(term, { keywords: keywords.concat(term) });
let tests = [
{
query: "nomatch",
term: null,
},
{
query: "he",
term: null,
},
{
query: "hel",
term: "helzo foo",
fullKeyword: "helzo",
},
{
query: "helzo",
term: "helzo foo",
fullKeyword: "helzo",
},
{
query: "helzo ",
term: null,
},
{
query: "helzo f",
term: "helzo foo",
fullKeyword: "helzo foo",
},
{
query: "helzo foo",
term: "helzo foo",
fullKeyword: "helzo foo",
},
{
query: "helzo b",
term: "helzo bar",
fullKeyword: "helzo bar",
},
{
query: "helzo bar",
term: "helzo bar",
fullKeyword: "helzo bar",
},
{
query: "f",
term: null,
},
{
query: "fo",
term: null,
},
{
query: "foo",
term: null,
},
{
query: "ki",
term: "test11",
fullKeyword: "kindle",
},
{
query: "kin",
term: "test11",
fullKeyword: "kindle",
},
{
query: "kind",
term: "test11",
fullKeyword: "kindle",
},
{
query: "kid",
term: null,
},
{
query: "kids",
term: null,
},
{
query: "kids ",
term: null,
},
{
query: "kids s",
term: null,
},
{
query: "kids sn",
term: "test10",
fullKeyword: "kids sneakers",
},
{
query: "web",
term: null,
},
{
query: "webs",
term: "test13",
fullKeyword: "websit",
},
{
query: "websi",
term: "test12",
fullKeyword: "websi",
},
{
query: "websit",
term: "test13",
fullKeyword: "websit",
},
{
query: "website",
term: null,
},
];
for (let test of tests) {
info("Checking " + JSON.stringify(test));
let { query, term, fullKeyword } = test;
Assert.equal(tree.get(query), term);
if (term) {
Assert.equal(
UrlbarQuickSuggest.getFullKeyword(query, dataByTerm[term].keywords),
fullKeyword
);
}
}
Assert.equal(tree.get("nomatch").result, null);
Assert.equal(tree.get("he").result, null);
Assert.equal(tree.get("hel").result, "helzo foo");
Assert.equal(tree.get("hel").fullKeyword, "helzo");
Assert.equal(tree.get("helzo").result, "helzo foo");
Assert.equal(tree.get("helzo").fullKeyword, "helzo");
Assert.equal(tree.get("helzo ").result, null);
Assert.equal(tree.get("helzo foo").result, "helzo foo");
Assert.equal(tree.get("helzo b").result, "helzo bar");
Assert.equal(tree.get("helzo b").fullKeyword, "helzo bar");
Assert.equal(tree.get("helzo bar").result, "helzo bar");
Assert.equal(tree.get("f").result, null);
Assert.equal(tree.get("fo").result, null);
Assert.equal(tree.get("foo").result, null);
Assert.equal(tree.get("b").result, null);
Assert.equal(tree.get("ba").result, null);
Assert.equal(tree.get("bar").result, null);
Assert.equal(tree.get("aa").result, null);
Assert.equal(tree.get("aaa").result, "test2");
Assert.equal(tree.get("aaa").fullKeyword, "aaab222");
Assert.equal(tree.get("aaab").result, "test2");
Assert.equal(tree.get("aaab").fullKeyword, "aaab222");
Assert.equal(tree.get("aaab1").result, null);
Assert.equal(tree.get("aaab2").result, null);
Assert.equal(tree.get("aaa1").result, null);
Assert.equal(tree.get("aaa2").result, null);
Assert.equal(tree.get("ki").result, "test11");
Assert.equal(tree.get("ki").fullKeyword, "kindle");
Assert.equal(tree.get("kin").result, "test11");
Assert.equal(tree.get("kin").fullKeyword, "kindle");
Assert.equal(tree.get("kind").result, "test11");
Assert.equal(tree.get("kind").fullKeyword, "kindle");
Assert.equal(tree.get("kid").result, null);
Assert.equal(tree.get("kids").result, null);
Assert.equal(tree.get("kids ").result, null);
Assert.equal(tree.get("kids s").result, null);
Assert.equal(tree.get("kids sn").result, "test10");
Assert.equal(tree.get("kids sn").fullKeyword, "kids sneakers");
Assert.equal(tree.get("web").result, null);
Assert.equal(tree.get("webs").result, "test13");
Assert.equal(tree.get("webs").fullKeyword, "websit");
Assert.equal(tree.get("websi").result, "test12");
Assert.equal(tree.get("websi").fullKeyword, "websi");
Assert.equal(tree.get("websit").result, "test13");
Assert.equal(tree.get("websit").fullKeyword, "websit");
Assert.equal(tree.get("website").result, null);
}
function createTree() {
@ -103,7 +174,6 @@ function createTree() {
for (let { term, keywords } of data) {
keywords.forEach(keyword => tree.set(keyword, term));
tree.set(term, term);
}
return tree;
}
@ -125,42 +195,34 @@ add_task(async function test_flatten() {
Assert.deepEqual(
{
k: {
i: {
"^": "test11",
"ds s": { n: { "^": "test10", eaker: { s: { "^": "test10" } } } },
ndle: { "^": "test11" },
},
},
he: {
lzo: {
hel: {
"^": "helzo foo",
z: {
"^": "helzo foo",
" ": { foo: { "^": "helzo foo" }, bar: { "^": "helzo bar" } },
},
},
aa: {
a: {
"11": { "1": { "^": "test1" } },
"22": { "2": { "^": "test2" } },
"^": "test2",
b: {
"11": { "1": { "^": "test1" } },
"22": { "2": { "^": "test2" } },
"^": "test2",
o: {
"^": "helzo foo",
" ": {
f: {
"^": "helzo foo",
o: { "^": "helzo foo", "o^": "helzo foo" },
},
b: {
"^": "helzo bar",
a: { "^": "helzo bar", "r^": "helzo bar" },
},
},
},
},
},
test: {
"1": {
"0": { "^": "test10" },
"1": { "^": "test11" },
"2": { "^": "test12" },
"3": { "^": "test13" },
"^": "test1",
ki: {
"^": "test11",
n: {
"^": "test11",
d: { "^": "test11", l: { "^": "test11", "e^": "test11" } },
},
"2": { "^": "test2" },
"ds sn": { "^": "test10", "eakers^": "test10" },
},
web: { s: { i: { "^": "test12", t: { "^": "test13" } }, "^": "test13" } },
webs: { i: { "^": "test12", "t^": "test13" }, "^": "test13" },
},
tree.toJSONObject()
);