Bug 1838161 - Add regular expression keywords for address-line1 and address-line2 r=credential-management-reviewers,issammani

Differential Revision: https://phabricator.services.mozilla.com/D180791
This commit is contained in:
Dimi 2023-06-15 06:48:05 +00:00
Родитель c466bdec21
Коммит 63eef94a13
5 изменённых файлов: 197 добавлений и 53 удалений

Просмотреть файл

@ -11,6 +11,7 @@ support-files =
[browser_de_fields.js]
[browser_fr_fields.js]
[browser_ignore_invisible_fields.js]
[browser_label_rules.js]
[browser_multiple_section.js]
[browser_parse_address_fields.js]
[browser_parse_creditcard_expiry_fields.js]

Просмотреть файл

@ -0,0 +1,36 @@
/* Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/ */
/* global add_heuristic_tests */
"use strict";
// Exercises the label-only rules. The keyword "sender-address" appears twice
// in the fixture: once as the <label> text attached to #test1 and once as the
// `name` attribute of #test2. The expectation lists three fields, the last of
// which is "address-line1"; #test2 is not part of the expected fields.
add_heuristic_tests([
  {
    fixtureData: `
<html>
<body>
<form>
<input type="text" id="name" autocomplete="name"/>
<input type="text" id="country" autocomplete="country"/>
<label for="test1">sender-address</label>
<input type="text" id="test1"/>
<input type="text" id="test2" name="sender-address"/>
</form>
</body>
</html>`,
    expectedResult: [
      {
        // NOTE(review): presumably the reason assumed for every field below
        // that does not state its own `reason` - confirm against the
        // add_heuristic_tests harness.
        default: {
          reason: "regex-heuristic",
        },
        // The original literal ended with an unbalanced trailing quote.
        description: `Only "sender-address" keywords in labels`,
        fields: [
          { fieldName: "name", reason: "autocomplete" },
          { fieldName: "country", reason: "autocomplete" },
          { fieldName: "address-line1" },
        ],
      },
    ],
  },
]);

Просмотреть файл

@ -26,6 +26,12 @@ add_heuristic_tests(
{ fieldName: "email" },
],
},
{
invalid: true,
fields: [
{ fieldName: "address-line1", reason:"regex-heuristic" },
],
},
{
default: {
reason: "regex-heuristic",
@ -45,6 +51,12 @@ add_heuristic_tests(
{ fieldName: "email" },
],
},
{
invalid: true,
fields: [
{ fieldName: "address-line1", reason:"regex-heuristic" },
],
},
{
invalid: true,
fields: [
@ -94,6 +106,12 @@ add_heuristic_tests(
{ fieldName: "email" },
],
},
{
invalid: true,
fields: [
{ fieldName: "address-line1", reason:"regex-heuristic" },
],
},
{
default: {
reason: "regex-heuristic",
@ -113,6 +131,12 @@ add_heuristic_tests(
{ fieldName: "email" },
],
},
{
invalid: true,
fields: [
{ fieldName: "address-line1", reason:"regex-heuristic" },
],
},
{
invalid: true,
fields: [

Просмотреть файл

@ -88,6 +88,7 @@ export class FormSection {
*/
export const FormAutofillHeuristics = {
RULES: HeuristicsRegExp.getRules(),
LABEL_RULES: HeuristicsRegExp.getLabelRules(),
CREDIT_CARD_FIELDNAMES: [],
ADDRESS_FIELDNAMES: [],
@ -260,7 +261,7 @@ export const FormAutofillHeuristics = {
) {
const regExpTelExtension = new RegExp(
"\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT
"iu"
"iug"
);
if (this._matchRegexp(field.element, regExpTelExtension)) {
scanner.updateFieldName(scanner.parsingIndex, "tel-extension");
@ -872,15 +873,21 @@ export const FormAutofillHeuristics = {
* Extract all the signature strings of an element.
*
* @param {HTMLElement} element
* @returns {ElementStrings}
* @returns {Array<string>}
*/
_getElementStrings(element) {
return [element.id, element.name, element.placeholder?.trim()];
},
/**
* Extract all the label strings associated with an element.
*
* @param {HTMLElement} element
* @returns {ElementStrings}
*/
_getElementLabelStrings(element) {
return {
*[Symbol.iterator]() {
yield element.id;
yield element.name;
yield element.placeholder?.trim();
const labels = lazy.LabelUtils.findLabelElements(element);
for (let label of labels) {
yield* lazy.LabelUtils.extractLabelStrings(label);
@ -912,46 +919,75 @@ export const FormAutofillHeuristics = {
},
/**
* Find the first matched field name of the element with the given regex list.
* Find the first matching field name from a given list of field names
* that matches an HTML element.
*
* @param {HTMLElement} element
* @param {Array<string>} regexps
* The regex key names that correspond to pattern in the rule list. It will
* be matched against the element string converted to lower case.
* @returns {?string} The first matched field name
* The function first tries to match the element against a set of
* pre-defined regular expression rules. If no match is found, it
* then checks for label-specific rules, if they exist.
*
* Note: For label rules, the keyword is often more general
* (e.g., "^\\W*address"), hence they are only searched within labels
* to reduce the occurrence of false positives.
*
* @param {HTMLElement} element The element to match.
* @param {Array<string>} fieldNames An array of field names to compare against.
* @returns {string|null} The name of the matched field, or null if no match was found.
*/
_findMatchedFieldName(element, regexps) {
if (!regexps.length) {
_findMatchedFieldName(element, fieldNames) {
if (!fieldNames.length) {
return null;
}
const getElementStrings = this._getElementStrings(element);
for (let regexp of regexps) {
for (let string of getElementStrings) {
if (this.testRegex(this.RULES[regexp], string?.toLowerCase())) {
return regexp;
}
}
}
// Attempt to match the element against the default set of rules
let matchedFieldName = fieldNames.find(fieldName =>
this._matchRegexp(element, this.RULES[fieldName])
);
return null;
// If no match is found, and if a label rule exists for the field,
// attempt to match against the label rules
if (!matchedFieldName) {
matchedFieldName = fieldNames.find(fieldName => {
const regexp = this.LABEL_RULES[fieldName];
return this._matchRegexp(element, regexp, { attribute: false });
});
}
return matchedFieldName;
},
/**
* Determine whether the regexp can match any of element strings.
*
* @param {HTMLElement} element
* @param {RegExp} regexp
*
* @returns {boolean}
* @param {HTMLElement} element The HTML element to match.
* @param {RegExp} regexp The regular expression to match against.
* @param {object} [options] Optional parameters for matching.
* @param {boolean} [options.attribute=true]
* Whether to match against the element's attributes.
* @param {boolean} [options.label=true]
* Whether to match against the element's labels.
* @returns {boolean} True if a match is found, otherwise false.
*/
_matchRegexp(element, regexp) {
const elemStrings = this._getElementStrings(element);
for (const str of elemStrings) {
if (regexp.test(str)) {
_matchRegexp(element, regexp, { attribute = true, label = true } = {}) {
if (!regexp) {
return false;
}
if (attribute) {
const elemStrings = this._getElementStrings(element);
if (elemStrings.find(s => this.testRegex(regexp, s?.toLowerCase()))) {
return true;
}
}
if (label) {
const elementLabelStrings = this._getElementLabelStrings(element);
for (const s of elementLabelStrings) {
if (this.testRegex(regexp, s?.toLowerCase())) {
return true;
}
}
}
return false;
},

Просмотреть файл

@ -33,6 +33,12 @@ export const HeuristicsRegExp = {
"cc-type": undefined,
},
// Regular expressions that only apply to labels.
// The `undefined` values are placeholders: getLabelRules() passes this object
// to _getRules(), which replaces every key listed here with a getter that
// builds the RegExp from LABEL_RULE_SETS the first time it is read. A field
// name that has no key here therefore never gets a label rule.
LABEL_RULES: {
"address-line1": undefined,
"address-line2": undefined,
},
RULE_SETS: [
//=========================================================================
// Firefox-specific rules
@ -596,35 +602,76 @@ export const HeuristicsRegExp = {
},
],
_getRule(name) {
let rules = [];
this.RULE_SETS.forEach(set => {
if (set[name]) {
// Add the rule.
// We make the regex lower case so that we can match it against the
// lower-cased field name and get a rough equivalent of a case-insensitive
// match. This avoids a performance cliff with the "iu" flag on regular
// expressions.
rules.push(`(${set[name].toLowerCase()})`.normalize("NFKC"));
}
});
LABEL_RULE_SETS: [
{
"address-line1":
"(^\\W*address)" +
"|(address\\W*$)" +
"|(?:shipping|billing|mailing|pick.?up|drop.?off|delivery|sender|postal|" +
"recipient|home|work|office|school|business|mail)[\\s\\-]+address" +
"|address\\s+(of|for|to|from)" +
"|adresse" + // fr-FR
"|indirizzo" + // it-IT
"|住所" + // ja-JP
"|地址" + // zh-CN
"|(\\b|_)adres(?! tarifi)(\\b|_)" + // tr
"|주소" + // ko-KR
"|^alamat" + // id
// Should contain street and any other address component, in any order
"|street.*(house|building|apartment|floor)" + // en
"|(house|building|apartment|floor).*street" +
"|(sokak|cadde).*(apartman|bina|daire|mahalle)" + // tr
"|(apartman|bina|daire|mahalle).*(sokak|cadde)" +
"|улиц.*(дом|корпус|квартир|этаж)|(дом|корпус|квартир|этаж).*улиц", // ru
},
{
"address-line2":
"address|line" +
"|adresse" + // fr-FR
"|indirizzo" + // it-IT
"|地址" + // zh-CN
"|주소", // ko-KR
},
],
const value = new RegExp(rules.join("|"), "gu");
Object.defineProperty(this.RULES, name, { get: undefined });
Object.defineProperty(this.RULES, name, { value });
return value;
},
_getRules(rules, rulesets) {
function computeRule(name) {
let regexps = [];
rulesets.forEach(set => {
if (set[name]) {
// Add the rule.
// We make the regex lower case so that we can match it against the
// lower-cased field name and get a rough equivalent of a case-insensitive
// match. This avoids a performance cliff with the "iu" flag on regular
// expressions.
regexps.push(`(${set[name].toLowerCase()})`.normalize("NFKC"));
}
});
getRules() {
Object.keys(this.RULES).forEach(field =>
Object.defineProperty(this.RULES, field, {
const value = new RegExp(regexps.join("|"), "gu");
Object.defineProperty(rules, name, { get: undefined });
Object.defineProperty(rules, name, { value });
return value;
}
Object.keys(rules).forEach(field =>
Object.defineProperty(rules, field, {
get() {
return HeuristicsRegExp._getRule(field);
return computeRule(field);
},
})
);
return this.RULES;
return rules;
},
getLabelRules() {
return this._getRules(this.LABEL_RULES, this.LABEL_RULE_SETS);
},
getRules() {
return this._getRules(this.RULES, this.RULE_SETS);
},
};